Skip to content
Snippets Groups Projects
Commit fa601f18 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

add flexible region masker

parent cc9337de
No related branches found
No related tags found
No related merge requests found
Pipeline #17213 passed
...@@ -39,8 +39,11 @@ def launch_preprocessing(args): ...@@ -39,8 +39,11 @@ def launch_preprocessing(args):
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = read_reference(args.ref_path, bool(args.mask_MHC), float(args.minimum_MAF))
if args.additional_masked_region is None:
ref = jp.map_reference.read_reference(args.ref_path, bool(args.mask_MHC), float(args.minimum_MAF))
else:
ref = jp.map_reference.read_reference(args.ref_path, bool(args.mask_MHC), float(args.minimum_MAF), region_to_mask=eval(args.additional_masked_region))
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas)
...@@ -70,6 +73,7 @@ def add_preprocessing_argument(): ...@@ -70,6 +73,7 @@ def add_preprocessing_argument():
parser.add_argument('--minimum-MAF', required=False, help= "Filter the reference panel by minimum allele frequency", default=0.01) parser.add_argument('--minimum-MAF', required=False, help= "Filter the reference panel by minimum allele frequency", default=0.01)
parser.add_argument('--mask-MHC', required=False, help= "Whether the MHC region should be masked or not. default is False", default=False) parser.add_argument('--mask-MHC', required=False, help= "Whether the MHC region should be masked or not. default is False", default=False)
parser.add_argument('--additional-masked-region', required=False, help= "List of dictionary containing coordinate of region to mask. For example :[{'chr':6, 'start':50000000, 'end': 70000000}, {'chr':6, 'start':100000000, 'end': 120000000}]", default=None)
parser.set_defaults(func=launch_preprocessing) parser.set_defaults(func=launch_preprocessing)
......
...@@ -8,9 +8,17 @@ import numpy as np ...@@ -8,9 +8,17 @@ import numpy as np
import jass_preprocessing.dna_utils as dna_u import jass_preprocessing.dna_utils as dna_u
import warnings import warnings
def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None): def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, region_to_mask=None):
""" """
helper function to name correctly the column helper function to name correctly the column
Args:
gwas_reference_panel (str): path toward the reference panel file
mask_MHC (bool): Whether the MHC region should be masked or not. default is False
Filter the reference panel by minimum allele frequency (hg19 coordinate)
minimum_MAF (float): minimum allele frequency for a SNPs to be retain in the panel
region_to_mask (dict): a list of additional regions to mask
Return:
ref (pandas dataframe): the reference_panel with the specified filter applied
""" """
ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t", ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
...@@ -21,6 +29,9 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None): ...@@ -21,6 +29,9 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None):
if minimum_MAF is not None: if minimum_MAF is not None:
ref = ref.loc[ref.MAF > minimum_MAF] ref = ref.loc[ref.MAF > minimum_MAF]
if region_to_mask is not None:
for reg in region_to_mask:
ref = ref.loc[(ref.chr != reg["chr"]) | (ref.pos <reg["start"])|(ref.pos > reg["end"])]
return(ref) return(ref)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment