Commit 0d7f22cb authored by Hanna JULIENNE
Browse files

Merge branch 'hot_fix' into 'master'

Hot fix

See merge request !1
parents 3ecc09f7 c5ca18bf
Pipeline #68829 passed with stages in 1 minute and 7 seconds
...@@ -95,7 +95,7 @@ def add_preprocessing_argument(): ...@@ -95,7 +95,7 @@ def add_preprocessing_argument():
parser.add_argument('--additional-masked-region', required=False, help= "List of dictionary containing coordinate of region to mask. For example :[{'chr':6, 'start':50000000, 'end': 70000000}, {'chr':6, 'start':100000000, 'end': 120000000}]", default='None') parser.add_argument('--additional-masked-region', required=False, help= "List of dictionary containing coordinate of region to mask. For example :[{'chr':6, 'start':50000000, 'end': 70000000}, {'chr':6, 'start':100000000, 'end': 120000000}]", default='None')
parser.add_argument('--imputation-quality-treshold', required=False, help= "minimum imputation quality in summary statistics", default='None') parser.add_argument('--imputation-quality-treshold', required=False, help= "minimum imputation quality in summary statistics", default='None')
parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rsid') parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rs-number')
parser.set_defaults(func=launch_preprocessing) parser.set_defaults(func=launch_preprocessing)
......
...@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None): ...@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float}) 'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float})
print(fullGWAS.head()) print(fullGWAS.head())
#Ensure that allele are written in upper cases:
fullGWAS.a1 = fullGWAS.a1.str.upper()
fullGWAS.a2 = fullGWAS.a2.str.upper()
def sorted_alleles(x): def sorted_alleles(x):
return "".join(sorted(x)) return "".join(sorted(x))
# either rs ID or full position must be available: # either rs ID or full position must be available:
......
...@@ -15,13 +15,13 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio ...@@ -15,13 +15,13 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
Filter the reference panel by minimum allele frequency (hg19 coordinate) Filter the reference panel by minimum allele frequency (hg19 coordinate)
minimum_MAF (float): minimum allele frequency for a SNPs to be retain in the panel minimum_MAF (float): minimum allele frequency for a SNPs to be retain in the panel
region_to_mask (dict): a list of additional regions to mask region_to_mask (dict): a list of additional regions to mask
type_of_index(str): 'rsid' or 'positional' type_of_index(str): 'rs-number' or 'positional'
Return: Return:
ref (pandas dataframe): the reference_panel with the specified filter applied ref (pandas dataframe): the reference_panel with the specified filter applied
""" """
ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t", ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t",
names =[ 'chr', "snp_id", "MAF","pos", "ref", "alt"], names =[ 'chr', "snp_id", "MAF","pos", "ref", "alt"],
dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str}, dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str},
index_col="snp_id") index_col="snp_id")
def sorted_alleles(x): def sorted_alleles(x):
...@@ -44,7 +44,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio ...@@ -44,7 +44,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"): def map_on_ref_panel(gw_df , ref_panel, index_type="rs-number"):
""" """
Merge Gwas dataframe with the reference panel Merge Gwas dataframe with the reference panel
Make sure that the same SNPs are in the reference panel and the gwas Make sure that the same SNPs are in the reference panel and the gwas
...@@ -58,7 +58,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"): ...@@ -58,7 +58,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
""" """
if index_type=="rsid": if index_type=="rs-number":
merge_GWAS = pd.merge(ref_panel, gw_df, merge_GWAS = pd.merge(ref_panel, gw_df,
how='inner', indicator=True, left_index=True, right_index=True) how='inner', indicator=True, left_index=True, right_index=True)
print("SNPs {}".format(merge_GWAS.shape[0])) print("SNPs {}".format(merge_GWAS.shape[0]))
...@@ -80,11 +80,13 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"): ...@@ -80,11 +80,13 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
print(merge_GWAS) print(merge_GWAS)
merge_GWAS.set_index("snp_id", inplace=True) merge_GWAS.set_index("snp_id", inplace=True)
else: else:
raise ValueError("index_type can take only two values: 'rsid' or 'positional'") raise ValueError("index_type can take only two values: 'rs-number' or 'positional'")
if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)] if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns)):
else: if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?") merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
else:
raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?")
print("before filter") print("before filter")
print(merge_GWAS.shape) print(merge_GWAS.shape)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment