Commit 0d7f22cb authored by Hanna JULIENNE
Browse files

Merge branch 'hot_fix' into 'master'

Hot fix

See merge request !1
parents 3ecc09f7 c5ca18bf
Pipeline #68829 passed with stages
in 1 minute and 7 seconds
......@@ -95,7 +95,7 @@ def add_preprocessing_argument():
parser.add_argument('--additional-masked-region', required=False, help= "List of dictionary containing coordinate of region to mask. For example :[{'chr':6, 'start':50000000, 'end': 70000000}, {'chr':6, 'start':100000000, 'end': 120000000}]", default='None')
parser.add_argument('--imputation-quality-treshold', required=False, help= "minimum imputation quality in summary statistics", default='None')
parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rsid')
parser.add_argument('--index-type', required=False, help= "type of index : rsID or chr:postion:ref_allele:alt_allele", default='rs-number')
parser.set_defaults(func=launch_preprocessing)
......
......@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
'NA', 'NULL', 'NaN',
'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float})
print(fullGWAS.head())
# Ensure that alleles are written in upper case:
fullGWAS.a1 = fullGWAS.a1.str.upper()
fullGWAS.a2 = fullGWAS.a2.str.upper()
# Helper: return the items of x sorted in ascending order and concatenated into one string
# (e.g. "TA" -> "AT"). NOTE(review): presumably used to build an order-independent
# allele key; confirm against the caller, which is not visible in this hunk.
def sorted_alleles(x):
return "".join(sorted(x))
# either rs ID or full position must be available:
......
......@@ -15,7 +15,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
Filter the reference panel by minimum allele frequency (hg19 coordinate)
minimum_MAF (float): minimum allele frequency for a SNPs to be retain in the panel
region_to_mask (dict): a list of additional regions to mask
type_of_index(str): 'rsid' or 'positional'
type_of_index(str): 'rs-number' or 'positional'
Return:
ref (pandas dataframe): the reference_panel with the specified filter applied
"""
......@@ -44,7 +44,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
def map_on_ref_panel(gw_df , ref_panel, index_type="rs-number"):
"""
Merge Gwas dataframe with the reference panel
Make sure that the same SNPs are in the reference panel and the gwas
......@@ -58,7 +58,7 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
"""
if index_type=="rsid":
if index_type=="rs-number":
merge_GWAS = pd.merge(ref_panel, gw_df,
how='inner', indicator=True, left_index=True, right_index=True)
print("SNPs {}".format(merge_GWAS.shape[0]))
......@@ -80,7 +80,9 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
print(merge_GWAS)
merge_GWAS.set_index("snp_id", inplace=True)
else:
raise ValueError("index_type can take only two values: 'rsid' or 'positional'")
raise ValueError("index_type can take only two values: 'rs-number' or 'positional'")
if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns)):
if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
else:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment