Commit 8f3a98d9 authored by Hanna JULIENNE
Browse files

FIX issue #8

parent 3ecc09f7
...@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None): ...@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
'NA', 'NULL', 'NaN', 'NA', 'NULL', 'NaN',
'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float}) 'nan', 'na', '.', '-'], dtype={"snpid":str, "a1":str,"a2":str,"freq":float, "z":float,"se":float, "pval":float})
print(fullGWAS.head()) print(fullGWAS.head())
# Ensure that alleles are written in upper case:
fullGWAS.a1 = fullGWAS.a1.str.upper()
fullGWAS.a2 = fullGWAS.a2.str.upper()
def sorted_alleles(x): def sorted_alleles(x):
return "".join(sorted(x)) return "".join(sorted(x))
# either rs ID or full position must be available: # either rs ID or full position must be available:
......
...@@ -21,7 +21,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio ...@@ -21,7 +21,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
""" """
ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t", ref = pd.read_csv(gwas_reference_panel, header=None, sep= "\t",
names =[ 'chr', "snp_id", "MAF","pos", "ref", "alt"], names =[ 'chr', "snp_id", "MAF","pos", "ref", "alt"],
dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str}, dtype = {"chr": str, "snp_id":str, "MAF": np.float, "pos":np.int, "ref":str, "alt":str},
index_col="snp_id") index_col="snp_id")
def sorted_alleles(x): def sorted_alleles(x):
...@@ -81,10 +81,12 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"): ...@@ -81,10 +81,12 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
merge_GWAS.set_index("snp_id", inplace=True) merge_GWAS.set_index("snp_id", inplace=True)
else: else:
raise ValueError("index_type can take only two values: 'rsid' or 'positional'") raise ValueError("index_type can take only two values: 'rsid' or 'positional'")
if ((merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)] if (("pos" in merge_GWAS.columns) and ("POS" in merge_GWAS.columns))
else: if (merge_GWAS.pos == merge_GWAS.POS).mean()> 0.95):
raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?") merge_GWAS = merge_GWAS.loc[(merge_GWAS.pos == merge_GWAS.POS)]
else:
raise ValueError("SNP positions in reference panel and in Summary statistic are different! Different assembly?")
print("before filter") print("before filter")
print(merge_GWAS.shape) print(merge_GWAS.shape)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment