module to align GWAS onto the reference panel

d2f5af12 · Hanna JULIENNE · b3fb8ef1
Commit d2f5af12 authored 7 years ago by Hanna JULIENNE
--- a/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
+++ b/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
@@ -23,8 +23,12 @@ def map_on_ref_panel(gw_df , ref_panel):

    ref_panel['key2'] = ref_panel.apply(key2,1)

-    merge_GWAS = pd.merge(ref_panel, gw_df, how='inner', indicator=True, left_index=True, right_index=True)
-    other_snp = pd.merge(ref_panel, gw_df, how='inner', indicator=True, left_on ='key2', right_index=True)
+    inter_index = ref_panel.index.intersection(gw_df.index)
+    print("SNps {}".format(len(inter_index)))
+    merge_GWAS = pd.merge(ref_panel.loc[inter_index], gw_df.loc[inter_index], how='inner', indicator=True, left_index=True, right_index=True)
+
+    inter_index = gw_df.index.intersection(ref_panel.index)
+    other_snp = pd.merge(ref_panel.loc[inter_index], gw_df.loc[inter_index], how='inner', indicator=True, left_on ='key2', right_index=True)

    merge_GWAS.loc[other_snp.index] = other_snp
    return(merge_GWAS)

--- a/main_preprocessing.py
+++ b/main_preprocessing.py
@@ -13,7 +13,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import jass_preprocessing as jp
 import pandas as pd
-
+import seaborn as sns

 perSS = 0.7
 netPath = "/mnt/atlas/"  # '/home/genstat/ATLAS/'
@@ -27,22 +27,22 @@ outFileName = netPath+'PCMA/1._DATA/ZSCORE_merged_ALL_NO_strand_ambiguous.hdf5'
 def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
               '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'nan', 'na', '.']
 out_summary = "summary_GWAS.csv"
-ImpG_output_Folder = netPath+ 'PCMA/1._DATA/ImpG_zfiles/'
-
+ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'

-gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
+GWAS_labels
+gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0, nrows=10)

-GWAS_table = ["GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt"]
+GWAS_table = ["GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", "GWAS_PP_recoded.txt","GWAS_SBP_recoded_dummy.txt"]

 gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
-
+gwas
 column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')


 my_labels = column_dict[column_dict['filename'] == gwas.iloc[0,0]]
 column_dict[['freq']]
    # READ GWAS
-GWAS_filename = GWAS_table[0]
+GWAS_filename = GWAS_table[3]

 GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
 GWAS_link
@@ -54,12 +54,20 @@ gw_df.head()
 ref = pd.read_csv(REF_filename, header=None, sep= "\t",
                  names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                   index_col="snp_id")
+inter_index = ref.index.intersection(gw_df.index)

+test_merge = pd.merge(ref.loc[inter_index], gw_df.loc[inter_index], how='inner',
+                      indicator=True, left_index=True, right_index=True)
+
+
+print(jp.map_reference.map_on_ref_panel)
 mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
+mgwas
 mgwas = jp.map_reference.compute_snp_alignement(mgwas)

-mgwas.head()
-zscore = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"]
-
+mgwas = jp.compute_score.compute_z_score(mgwas)
+mgwas = jp.compute_score.compute_sample_size(mgwas, "/mnt/atlas/PCMA/1._DATA/RAW.GWAS/ICPB_bloodPress/", "test_samp")
+mgwas.reset_index(inplace=True)
+mgwas.set_index("chr", inplace=True)

-np.isinf(ref.head().pos).any()
+jp.