Adapt effect size detection to binary case

664bf830 · Hanna JULIENNE · f436305c · 664bf830 · 664bf830 · 664bf830
Commit 664bf830 authored 7 years ago by Hanna JULIENNE
--- a/jass_preprocessing/jass_preprocessing/compute_score/compute.py
+++ b/jass_preprocessing/jass_preprocessing/compute_score/compute.py
@@ -13,7 +13,18 @@ def compute_z_score(mgwas):
    Compute zscore value and sign1
    add the corresponding column to the mgwas dataframe
    """
-    mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"]
+    if 'z' in mgwas.columns:
+        sign_vect = np.sign(mgwas.z)
+    else:
+        if "OR" in mgwas.columns:
+            sign_vect = np.sign(mgwas["OR"] - 1.0 + 10**(-8))
+        else:
+            raise ValueError(
+                'The gwas data frame doesn"t contain effect column')
+    mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * sign_vect * mgwas["sign_flip"]
    return mgwas
 def compute_sample_size(mgwas, diagnostic_folder, trait):

--- a/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
+++ b/jass_preprocessing/jass_preprocessing/map_reference/map_reference.py
@@ -33,8 +33,11 @@ def map_on_ref_panel(gw_df , ref_panel):
        ref_panel['key2'] = "chr"+ref_panel.chr.map(str) + ":" +ref_panel.pos.map(str)
        other_snp = pd.merge(ref_panel, gw_df, how='inner', indicator=True,
                             left_on ='key2', right_index=True)
-        merge_GWAS.loc[other_snp.index] = other_snp
+        merge_GWAS = pd.concat([other_snp, merge_GWAS])
+    merge_GWAS = merge_GWAS[~merge_GWAS.index.duplicated(keep='first')]
+    merge_GWAS.index.rename("snp_id", inplace=True)
    return(merge_GWAS)

--- a/main_preprocessing.py
+++ b/main_preprocessing.py
@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
 import jass_preprocessing as jp
 import pandas as pd
 import seaborn as sns
+import time
 perSS = 0.7
 netPath = "/mnt/atlas/"
@@ -33,18 +34,18 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
 out_summary = "summary_GWAS.csv"
 ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'
 gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
+gwas_map.index
-GWAS_table = ["EUR.CD.gwas_filtered.assoc"]
+GWAS_table = gwas_map.index["GIANT_2015_HIP_COMBINED_EUR.txt"]#"EUR.CD.gwas_filtered.assoc"]
 # "GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", #"GWAS_SBP_recoded_dummy.txt"
 #              "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt",
 #              "GIANT_2015_HIP_COMBINED_EUR.txt",
 #              ]
+for GWAS_filename in GWAS_table.index:
-for GWAS_filename in GWAS_table:
+    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
+                           gwas_map.loc[GWAS_filename, 'outcome'])
-    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'], gwas_map.loc[GWAS_filename, 'outcome'])
    print('processing GWAS: {}'.format(tag))
+    start = time.time()
    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
    column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
@@ -52,23 +53,26 @@ for GWAS_filename in GWAS_table:
    mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
    gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
    ref = pd.read_csv(REF_filename, header=None, sep= "\t",
                      names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                       index_col="snp_id")
-    inter_index = ref.index.intersection(gw_df.index)
+    if gw_df.index.map(str).str.contains("^chr*", case=False).any():
-    test_merge = pd.merge(ref.loc[inter_index], gw_df.loc[inter_index], how='inner',
+        ref['key2'] = "chr"+ref.chr.map(str) + ":" +ref.pos.map(str)
-                          indicator=True, left_index=True, right_index=True)
+        other_snp = pd.merge(ref, gw_df, how='inner', indicator=True,
+                             left_on ='key2', left_index=False, right_index=True)
-    print(jp.map_reference.map_on_ref_panel)
    mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
    mgwas = jp.map_reference.compute_snp_alignement(mgwas)
    mgwas = jp.compute_score.compute_z_score(mgwas)
    mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
+    end = time.time()
+    print("Preprocessing of {0} in {1}s".format(tag, end-start))
    jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
    jp.save_output.save_output(mgwas, ldscore_format, tag)
-    jp.save_output.save_output
 mgwas.reset_index(inplace=True)
 mgwas.sort_values(['chr', "pos"]).head(100)