Skip to content
Snippets Groups Projects
Commit 664bf830 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

Adapt effect size detection to binary case

parent f436305c
No related branches found
No related tags found
No related merge requests found
...@@ -13,7 +13,18 @@ def compute_z_score(mgwas): ...@@ -13,7 +13,18 @@ def compute_z_score(mgwas):
Compute zscore value and sign1 Compute zscore value and sign1
add the corresponding column to the mgwas dataframe add the corresponding column to the mgwas dataframe
""" """
mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"]
if 'z' in mgwas.columns:
sign_vect = np.sign(mgwas.z)
else:
if "OR" in mgwas.columns:
sign_vect = np.sign(mgwas["OR"] - 1.0 + 10**(-8))
else:
raise ValueError(
'The gwas data frame doesn"t contain effect column')
mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * sign_vect * mgwas["sign_flip"]
return mgwas return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait): def compute_sample_size(mgwas, diagnostic_folder, trait):
......
...@@ -33,8 +33,11 @@ def map_on_ref_panel(gw_df , ref_panel): ...@@ -33,8 +33,11 @@ def map_on_ref_panel(gw_df , ref_panel):
ref_panel['key2'] = "chr"+ref_panel.chr.map(str) + ":" +ref_panel.pos.map(str) ref_panel['key2'] = "chr"+ref_panel.chr.map(str) + ":" +ref_panel.pos.map(str)
other_snp = pd.merge(ref_panel, gw_df, how='inner', indicator=True, other_snp = pd.merge(ref_panel, gw_df, how='inner', indicator=True,
left_on ='key2', right_index=True) left_on ='key2', right_index=True)
merge_GWAS.loc[other_snp.index] = other_snp merge_GWAS = pd.concat([other_snp, merge_GWAS])
merge_GWAS = merge_GWAS[~merge_GWAS.index.duplicated(keep='first')]
merge_GWAS.index.rename("snp_id", inplace=True)
return(merge_GWAS) return(merge_GWAS)
......
...@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt ...@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
import jass_preprocessing as jp import jass_preprocessing as jp
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import time
perSS = 0.7 perSS = 0.7
netPath = "/mnt/atlas/" netPath = "/mnt/atlas/"
...@@ -33,18 +34,18 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', ...@@ -33,18 +34,18 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
out_summary = "summary_GWAS.csv" out_summary = "summary_GWAS.csv"
ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/' ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'
gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0) gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
gwas_map.index
GWAS_table = ["EUR.CD.gwas_filtered.assoc"] GWAS_table = gwas_map.index["GIANT_2015_HIP_COMBINED_EUR.txt"]#"EUR.CD.gwas_filtered.assoc"]
# "GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", #"GWAS_SBP_recoded_dummy.txt" # "GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", #"GWAS_SBP_recoded_dummy.txt"
# "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt", # "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt",
# "GIANT_2015_HIP_COMBINED_EUR.txt", # "GIANT_2015_HIP_COMBINED_EUR.txt",
# ] # ]
for GWAS_filename in GWAS_table.index:
for GWAS_filename in GWAS_table: tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
gwas_map.loc[GWAS_filename, 'outcome'])
tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'], gwas_map.loc[GWAS_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time()
gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path) gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na') column_dict = pd.read_csv(GWAS_labels, sep='\t', na_values='na')
...@@ -52,23 +53,26 @@ for GWAS_filename in GWAS_table: ...@@ -52,23 +53,26 @@ for GWAS_filename in GWAS_table:
mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels) mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(REF_filename, header=None, sep= "\t", ref = pd.read_csv(REF_filename, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
inter_index = ref.index.intersection(gw_df.index) if gw_df.index.map(str).str.contains("^chr*", case=False).any():
test_merge = pd.merge(ref.loc[inter_index], gw_df.loc[inter_index], how='inner', ref['key2'] = "chr"+ref.chr.map(str) + ":" +ref.pos.map(str)
indicator=True, left_index=True, right_index=True) other_snp = pd.merge(ref, gw_df, how='inner', indicator=True,
left_on ='key2', left_index=False, right_index=True)
print(jp.map_reference.map_on_ref_panel)
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag) mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
end = time.time()
print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag) jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
jp.save_output.save_output(mgwas, ldscore_format, tag) jp.save_output.save_output(mgwas, ldscore_format, tag)
jp.save_output.save_output
mgwas.reset_index(inplace=True) mgwas.reset_index(inplace=True)
mgwas.sort_values(['chr', "pos"]).head(100) mgwas.sort_values(['chr', "pos"]).head(100)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment