Skip to content
Snippets Groups Projects
Commit 2954eacb authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

fixed diagnostic plot

parent 1a6d0f05
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import numpy as np
import jass_preprocessing.dna_utils as dna_u
import warnings
import scipy.stats as ss
import seaborn as sns
import matplotlib.pyplot as plt
perSS = 0.7
def compute_z_score(mgwas):
"""
Compute zscore value and sign1
add the corresponding column to the mgwas dataframe
"""
mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"]
return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait ):
if 'n' in mgwas.columns:
myN = mgwas.n
#--- freq, case-cont N exist
elif(('ncas' in mgwas.columns) & ('ncont' in mgwas.columns)):
sumN = mgwas.ncas + mgwas.ncont
perCase = mgwas.ncas / sumN
myN = sumN * perCase * (1-perCase)
#---se, freq exist and QT
elif(('se' in mgwas.columns) & ('freq' in mgwas.columns)):
myN = 1/(mgwas.freq * (1-mgwas.freq)* 2 * mgwas.se**2)
#---se exists but no freq, // use ref panel freq
elif(('se' in mgwas.columns) & ('freq' not in mgwas.columns)):
myN = 1/(mgwas.MAF * (1-mgwas.MAF)* 2 * mgwas.se**2)
else:
raise ValueError(
'Both Sample size and SE(beta) are missing: sample size filtering cannot be applied')
# replace infinite value by the max finite one
if(np.isinf(myN).any()):
myN[np.isinf(myN)] = np.max(myN[np.isfinite(myN)])
warnings.warn("Some snp had an infinite sample size")
myW_thres = np.percentile(myN.dropna(), 90)
ss_thres = perSS * myW_thres
mgwas["computed_N"] = myN
plt.clf()
p1 = sns.distplot(mgwas.computed_N)
p1.axvline(x=ss_thres)
fo = "{0}/Sample_size_distribution_{1}.png".format(diagnostic_folder, trait)
p1.figure.savefig(fo)
# Filter SNP with a too small sample _SampleSize
print("NSNP before sample size filtering: {}".format(mgwas.shape[0]))
mgwas = mgwas.loc[(myN >= ss_thres)]
print("NSNP after sample size filtering: {}".format(mgwas.shape[0]))
return(mgwas)
......@@ -7,16 +7,19 @@ def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study):
Write the preprocessed Gwas
"""
mgwas.reset_index(inplace=True)
mgwas = pd.DataFrame({
'rsID': mgwas_chr.snp_id,
'pos': mgwas_chr.pos,
'A0': mgwas_chr.ref,
'A1':mgwas_chr.alt,
'Z': mgwas_chr.computed_z
}, columns= ['rsID', 'pos', 'A0', "A1", "Z" ])
mgwas.set_index("chr", inplace=True)
mgwas.dropna(subset=["Z"], how="any", inplace=True)
for chrom in mgwas.index.unique:
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr_'+str(chrom)+".txt"
print("WRITING CHR {} results for {} to:".format(chrom, my_study))
mgwas.loc[chrom].to_csv(impg_output_file, sep="\t", index=False)
mgwas.dropna(subset=["computed_z"], how="any", inplace=True)
for chrom in mgwas.index.unique():
mgwas_chr = pd.DataFrame({
'rsID': mgwas.loc[chrom].snp_id,
'pos': mgwas.loc[chrom].pos,
'A0': mgwas.loc[chrom].ref,
'A1':mgwas.loc[chrom].alt,
'Z': mgwas.loc[chrom].computed_z
}, columns= ['rsID', 'pos', 'A0', "A1", "Z" ])
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt"
print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder))
mgwas_chr.sort_values(by="pos").to_csv(impg_output_file, sep="\t", index=False)
......@@ -32,11 +32,12 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
out_summary = "summary_GWAS.csv"
ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'
GWAS_labels
gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
GWAS_table = ["GWAS_SBP_recoded_dummy.txt"]#["GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt",
#"GWAS_PP_recoded.txt","GWAS_SBP_recoded_dummy.txt"]
GWAS_table = ["GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt",
"GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt",
"GIANT_2015_HIP_COMBINED_EUR.txt", "MAGIC_HbA1C.txt",
"EUR.CD.gwas_filtered.assoc"]
for GWAS_filename in GWAS_table:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment