Skip to content
Snippets Groups Projects
Commit a2448fd0 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

add function to save whole GWAS

parent 2954eacb
Branches
Tags
No related merge requests found
...@@ -16,7 +16,7 @@ def compute_z_score(mgwas): ...@@ -16,7 +16,7 @@ def compute_z_score(mgwas):
mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"] mgwas["computed_z"] = np.sqrt(ss.chi2.isf(mgwas['pval'], 1)) * np.sign(mgwas.z) * mgwas["sign_flip"]
return mgwas return mgwas
def compute_sample_size(mgwas, diagnostic_folder, trait ): def compute_sample_size(mgwas, diagnostic_folder, trait):
if 'n' in mgwas.columns: if 'n' in mgwas.columns:
myN = mgwas.n myN = mgwas.n
......
...@@ -16,7 +16,9 @@ def read_reference(gwas_reference_panel): ...@@ -16,7 +16,9 @@ def read_reference(gwas_reference_panel):
def map_on_ref_panel(gw_df , ref_panel): def map_on_ref_panel(gw_df , ref_panel):
""" """
Merge Gwas df with the reference panel Merge Gwas dataframe with the reference panel
Make sure that the same SNPs are in the reference panel and the gwas
""" """
def key2(x): def key2(x):
return ('chr' + str(x["chr"]) + ":" + str(x["pos"])) return ('chr' + str(x["chr"]) + ":" + str(x["pos"]))
...@@ -35,6 +37,11 @@ def map_on_ref_panel(gw_df , ref_panel): ...@@ -35,6 +37,11 @@ def map_on_ref_panel(gw_df , ref_panel):
def compute_is_flipped(mgwas): def compute_is_flipped(mgwas):
"""
Check if the reference panel and the GWAS data have the same reference
allele. return a boolean vector.
"""
flipped = pd.DataFrame({"ref_flipped" : (mgwas.ref == mgwas.a2), "alt_flipped" : (mgwas.alt == mgwas.a1)}) flipped = pd.DataFrame({"ref_flipped" : (mgwas.ref == mgwas.a2), "alt_flipped" : (mgwas.alt == mgwas.a1)})
flipped_complement = pd.DataFrame({"ref_flippedc" : (mgwas.ref == mgwas.a2c), "alt_flippedc" : (mgwas.alt == mgwas.a1c)}) flipped_complement = pd.DataFrame({"ref_flippedc" : (mgwas.ref == mgwas.a2c), "alt_flippedc" : (mgwas.alt == mgwas.a1c)})
...@@ -44,6 +51,13 @@ def compute_is_flipped(mgwas): ...@@ -44,6 +51,13 @@ def compute_is_flipped(mgwas):
return is_flipped return is_flipped
def compute_is_aligned(mgwas): def compute_is_aligned(mgwas):
"""
Check if the reference panel and the GWAS data have the same reference
allele. return a boolean vector.
The function should be the complement of "is_flipped" but we still compute
the two function to eventually detect weird cases (more than two alleles for
instance)
"""
aligned = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1), "alt_ok" : (mgwas.alt == mgwas.a2)}) aligned = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1), "alt_ok" : (mgwas.alt == mgwas.a2)})
aligned_complement = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1c), "alt_ok" : (mgwas.alt == mgwas.a2c)}) aligned_complement = pd.DataFrame({"ref_ok" : (mgwas.ref == mgwas.a1c), "alt_ok" : (mgwas.alt == mgwas.a2c)})
...@@ -53,7 +67,6 @@ def compute_is_aligned(mgwas): ...@@ -53,7 +67,6 @@ def compute_is_aligned(mgwas):
return is_aligned return is_aligned
def compute_snp_alignement(mgwas): def compute_snp_alignement(mgwas):
""" """
Add a column to mgwas indicating if the reference and coded Add a column to mgwas indicating if the reference and coded
allele is flipped compared to the reference panel. allele is flipped compared to the reference panel.
......
...@@ -4,22 +4,46 @@ import pandas as pd ...@@ -4,22 +4,46 @@ import pandas as pd
def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study): def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study):
""" """
Write the preprocessed Gwas Write the preprocessed Gwas for imputation
""" """
mgwas.reset_index(inplace=True) mgwas_copy = mgwas.reset_index(inplace=False)
mgwas.set_index("chr", inplace=True) mgwas_copy.set_index("chr", inplace=True)
mgwas.dropna(subset=["computed_z"], how="any", inplace=True) mgwas_copy.dropna(subset=["computed_z"], how="any", inplace=True)
for chrom in mgwas.index.unique(): for chrom in mgwas_copy.index.unique():
mgwas_chr = pd.DataFrame({ mgwas_chr = pd.DataFrame({
'rsID': mgwas.loc[chrom].snp_id, 'rsID': mgwas_copy.loc[chrom].snp_id,
'pos': mgwas.loc[chrom].pos, 'pos': mgwas_copy.loc[chrom].pos,
'A0': mgwas.loc[chrom].ref, 'A0': mgwas_copy.loc[chrom].ref,
'A1':mgwas.loc[chrom].alt, 'A1':mgwas_copy.loc[chrom].alt,
'Z': mgwas.loc[chrom].computed_z 'Z': mgwas_copy.loc[chrom].computed_z,
}, columns= ['rsID', 'pos', 'A0', "A1", "Z" ]) 'P': mgwas_copy.loc[chrom].pval
}, columns= ['rsID', 'pos', 'A0', "A1", "Z", "P" ])
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt" impg_output_file = ImpG_output_Folder + 'z_'+ my_study +'_chr'+str(chrom)+".txt"
print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder)) print("WRITING CHR {} results for {} to: {}".format(chrom, my_study, ImpG_output_Folder))
mgwas_chr.sort_values(by="pos").to_csv(impg_output_file, sep="\t", index=False) mgwas_chr.sort_values(by="pos").to_csv(impg_output_file, sep="\t", index=False)
def save_output(mgwas, ImpG_output_Folder, my_study):
"""
Write the preprocessed Gwas for ldscore analysis
"""
mgwas_copy = mgwas.reset_index(inplace=False)
mgwas_copy.dropna(subset=["computed_z"], how="any", inplace=True)
mgwas_copy = pd.DataFrame({
'chrom':mgwas_copy.chr,
'rsID': mgwas_copy.snp_id,
'pos': mgwas_copy.pos,
'A0': mgwas_copy.ref,
'A1':mgwas_copy.alt,
'Z': mgwas_copy.computed_z,
'P': mgwas_copy.pval
}, columns= ['chrom','rsID', 'pos', 'A0', "A1", "Z", "P" ])
impg_output_file = ImpG_output_Folder + 'z_'+ my_study +".txt"
print("WRITING results for {} to: {}".format( my_study, ImpG_output_Folder))
print(mgwas_copy.head())
mgwas_copy.sort_values(['chrom','pos']).to_csv(impg_output_file, sep="\t", index=False)
...@@ -21,6 +21,7 @@ GWAS_labels = netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' ...@@ -21,6 +21,7 @@ GWAS_labels = netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv'
GWAS_path = netPath+'PCMA/1._DATA/RAW.GWAS/' GWAS_path = netPath+'PCMA/1._DATA/RAW.GWAS/'
diagnostic_folder= "/mnt/atlas/PCMA/1._DATA/sample_size_distribution/" diagnostic_folder= "/mnt/atlas/PCMA/1._DATA/sample_size_distribution/"
ldscore_format="/mnt/atlas/PCMA/1._DATA/ldscore_data/"
REF_filename = netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out' REF_filename = netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out'
pathOUT = netPath+'PCMA/1._DATA/RAW.summary/' pathOUT = netPath+'PCMA/1._DATA/RAW.summary/'
...@@ -31,13 +32,13 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', ...@@ -31,13 +32,13 @@ def_missing = ['', '#N/A', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN',
out_summary = "summary_GWAS.csv" out_summary = "summary_GWAS.csv"
ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/' ImpG_output_Folder = netPath+ 'PCMA/1._DATA/preprocessing_test/'
gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0) gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
GWAS_table = ["GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", GWAS_table = ["EUR.CD.gwas_filtered.assoc"]
"GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt", # "GWAS_DBP_recoded.txt","GWAS_MAP_recoded.txt", #"GWAS_SBP_recoded_dummy.txt"
"GIANT_2015_HIP_COMBINED_EUR.txt", "MAGIC_HbA1C.txt", # "GWAS_PP_recoded.txt","GWAS_SBP_recoded.txt",
"EUR.CD.gwas_filtered.assoc"] # "GIANT_2015_HIP_COMBINED_EUR.txt",
# ]
for GWAS_filename in GWAS_table: for GWAS_filename in GWAS_table:
...@@ -66,3 +67,8 @@ for GWAS_filename in GWAS_table: ...@@ -66,3 +67,8 @@ for GWAS_filename in GWAS_table:
mgwas = jp.compute_score.compute_z_score(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag) mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag) jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
jp.save_output.save_output(mgwas, ldscore_format, tag)
jp.save_output.save_output
mgwas.reset_index(inplace=True)
mgwas.sort_values(['chr', "pos"]).head(100)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment