Commit 31337253 authored by Hanna JULIENNE
Browse files

write output while they are calculated

parent 3ad69c2e
...@@ -5,21 +5,29 @@ from impute_jass.imputation_launcher import ImputationLauncher ...@@ -5,21 +5,29 @@ from impute_jass.imputation_launcher import ImputationLauncher
def launch_chromosome_imputation(args): def launch_chromosome_imputation(args):
""" """
Function whose only purpose is to allow the calling of the ImputationLauncher.chromosome_imputation method Function allow the calling of the ImputationLauncher.chromosome_imputation
from an entry point method from an entry point
Args:
args (argparse.Namespace): arguments parsed from the command line; see the
__main__.add_chromosome_imputation_argument(parser) function.
""" """
print("Imputation of {0} gwas for chromosome {1}".format(args.gwas, args.chrom)) print("Imputation of {0} gwas for chromosome {1}".format(args.gwas, args.chrom))
# Imputer settings
imputer = ImputationLauncher( window_size=int(args.window_size), buf=int(args.buffer_size), imputer = ImputationLauncher( window_size=int(args.window_size), buf=int(args.buffer_size),
lamb= float(args.l2_regularization), pinv_rcond = float(args.eigen_treshold)) lamb= float(args.l2_regularization), pinv_rcond = float(args.eigen_treshold))
# Reading of inputs
z_file = "{0}/z_{1}_{2}.txt".format(args.zscore_folder, args.gwas, args.chrom) z_file = "{0}/z_{1}_{2}.txt".format(args.zscore_folder, args.gwas, args.chrom)
zscore = pd.read_csv(z_file,index_col=0, sep="\t") zscore = pd.read_csv(z_file,index_col=0, sep="\t")
ref_panel_file = args.ref_folder + "/"+ args.chrom +".eur.1pct.bim" ref_panel_file = args.ref_folder + "/"+ args.chrom +".eur.1pct.bim"
ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1) ref_panel = pd.read_csv(ref_panel_file, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
# imputation
imputed_zscore = imputer.chromosome_imputation(args.chrom, zscore, ref_panel, args.ld_folder) imputed_zscore = imputer.chromosome_imputation(args.chrom, zscore, ref_panel, args.ld_folder)
print("Imputation DONE") print("Imputation DONE")
# Saving results
z_fo = "{0}/z_{1}_{2}.txt".format(args.output_folder, args.gwas, args.chrom) z_fo = "{0}/z_{1}_{2}.txt".format(args.output_folder, args.gwas, args.chrom)
imputed_zscore.to_csv(z_fo, sep='\t') imputed_zscore.to_csv(z_fo, sep='\t')
print("Save imputation done at {0}".format(z_fo)) print("Save imputation done at {0}".format(z_fo))
...@@ -40,7 +48,6 @@ def add_chromosome_imputation_argument(parser): ...@@ -40,7 +48,6 @@ def add_chromosome_imputation_argument(parser):
parser.set_defaults(func=launch_chromosome_imputation) parser.set_defaults(func=launch_chromosome_imputation)
return(parser) return(parser)
def main(): def main():
parser = argparse.ArgumentParser()#prog='impute_jass') parser = argparse.ArgumentParser()#prog='impute_jass')
......
...@@ -15,16 +15,21 @@ class ImputationLauncher(object): ...@@ -15,16 +15,21 @@ class ImputationLauncher(object):
""" """
def __init__(self, window_size=10000, imputation_style="batch", buf=2500, lamb= 0.01, pinv_rcond = 0.01 ): def __init__(self, window_size=10000, imputation_style="batch", buf=2500, lamb= 0.01, pinv_rcond = 0.01 ):
""" """
Initialise the imputation object. Fix the window size, the buffer size
and the kind of imputation employed
Args: Args:
window_size (int): size of the imputation window in bp window_size (int): size of the imputation window in bp
imputation_style (str): define if the windows while span the genome in a non overlapping fashion ("batch") or imputation_style (str): define if the windows while span the genome
by being centered on each snp to impute ('online') in a non overlapping fashion ("batch") or
buffer (int): the size of the padding around the windows of imputation (relevant only for batch imputation) by being centered on each snp to impute
lamb (float): size of the increment added to snp correlation matrices to make it less singular ('online')
pinv_rcond (float): the rcond scipy.linalg.pinv function argument. The scipy.linalg.pinv is used to invert buffer (int): the size of the padding around the windows of
imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation
matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument.
The scipy.linalg.pinv is used to invert
the correlationmatrices the correlationmatrices
""" """
self.imputation_style = imputation_style self.imputation_style = imputation_style
self.window_size = window_size self.window_size = window_size
...@@ -34,7 +39,8 @@ class ImputationLauncher(object): ...@@ -34,7 +39,8 @@ class ImputationLauncher(object):
def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder): def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
""" """
Impute the panel zscore score for one chromosome and with the specified parameters Impute the panel zscore score for one chromosome and with the specified
parameters
Args: Args:
chrom : str specifying chromosome chrom : str specifying chromosome
...@@ -62,7 +68,7 @@ class ImputationLauncher(object): ...@@ -62,7 +68,7 @@ class ImputationLauncher(object):
def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output): def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
""" """
Launch imputation on all chromosome Launch imputation on all chromosome for one trait
""" """
......
...@@ -27,6 +27,7 @@ def realigned_zfiles_on_panel(ref_panel, zscore): ...@@ -27,6 +27,7 @@ def realigned_zfiles_on_panel(ref_panel, zscore):
If not, the coded and other allele are inverted and the zscore sign If not, the coded and other allele are inverted and the zscore sign
is inverted also. is inverted also.
""" """
sub_ref_panel = ref_panel.loc[zscore.index] sub_ref_panel = ref_panel.loc[zscore.index]
allele_inverted = (sub_ref_panel['Ref_all'] != zscore.A0) allele_inverted = (sub_ref_panel['Ref_all'] != zscore.A0)
...@@ -41,7 +42,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore): ...@@ -41,7 +42,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
""" """
Prepare the known Z score by realigning them on the reference ref_panel Prepare the known Z score by realigning them on the reference ref_panel
the snps that are not present in the ref panel are filtered the snps that are not present in the ref panel are filtered
""" """
zscore = realigned_zfiles_on_panel(ref_panel, zscore) zscore = realigned_zfiles_on_panel(ref_panel, zscore)
zscore['Var'] = -1 zscore['Var'] = -1
...@@ -94,7 +95,8 @@ def print_progression(i, Nwindows): ...@@ -94,7 +95,8 @@ def print_progression(i, Nwindows):
if i%(np.ceil(Nwindows/10)) == 0: if i%(np.ceil(Nwindows/10)) == 0:
print("{0}\%".format(np.round(i/Nwindows,3))) print("{0}\%".format(np.round(i/Nwindows,3)))
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, rcond, unknowns=pd.Series([])): def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
rcond, file_output, unknowns=pd.Series([])):
""" """
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument Argument
...@@ -108,6 +110,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, ...@@ -108,6 +110,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)] all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)]
zscore = prepare_zscore_for_imputation(ref_panel, zscore) zscore = prepare_zscore_for_imputation(ref_panel, zscore)
zscore.to_csv(z_fo, sep='\t')
zscore_results = zscore.copy(deep=True) zscore_results = zscore.copy(deep=True)
print("### Imputation of {0} snps ###".format(all_unknowns.shape[0])) print("### Imputation of {0} snps ###".format(all_unknowns.shape[0]))
...@@ -142,6 +145,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, ...@@ -142,6 +145,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
# keep only SNP with non negligible explained variance # keep only SNP with non negligible explained variance
snp_well_predicted = (batch_df.Var < 0.9) snp_well_predicted = (batch_df.Var < 0.9)
batch_df_filt = batch_df.loc[in_core_window & snp_well_predicted, zscore_results.columns] batch_df_filt = batch_df.loc[in_core_window & snp_well_predicted, zscore_results.columns]
batch_df_filt.to_csv(z_fo, sep='\t', mode = 'a')
zscore_results = pd.concat([zscore_results, batch_df_filt]) zscore_results = pd.concat([zscore_results, batch_df_filt])
i = i+1 i = i+1
print_progression(i, Nwindows) print_progression(i, Nwindows)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment