Commit 31337253 authored by Hanna JULIENNE

write output while they are calculated

parent 3ad69c2e
......@@ -5,21 +5,29 @@ from impute_jass.imputation_launcher import ImputationLauncher
def launch_chromosome_imputation(args):
    """
    Entry-point wrapper around ImputationLauncher.chromosome_imputation.

    Reads the Z-score table and the reference panel of one chromosome,
    runs the imputation and saves the returned table as a tab-separated
    file in the output folder.

    Args:
        args: arguments parsed from the command line, read by attribute
            (gwas, chrom, window_size, buffer_size, l2_regularization,
            eigen_treshold, zscore_folder, ref_folder, ld_folder,
            output_folder); see the
            __main__.add_chromosome_imputation_argument(parser) function.
    """
    print("Imputation of {0} gwas for chromosome {1}".format(args.gwas, args.chrom))

    # Imputer settings (command-line values are cast to numbers here)
    launcher_settings = dict(
        window_size=int(args.window_size),
        buf=int(args.buffer_size),
        lamb=float(args.l2_regularization),
        pinv_rcond=float(args.eigen_treshold),
    )
    imputer = ImputationLauncher(**launcher_settings)

    # Reading of inputs
    zscore_path = "{0}/z_{1}_{2}.txt".format(args.zscore_folder, args.gwas, args.chrom)
    known_zscore = pd.read_csv(zscore_path, index_col=0, sep="\t")

    panel_path = args.ref_folder + "/" + args.chrom + ".eur.1pct.bim"
    panel_columns = ['chr', "nothing", 'pos', 'Ref_all', 'alt_all']
    # The second column of the .bim file (named "nothing") becomes the index.
    panel = pd.read_csv(panel_path, sep="\t", names=panel_columns, index_col=1)

    # imputation
    imputed = imputer.chromosome_imputation(
        args.chrom, known_zscore, panel, args.ld_folder
    )
    print("Imputation DONE")

    # Saving results
    destination = "{0}/z_{1}_{2}.txt".format(args.output_folder, args.gwas, args.chrom)
    imputed.to_csv(destination, sep='\t')
    print("Save imputation done at {0}".format(destination))
......@@ -40,7 +48,6 @@ def add_chromosome_imputation_argument(parser):
parser.set_defaults(func=launch_chromosome_imputation)
return(parser)
def main():
parser = argparse.ArgumentParser()#prog='impute_jass')
......
......@@ -15,16 +15,21 @@ class ImputationLauncher(object):
"""
def __init__(self, window_size=10000, imputation_style="batch", buf=2500, lamb= 0.01, pinv_rcond = 0.01 ):
"""
Initialise the imputation object. Fix the windows size, the buffer size
and the kind of imputation employed
Args:
window_size (int): size of the imputation window in bp
imputation_style (str): define whether the windows will span the genome in a non-overlapping fashion ("batch") or
by being centered on each snp to impute ('online')
buffer (int): the size of the padding around the windows of imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument. The scipy.linalg.pinv is used to invert
imputation_style (str): define whether the windows will span the genome
in a non-overlapping fashion ("batch") or
by being centered on each snp to impute
('online')
buffer (int): the size of the padding around the windows of
imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation
matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument.
The scipy.linalg.pinv is used to invert
the correlation matrices
"""
self.imputation_style = imputation_style
self.window_size = window_size
......@@ -34,7 +39,8 @@ class ImputationLauncher(object):
def chromosome_imputation(self, chrom, zscore, ref_panel, ld_folder):
"""
Impute the panel zscore score for one chromosome and with the specified parameters
Impute the panel zscore score for one chromosome and with the specified
parameters
Args:
chrom : str specifying chromosome
......@@ -62,7 +68,7 @@ class ImputationLauncher(object):
def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
"""
Launch imputation on all chromosome
Launch imputation on all chromosomes for one trait
"""
......
......@@ -27,6 +27,7 @@ def realigned_zfiles_on_panel(ref_panel, zscore):
If not, the coded and other allele are inverted and the zscore sign
is inverted also.
"""
sub_ref_panel = ref_panel.loc[zscore.index]
allele_inverted = (sub_ref_panel['Ref_all'] != zscore.A0)
......@@ -41,7 +42,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
"""
Prepare the known Z score by realigning them on the reference ref_panel
the snps that are not present in the ref panel are filtered
"""
zscore = realigned_zfiles_on_panel(ref_panel, zscore)
zscore['Var'] = -1
......@@ -94,7 +95,8 @@ def print_progression(i, Nwindows):
if i%(np.ceil(Nwindows/10)) == 0:
print("{0}\%".format(np.round(i/Nwindows,3)))
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb, rcond, unknowns=pd.Series([])):
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
rcond, file_output, unknowns=pd.Series([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Argument
......@@ -108,6 +110,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
all_unknowns = ref_panel.loc[ref_panel.index.difference(zscore.index)]
zscore = prepare_zscore_for_imputation(ref_panel, zscore)
zscore.to_csv(z_fo, sep='\t')
zscore_results = zscore.copy(deep=True)
print("### Imputation of {0} snps ###".format(all_unknowns.shape[0]))
......@@ -142,6 +145,7 @@ def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
# keep only SNP with non negligible explained variance
snp_well_predicted = (batch_df.Var < 0.9)
batch_df_filt = batch_df.loc[in_core_window & snp_well_predicted, zscore_results.columns]
batch_df_filt.to_csv(z_fo, sep='\t', mode = 'a')
zscore_results = pd.concat([zscore_results, batch_df_filt])
i = i+1
print_progression(i, Nwindows)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment