Commit 5fc49314 authored by Hanna JULIENNE
Browse files

modify main

parent d644c800
Pipeline #14548 passed with stages in 1 minute and 10 seconds
...@@ -29,45 +29,46 @@ def launch_preprocessing(args): ...@@ -29,45 +29,46 @@ def launch_preprocessing(args):
""" """
gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'], for gwas_filename in gwas_map[['filename']]:
gwas_map.loc[args.gwas_filename, 'outcome']) tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'],
gwas_map.loc[gwas_filename, 'outcome'])
print('processing GWAS: {}'.format(tag)) print('processing GWAS: {}'.format(tag))
start = time.time() start = time.time()
GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2] GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2]
mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
ref = pd.read_csv(args.ref_path, header=None, sep= "\t", ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
index_col="snp_id") index_col="snp_id")
mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.map_reference.compute_snp_alignement(mgwas)
mgwas = jp.compute_score.compute_z_score(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas)
mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size) mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
end = time.time() end = time.time()
print("Preprocessing of {0} in {1}s".format(tag, end-start)) print("Preprocessing of {0} in {1}s".format(tag, end-start))
jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag) jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
if(args.output_folder_1_file): if(args.output_folder_1_file):
jp.save_output.save_output(mgwas, args.output_folder_1_file, tag) jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
def add_preprocessing_argument(): def add_preprocessing_argument():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files") parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files with correct header")
parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize") #parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)") parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'") parser.add_argument('--input-folder', required=True, help= "Path to the folder containing the Raw GWASs summary statistic files, must end by '/'")
parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution") parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns") parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns (useful to compute LDSC correlation for instance)")
parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7) parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
parser.set_defaults(func=launch_preprocessing) parser.set_defaults(func=launch_preprocessing)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment