diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py index ce5fb5eafc4147b11b77a6594954a69e70de9b8d..bee0a4fea0ec077888f6c029ef543d251dd50b0d 100644 --- a/jass_preprocessing/__main__.py +++ b/jass_preprocessing/__main__.py @@ -29,45 +29,46 @@ def launch_preprocessing(args): """ gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) - tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'], - gwas_map.loc[args.gwas_filename, 'outcome']) + for gwas_filename in gwas_map[['filename']]: + tag = "{0}_{1}".format(gwas_map.loc[gwas_filename, 'consortia'], + gwas_map.loc[gwas_filename, 'outcome']) - print('processing GWAS: {}'.format(tag)) - start = time.time() - GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2] - mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) + print('processing GWAS: {}'.format(tag)) + start = time.time() + GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, gwas_filename)[2] + mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) - gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) + gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) - ref = pd.read_csv(args.ref_path, header=None, sep= "\t", - names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], - index_col="snp_id") + ref = pd.read_csv(args.ref_path, header=None, sep= "\t", + names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], + index_col="snp_id") - mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) - mgwas = jp.map_reference.compute_snp_alignement(mgwas) - mgwas = jp.compute_score.compute_z_score(mgwas) - mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size) + mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) + mgwas = jp.map_reference.compute_snp_alignement(mgwas) + mgwas = jp.compute_score.compute_z_score(mgwas) + mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size) - end = time.time() + end = time.time() - print("Preprocessing of {0} in {1}s".format(tag, end-start)) - jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag) + print("Preprocessing of {0} in {1}s".format(tag, end-start)) + jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag) - if(args.output_folder_1_file): - jp.save_output.save_output(mgwas, args.output_folder_1_file, tag) + if(args.output_folder_1_file): + jp.save_output.save_output(mgwas, args.output_folder_1_file, tag) def add_preprocessing_argument(): parser = argparse.ArgumentParser() - parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files") - parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize") + parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files with correct header") + #parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize") parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)") - parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'") + parser.add_argument('--input-folder', required=True, help= "Path to the folder containing the Raw GWASs summary statistic files, must end by '/'") parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution") parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") - parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns") + parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns (useful to compute LDSC correlation for instance)") parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7) parser.set_defaults(func=launch_preprocessing)