From c10239e25d4fb905e46de8631fa738d97fe781d2 Mon Sep 17 00:00:00 2001 From: hanna julienne <hanna.julienne@pasteur.fr> Date: Thu, 29 Nov 2018 15:18:57 +0100 Subject: [PATCH] fixed command line arguments --- jass_preprocessing/__main__.py | 35 +++++++++++++++-------------- jass_preprocessing/compute_score.py | 4 ++-- setup.py | 2 +- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py index 46943b8..a4a9a56 100644 --- a/jass_preprocessing/__main__.py +++ b/jass_preprocessing/__main__.py @@ -13,7 +13,7 @@ import argparse #| variable name | description | current default value| #|---------------|-------------|----------------------| #| netPath | Main project folder, must end by "/" | /mnt/atlas/ | -#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' | +#| args.gwas_info* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/args.gwas_info.csv' | #| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'| #| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ | #| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ | @@ -27,55 +27,56 @@ def launch_preprocessing(args): """ Preprocessing GWAS dataset """ - gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0) + gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0) - tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'], - gwas_map.loc[GWAS_filename, 'outcome']) + tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'], + gwas_map.loc[args.gwas_filename, 'outcome']) print('processing GWAS: {}'.format(tag)) start = time.time() - gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path) - GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2] - mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels) + gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder) + GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2] + mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info) print(mapgw) gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) - ref = pd.read_csv(REF_filename, header=None, sep= "\t", + ref = pd.read_csv(args.ref_path, header=None, sep= "\t", names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], index_col="snp_id") mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) mgwas = jp.map_reference.compute_snp_alignement(mgwas) mgwas = jp.compute_score.compute_z_score(mgwas) - mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag) + mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size) + end = time.time() print("Preprocessing of {0} in {1}s".format(tag, end-start)) + jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag) - jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag) - jp.save_output.save_output(mgwas, ldscore_format, tag) + if(args.output_folder_1_file): + jp.save_output.save_output(mgwas, args.output_folder_1_file, tag) def add_preprocessing_argument(): parser = argparse.ArgumentParser() - parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs") - parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files") - parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)") - parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'") + parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize") + parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)") + parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'") + parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution") parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns") - + parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7) parser.set_defaults(func=launch_preprocessing) return parser def main(): - parser = add_preprocessing_argument() args = parser.parse_args() args.func(args) diff --git a/jass_preprocessing/compute_score.py b/jass_preprocessing/compute_score.py index fd29ff2..db0e324 100644 --- a/jass_preprocessing/compute_score.py +++ b/jass_preprocessing/compute_score.py @@ -6,7 +6,7 @@ import scipy.stats as ss import seaborn as sns import matplotlib.pyplot as plt -perSS = 0.7 + def compute_z_score(mgwas): """ @@ -27,7 +27,7 @@ def compute_z_score(mgwas): return mgwas -def compute_sample_size(mgwas, diagnostic_folder, trait): +def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7): if 'n' in mgwas.columns: myN = mgwas.n diff --git a/setup.py b/setup.py index aac8ae5..b0413ec 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup(name='jass_preprocessing', description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS', url='http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing', author='Hugues Aschard, Hanna Julienne, Vincent Laville', - author_email='hugues.aschard@pasteur.fr', + author_email='hanna.julienne@pasteur.fr', license='MIT', #package_dir = {'': 'jass_preprocessing'}, packages= ['jass_preprocessing'], -- GitLab