From c10239e25d4fb905e46de8631fa738d97fe781d2 Mon Sep 17 00:00:00 2001
From: hanna julienne <hanna.julienne@pasteur.fr>
Date: Thu, 29 Nov 2018 15:18:57 +0100
Subject: [PATCH] fixed command line arguments

---
 jass_preprocessing/__main__.py      | 35 +++++++++++++++--------------
 jass_preprocessing/compute_score.py |  4 ++--
 setup.py                            |  2 +-
 3 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py
index 46943b8..a4a9a56 100644
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -13,7 +13,7 @@ import argparse
 #| variable name | description | current default value|
 #|---------------|-------------|----------------------|
 #| netPath | Main project folder, must end by "/" | /mnt/atlas/ |
-#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
+#| args.gwas_info* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
 #| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
 #| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
 #| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
@@ -27,55 +27,56 @@ def launch_preprocessing(args):
     """
     Preprocessing GWAS dataset
     """
-    gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
+    gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)
 
-    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
-                           gwas_map.loc[GWAS_filename, 'outcome'])
+    tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
+                           gwas_map.loc[args.gwas_filename, 'outcome'])
 
     print('processing GWAS: {}'.format(tag))
     start = time.time()
-    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
-    GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
-    mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
+    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder)
+    GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
+    mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
     print(mapgw)
 
     gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
 
-    ref = pd.read_csv(REF_filename, header=None, sep= "\t",
+    ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
                       names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                        index_col="snp_id")
 
     mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
     mgwas = jp.map_reference.compute_snp_alignement(mgwas)
     mgwas = jp.compute_score.compute_z_score(mgwas)
-    mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
+    mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
+
     end = time.time()
 
     print("Preprocessing of {0} in {1}s".format(tag, end-start))
+    jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)
 
-    jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
-    jp.save_output.save_output(mgwas, ldscore_format, tag)
+    if(args.output_folder_1_file):
+        jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)
 
 
 def add_preprocessing_argument():
 
     parser = argparse.ArgumentParser()
-    parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs")
-
     parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
-    parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)")
-    parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'")
+    parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
+    parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
+    parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
+    parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")
 
     parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
     parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
-
+    parser.add_argument('--percent-sample-size', required=False, type=float, default=0.7, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs")  # type=float: without it argparse hands a CLI-supplied value over as str
     parser.set_defaults(func=launch_preprocessing)
 
     return parser
 
 def main():
 
-
     parser = add_preprocessing_argument()
     args = parser.parse_args()
     args.func(args)
diff --git a/jass_preprocessing/compute_score.py b/jass_preprocessing/compute_score.py
index fd29ff2..db0e324 100644
--- a/jass_preprocessing/compute_score.py
+++ b/jass_preprocessing/compute_score.py
@@ -6,7 +6,7 @@ import scipy.stats as ss
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-perSS = 0.7
+
 
 def compute_z_score(mgwas):
     """
@@ -27,7 +27,7 @@ def compute_z_score(mgwas):
 
     return mgwas
 
-def compute_sample_size(mgwas, diagnostic_folder, trait):
+def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):
 
     if 'n' in mgwas.columns:
         myN = mgwas.n
diff --git a/setup.py b/setup.py
index aac8ae5..b0413ec 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ setup(name='jass_preprocessing',
       description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS',
       url='http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
       author='Hugues Aschard, Hanna Julienne, Vincent Laville',
-      author_email='hugues.aschard@pasteur.fr',
+      author_email='hanna.julienne@pasteur.fr',
       license='MIT',
       #package_dir = {'': 'jass_preprocessing'},
       packages= ['jass_preprocessing'],
-- 
GitLab