fixed command line arguments

c10239e2 · Hanna JULIENNE · c2563922 · c10239e2 · c10239e2 · c10239e2
Commit c10239e2 authored 6 years ago by Hanna JULIENNE
--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
@@ -13,7 +13,7 @@ import argparse
 #| variable name | description | current default value|
 #|---------------|-------------|----------------------|
 #| netPath | Main project folder, must end by "/" | /mnt/atlas/ |
-#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
+#| args.gwas_info* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
 #| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
 #| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
 #| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
@@ -27,55 +27,56 @@ def launch_preprocessing(args):
    """
    Preprocessing GWAS dataset
    """
-    gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
+    gwas_map = pd.read_csv(args.gwas_info, sep="\t", index_col=0)

-    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
-                           gwas_map.loc[GWAS_filename, 'outcome'])
+    tag = "{0}_{1}".format(gwas_map.loc[args.gwas_filename, 'consortia'],
+                           gwas_map.loc[args.gwas_filename, 'outcome'])

    print('processing GWAS: {}'.format(tag))
    start = time.time()
-    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
-    GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
-    mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
+    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, args.gwas_folder)
+    GWAS_link = jp.map_gwas.walkfs(args.gwas_folder, args.gwas_filename)[2]
+    mapgw = jp.map_gwas.map_columns_position(GWAS_link, args.gwas_info)
    print(mapgw)

    gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)

-    ref = pd.read_csv(REF_filename, header=None, sep= "\t",
+    ref = pd.read_csv(args.ref_path, header=None, sep= "\t",
                      names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                       index_col="snp_id")

    mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
    mgwas = jp.map_reference.compute_snp_alignement(mgwas)
    mgwas = jp.compute_score.compute_z_score(mgwas)
-    mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
+    mgwas = jp.compute_score.compute_sample_size(mgwas, args.diagnostic_folder, tag, args.percent_sample_size)
+
    end = time.time()

    print("Preprocessing of {0} in {1}s".format(tag, end-start))
+    jp.save_output.save_output_by_chromosome(mgwas, args.output_folder, tag)

-    jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
-    jp.save_output.save_output(mgwas, ldscore_format, tag)
+    if(args.output_folder_1_file):
+        jp.save_output.save_output(mgwas, args.output_folder_1_file, tag)


 def add_preprocessing_argument():

    parser = argparse.ArgumentParser()
-    parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs")
-
    parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
-    parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)")
-    parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'")
+    parser.add_argument('--gwas-filename', required=True, help= "Name of the raw GWAS file to standardize")
+    parser.add_argument('--ref-path', required=True, help= "reference panel location (used to determine which snp to impute)")
+    parser.add_argument('--gwas-folder', required=True, help= "Path to the folder containing the GWASs summ stat files, must end by '/'")
+    parser.add_argument('--diagnostic-folder', required=True, help= "Path to the reporting information on the PreProcessing such as the SNPs sample size distribution")

    parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
    parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
-
+    parser.add_argument('--percent-sample-size', required=False, help= "the proportion (between 0 and 1) of the 90th percentile of the sample size used to filter the SNPs", default=0.7)
    parser.set_defaults(func=launch_preprocessing)

    return parser

 def main():

-
    parser = add_preprocessing_argument()
    args = parser.parse_args()
    args.func(args)

--- a/jass_preprocessing/compute_score.py
+++ b/jass_preprocessing/compute_score.py
@@ -6,7 +6,7 @@ import scipy.stats as ss
 import seaborn as sns
 import matplotlib.pyplot as plt

-perSS = 0.7
+

 def compute_z_score(mgwas):
    """
@@ -27,7 +27,7 @@ def compute_z_score(mgwas):

    return mgwas

-def compute_sample_size(mgwas, diagnostic_folder, trait):
+def compute_sample_size(mgwas, diagnostic_folder, trait, perSS = 0.7):

    if 'n' in mgwas.columns:
        myN = mgwas.n

--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ setup(name='jass_preprocessing',
      description='Preprocess and standardize heterogeneous GWAS summary statistic for JASS',
      url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
      author='Hugues Aschard, Hanna Julienne, Vincent Laville',
-      author_email='hugues.aschard@pasteur.fr',
+      author_email='hanna.julienne@pasteur.fr',
      license='MIT',
      #package_dir = {'': 'jass_preprocessing'},
      packages= ['jass_preprocessing'],