command line tool

95d3c141 · hjulienne · 5a81ea93 · 95d3c141 · 95d3c141 · 95d3c141
Commit 95d3c141 authored 6 years ago by hjulienne
--- a/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree
+++ b/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree
--- a/doc/_build/doctrees/_autosummary/modules.doctree
+++ b/doc/_build/doctrees/_autosummary/modules.doctree
--- a/doc/_build/doctrees/environment.pickle
+++ b/doc/_build/doctrees/environment.pickle
--- a/doc/_build/doctrees/index.doctree
+++ b/doc/_build/doctrees/index.doctree
--- a/doc/_build/html/_autosummary/jass_preprocessing.html
+++ b/doc/_build/html/_autosummary/jass_preprocessing.html
@@ -309,6 +309,29 @@ Make sure that the same SNPs are in the reference panel and the gwas</p>
 </div>
 <div class="section" id="module-jass_preprocessing">
 <span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-jass_preprocessing" title="Permalink to this headline">¶</a></h2>
+<table border="1" class="longtable docutils">
+<colgroup>
+<col width="10%" />
+<col width="90%" />
+</colgroup>
+<tbody valign="top">
+<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_gwas" title="jass_preprocessing.map_gwas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_gwas</span></code></a></td>
+<td>Map GWAS</td>
+</tr>
+<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.dna_utils" title="jass_preprocessing.dna_utils"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dna_utils</span></code></a></td>
+<td>A few functions to compute the DNA complement</td>
+</tr>
+<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_reference" title="jass_preprocessing.map_reference"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_reference</span></code></a></td>
+<td>Module of function</td>
+</tr>
+<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.compute_score" title="jass_preprocessing.compute_score"><code class="xref py py-obj docutils literal notranslate"><span class="pre">compute_score</span></code></a></td>
+<td></td>
+</tr>
+<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.save_output" title="jass_preprocessing.save_output"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save_output</span></code></a></td>
+<td></td>
+</tr>
+</tbody>
+</table>
 </div>
 </div>


--- a/doc/_build/html/_autosummary/modules.html
+++ b/doc/_build/html/_autosummary/modules.html
@@ -74,7 +74,9 @@
 <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_gwas">jass_preprocessing.map_gwas module</a></li>
 <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_reference">jass_preprocessing.map_reference module</a></li>
 <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.save_output">jass_preprocessing.save_output module</a></li>
-<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a></li>
+<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a><ul class="simple">
+</ul>
+</li>
 </ul>
 </li>
 </ul>

--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -24,6 +24,8 @@ The QC and preprocessing step goes as follow:
 * Select GWAS SNPs that are in the input reference panel
 * Align coded allele of the GWAS with the reference panel
 * Infer the number of samples per SNP if not present in the input data
+* Filter out SNPs with a small sample size
+* Normalize the effect size by sample size to obtain Z-scores
 * Save the output by chromosome as the following example:

 +----------+-------+------+-----+--------+

--- a/jass_preprocessing/__main__.py
+++ b/jass_preprocessing/__main__.py
+"""
+Command-line entry point of jass_preprocessing.
+
+Read raw GWAS summary statistics, filter and format them, then
+write the cleaned GWAS datasets by chromosome.
+"""
+# NOTE(review): this date is not in ISO order (YYYY-MM-DD); it presumably
+# means 26 June 2018 -- confirm before relying on it.
+__updated__ = '2018-26-06'
+
+import pandas as pd
+import jass_preprocessing as jp
+import time
+import argparse
+
+
+#| variable name | description | current default value|
+#|---------------|-------------|----------------------|
+#| netPath | Main project folder, must end with "/" | /mnt/atlas/ |
+#| GWAS_labels* | Path to the file describing the format of the individual GWAS files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
+#| GWAS_path* | Path to the folder containing the GWAS summary statistics files, must end with "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
+#| diagnostic_folder | Folder for histograms of the sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
+#| ldscore_format | Data formatted for use with LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
+#| REF_filename* | File containing the reference panel for imputation | netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out' |
+#| pathOUT | **unused in main_preprocessing.py**  | netPath+'PCMA/1._DATA/RAW.summary/'|
+#| ImpG_output_Folder | Main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
+
+
+
+def launch_preprocessing(args):
+    """
+    Preprocess one GWAS summary-statistics dataset and save the result.
+
+    Steps, in the order written below: read the GWAS description table,
+    locate and read the GWAS file, read the reference panel, map the GWAS
+    onto it, run the alignment, Z-score and sample-size helpers of
+    ``jass_preprocessing``, then save the output.
+
+    :param args: namespace handed over by ``main`` through ``args.func(args)``.
+
+    NOTE(review): ``args`` is never read in this body. The names
+    ``GWAS_labels``, ``GWAS_filename``, ``GWAS_table``, ``GWAS_path``,
+    ``REF_filename``, ``diagnostic_folder``, ``ImpG_output_Folder`` and
+    ``ldscore_format`` are not bound anywhere in the visible part of this
+    module, so the function raises ``NameError`` unless they are defined
+    elsewhere. They presumably have to be derived from the command-line
+    options -- confirm the mapping before wiring them up.
+    """
+    # Tab-separated description table; its first column becomes the index.
+    gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
+
+    # Label of the study: "<consortia>_<outcome>" taken from the row
+    # indexed by GWAS_filename.
+    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
+                           gwas_map.loc[GWAS_filename, 'outcome'])
+
+    print('processing GWAS: {}'.format(tag))
+    start = time.time()
+    # NOTE(review): `gwas` is assigned here but never used afterwards.
+    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
+    # Only the third element of the value returned by walkfs is kept.
+    GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
+    mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
+    print(mapgw)
+
+    gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
+
+    # Reference panel: header-less, tab-separated file; columns are named
+    # here and the table is indexed by SNP identifier.
+    ref = pd.read_csv(REF_filename, header=None, sep= "\t",
+                      names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
+                       index_col="snp_id")
+
+    # Each helper takes the current table and returns the updated one.
+    mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
+    mgwas = jp.map_reference.compute_snp_alignement(mgwas)
+    mgwas = jp.compute_score.compute_z_score(mgwas)
+    mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
+    end = time.time()
+
+    # The reported duration excludes the two save calls below.
+    print("Preprocessing of {0} in {1}s".format(tag, end-start))
+
+    jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
+    jp.save_output.save_output(mgwas, ldscore_format, tag)
+
+
+def add_preprocessing_argument(parser=None):
+    """
+    Register the preprocessing command-line options on a parser.
+
+    :param parser: ``argparse.ArgumentParser`` to extend; a new parser is
+        created when omitted.
+    :return: the same parser, with the options added and its ``func``
+        default set to ``launch_preprocessing``.
+    """
+    # The caller passes its parser in and expects it back; create one
+    # only when called without argument.
+    if parser is None:
+        parser = argparse.ArgumentParser()
+
+    # Parsed as a float so that a non-numeric value is rejected up front.
+    parser.add_argument('--percent-sample-size', required=True, type=float, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs")
+
+    parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
+    parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)")
+    parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'")
+
+    parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
+    parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
+
+    # The entry point dispatches through ``args.func(args)``.
+    parser.set_defaults(func=launch_preprocessing)
+    return parser
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Build the argument parser, parse the command line and call the
+    function stored in the ``func`` attribute of the parsed namespace.
+    """
+    cli = add_preprocessing_argument(argparse.ArgumentParser())
+    parsed = cli.parse_args()
+    parsed.func(parsed)
+
+
+if __name__ == "__main__":
+    main()
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,14 @@ setup(name='jass_preprocessing',
      license='MIT',
      #package_dir = {'': 'jass_preprocessing'},
      packages= ['jass_preprocessing'],
-      zip_safe=False)
+      zip_safe=False,
+            install_requires=[
+          'scipy', 'numpy', 'pandas', 'seaborn'
+      ],
+      entry_points={
+          'console_scripts' : [
+            'jass_preprocessing = jass_preprocessing.__main__:main'
+          ]
+      })

 #, "jass_preprocessing.map_gwas","jass_preprocessing.dna_utils", "jass_preprocessing.map_reference","jass_preprocessing.compute_score", "jass_preprocessing.save_output"