diff --git a/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree b/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree index 71b012fddaaf7515abcbd655dcdb0cd6a0ff43e4..d0913191263b087a2eb7716892dc73279b33e9cb 100644 Binary files a/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree and b/doc/_build/doctrees/_autosummary/jass_preprocessing.doctree differ diff --git a/doc/_build/doctrees/_autosummary/modules.doctree b/doc/_build/doctrees/_autosummary/modules.doctree index 464e859d647dab68e6b5f1392379fd2ed9be44be..d81bf9478aba49c75c023312084bedfb7da10a8b 100644 Binary files a/doc/_build/doctrees/_autosummary/modules.doctree and b/doc/_build/doctrees/_autosummary/modules.doctree differ diff --git a/doc/_build/doctrees/environment.pickle b/doc/_build/doctrees/environment.pickle index a24532d06bf33310b5c4cf25cc0a00d6a42f8f4e..a25385815e8157b8176f6a3de98f8f3998f8d13c 100644 Binary files a/doc/_build/doctrees/environment.pickle and b/doc/_build/doctrees/environment.pickle differ diff --git a/doc/_build/doctrees/index.doctree b/doc/_build/doctrees/index.doctree index 15285561566ce5809c79bedfa141044075072575..5b61a86be1dbf42d20c6b122cd6a38b0dc169d47 100644 Binary files a/doc/_build/doctrees/index.doctree and b/doc/_build/doctrees/index.doctree differ diff --git a/doc/_build/html/_autosummary/jass_preprocessing.html b/doc/_build/html/_autosummary/jass_preprocessing.html index 7fbe1ec99fb5053206f298928718f14128ae2cbb..875b0e9feb52089a9ac7c1559ccc57390f565637 100644 --- a/doc/_build/html/_autosummary/jass_preprocessing.html +++ b/doc/_build/html/_autosummary/jass_preprocessing.html @@ -309,6 +309,29 @@ Make sure that the same SNPs are in the reference panel and the gwas</p> </div> <div class="section" id="module-jass_preprocessing"> <span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-jass_preprocessing" title="Permalink to this headline">¶</a></h2> +<table border="1" class="longtable docutils"> +<colgroup> +<col width="10%" /> +<col width="90%" /> +</colgroup> +<tbody valign="top"> +<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_gwas" title="jass_preprocessing.map_gwas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_gwas</span></code></a></td> +<td>Map GWAS</td> +</tr> +<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.dna_utils" title="jass_preprocessing.dna_utils"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dna_utils</span></code></a></td> +<td>Few fonction to to compute DNA complement</td> +</tr> +<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_reference" title="jass_preprocessing.map_reference"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_reference</span></code></a></td> +<td>Module of function</td> +</tr> +<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.compute_score" title="jass_preprocessing.compute_score"><code class="xref py py-obj docutils literal notranslate"><span class="pre">compute_score</span></code></a></td> +<td></td> +</tr> +<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.save_output" title="jass_preprocessing.save_output"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save_output</span></code></a></td> +<td></td> +</tr> +</tbody> +</table> </div> </div> diff --git a/doc/_build/html/_autosummary/modules.html b/doc/_build/html/_autosummary/modules.html index 111c31b19914afce422e31fa4690f80ba18d26f7..c7a9c3c1c7c2665e93f7a2fef06ba737f7259e25 100644 --- a/doc/_build/html/_autosummary/modules.html +++ b/doc/_build/html/_autosummary/modules.html @@ -74,7 +74,9 @@ <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_gwas">jass_preprocessing.map_gwas module</a></li> <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_reference">jass_preprocessing.map_reference module</a></li> <li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.save_output">jass_preprocessing.save_output module</a></li> -<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a></li> +<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a><ul class="simple"> +</ul> +</li> </ul> </li> </ul> diff --git a/doc/source/index.rst b/doc/source/index.rst index e424cf390f160df460033869f157f95fa2c19b27..ac94d8f10cb7c99ea17b97c2ab2f8d7027b7264e 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -24,6 +24,8 @@ The QC and preprocessing step goes as follow: * Select GWAS SNPs that are in the input reference panel * Align coded allele of the GWAS with the reference panel * Infer Number of sample by SNPs if not present in input data +* Filter SNPs with a small sample size +* Normalize the effect size by sample size to have Z-scores * Save the output by chromosome as the following example: +----------+-------+------+-----+--------+ diff --git a/jass_preprocessing/__main__.py b/jass_preprocessing/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..fe698cc57a2f26eb4f6542f3912dada4df7cf1e8 --- /dev/null +++ b/jass_preprocessing/__main__.py @@ -0,0 +1,83 @@ +""" +Read raw GWAS summary statistics, filter and format +Write clean GWAS datasets by chromosome +""" +__updated__ = '2018-26-06' + +import pandas as pd +import jass_preprocessing as jp +import time +import argparse + + +#| variable name | description | current default value| +#|---------------|-------------|----------------------| +#| netPath | Main project folder, must end by "/" | /mnt/atlas/ | +#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' | +#| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'| +#| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ | +#| ldscore_format | data formated to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ | +#| REF_filename* | file containing the reference panel for imputation | netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out' | +#| pathOUT | **unused in main_preprocessing.py** | netPath+'PCMA/1._DATA/RAW.summary/'| +#| ImpG_output_Folder | main ouput folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' | + + + +def launch_preprocessing(args): + """ + Preprocessing GWAS dataset + """ + gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0) + + tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'], + gwas_map.loc[GWAS_filename, 'outcome']) + + print('processing GWAS: {}'.format(tag)) + start = time.time() + gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path) + GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2] + mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels) + print(mapgw) + + gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw) + + ref = pd.read_csv(REF_filename, header=None, sep= "\t", + names =['chr', "pos", "snp_id", "ref", "alt", "MAF"], + index_col="snp_id") + + mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref) + mgwas = jp.map_reference.compute_snp_alignement(mgwas) + mgwas = jp.compute_score.compute_z_score(mgwas) + mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag) + end = time.time() + + print("Preprocessing of {0} in {1}s".format(tag, end-start)) + + jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag) + jp.save_output.save_output(mgwas, ldscore_format, tag) + + +def add_preprocessing_argument(): + + parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs") + + parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files") + parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)") + parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'") + + parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)") + parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns") + + parser.set_defaults(func=launch_preprocessing) + + +def main(): + + parser = argparse.ArgumentParser()#prog='impute_jass') + parser = add_preprocessing_argument(parser) + args = parser.parse_args() + args.func(args) + + +if __name__=="__main__": + main() diff --git a/setup.py b/setup.py index 748c456d9197b6b7cdbb5ac70b49c8967b3c5efa..aac8ae5c7bea9c9a9daf22e7b2bafb10adca3c77 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,14 @@ setup(name='jass_preprocessing', license='MIT', #package_dir = {'': 'jass_preprocessing'}, packages= ['jass_preprocessing'], - zip_safe=False) + zip_safe=False, + install_requires=[ + 'scipy', 'numpy', 'pandas', 'seaborn' + ], + entry_points={ + 'console_scripts' : [ + 'jass_preprocessing = jass_preprocessing.__main__:main' + ] + }) #, "jass_preprocessing.map_gwas","jass_preprocessing.dna_utils", "jass_preprocessing.map_reference","jass_preprocessing.compute_score", "jass_preprocessing.save_output"