Skip to content
Snippets Groups Projects
Commit 95d3c141 authored by hjulienne's avatar hjulienne
Browse files

command line tool

parent 5a81ea93
No related branches found
No related tags found
No related merge requests found
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
......@@ -309,6 +309,29 @@ Make sure that the same SNPs are in the reference panel and the gwas</p>
</div>
<div class="section" id="module-jass_preprocessing">
<span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-jass_preprocessing" title="Permalink to this headline"></a></h2>
<table border="1" class="longtable docutils">
<colgroup>
<col width="10%" />
<col width="90%" />
</colgroup>
<tbody valign="top">
<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_gwas" title="jass_preprocessing.map_gwas"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_gwas</span></code></a></td>
<td>Map GWAS</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.dna_utils" title="jass_preprocessing.dna_utils"><code class="xref py py-obj docutils literal notranslate"><span class="pre">dna_utils</span></code></a></td>
<td>A few functions to compute the DNA complement</td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.map_reference" title="jass_preprocessing.map_reference"><code class="xref py py-obj docutils literal notranslate"><span class="pre">map_reference</span></code></a></td>
<td>Module of function</td>
</tr>
<tr class="row-even"><td><a class="reference internal" href="#module-jass_preprocessing.compute_score" title="jass_preprocessing.compute_score"><code class="xref py py-obj docutils literal notranslate"><span class="pre">compute_score</span></code></a></td>
<td></td>
</tr>
<tr class="row-odd"><td><a class="reference internal" href="#module-jass_preprocessing.save_output" title="jass_preprocessing.save_output"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save_output</span></code></a></td>
<td></td>
</tr>
</tbody>
</table>
</div>
</div>
......
......@@ -74,7 +74,9 @@
<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_gwas">jass_preprocessing.map_gwas module</a></li>
<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.map_reference">jass_preprocessing.map_reference module</a></li>
<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing.save_output">jass_preprocessing.save_output module</a></li>
<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a></li>
<li class="toctree-l2"><a class="reference internal" href="jass_preprocessing.html#module-jass_preprocessing">Module contents</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
......
......@@ -24,6 +24,8 @@ The QC and preprocessing step goes as follow:
* Select GWAS SNPs that are in the input reference panel
* Align coded allele of the GWAS with the reference panel
* Infer the number of samples per SNP if not present in the input data
* Filter SNPs with a small sample size
* Normalize the effect size by sample size to have Z-scores
* Save the output by chromosome as the following example:
+----------+-------+------+-----+--------+
......
"""
Read raw GWAS summary statistics, filter and format
Write clean GWAS datasets by chromosome
"""
__updated__ = '2018-26-06'
import pandas as pd
import jass_preprocessing as jp
import time
import argparse
#| variable name | description | current default value|
#|---------------|-------------|----------------------|
#| netPath | Main project folder, must end by "/" | /mnt/atlas/ |
#| GWAS_labels* | Path to the file describing the format of the individual GWASs files | netPath+'PCMA/1._DATA/RAW.GWAS/GWAS_labels.csv' |
#| GWAS_path* | Path to the folder containing the GWASs summ stat files, must end by "/" | netPath+'PCMA/1._DATA/RAW.GWAS/'|
#| diagnostic_folder | folder for histograms of sample size distribution among SNPs | /mnt/atlas/PCMA/1._DATA/sample_size_distribution/ |
#| ldscore_format | data formatted to use LDscore, 1 file per study | /mnt/atlas/PCMA/1._DATA/ldscore_data/ |
#| REF_filename* | file containing the reference panel for imputation | netPath+'PCMA/0._REF/1KGENOME/summary_genome_Filter_part2.out' |
#| pathOUT | **unused in main_preprocessing.py** | netPath+'PCMA/1._DATA/RAW.summary/'|
#| ImpG_output_Folder | main output folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
def launch_preprocessing(args):
    """
    Preprocessing GWAS dataset

    Reads the GWAS description table, builds a "<consortia>_<outcome>" tag for
    one GWAS, then chains the jass_preprocessing helpers (map_gwas,
    map_reference, compute_score, save_output) and prints the elapsed time.

    NOTE(review): `args` is never read in this body. The names GWAS_labels,
    GWAS_filename, GWAS_table, GWAS_path, REF_filename, diagnostic_folder,
    ImpG_output_Folder and ldscore_format are not assigned anywhere in the
    visible file; unless they are defined elsewhere, the first line raises
    NameError. They presumably should come from the command line options
    (--gwas-info, --gwas-folder, --ref-folder, --output-folder,
    --output-folder-1-file) -- TODO confirm and wire them in.
    """
    # GWAS description table: tab-separated, first column used as the index.
    gwas_map = pd.read_csv(GWAS_labels, sep="\t", index_col=0)
    # The tag is built from the 'consortia' and 'outcome' columns of the row
    # indexed by GWAS_filename; it is reused for logging and by the savers.
    tag = "{0}_{1}".format(gwas_map.loc[GWAS_filename, 'consortia'],
                           gwas_map.loc[GWAS_filename, 'outcome'])
    print('processing GWAS: {}'.format(tag))
    start = time.time()
    # NOTE(review): `gwas` is not used again in this function -- confirm
    # whether this call is still needed.
    gwas = jp.map_gwas.gwas_internal_link(GWAS_table, GWAS_path)
    # Only the third element of the walkfs() result is kept.
    GWAS_link = jp.map_gwas.walkfs(GWAS_path, GWAS_filename)[2]
    mapgw = jp.map_gwas.map_columns_position(GWAS_link, GWAS_labels)
    print(mapgw)
    gw_df = jp.map_gwas.read_gwas(GWAS_link, mapgw)
    # Reference panel: tab-separated file without a header row; the columns
    # are named here and the table is indexed by 'snp_id'.
    ref = pd.read_csv(REF_filename, header=None, sep= "\t",
                      names =['chr', "pos", "snp_id", "ref", "alt", "MAF"],
                      index_col="snp_id")
    mgwas = jp.map_reference.map_on_ref_panel(gw_df, ref)
    mgwas = jp.map_reference.compute_snp_alignement(mgwas)
    mgwas = jp.compute_score.compute_z_score(mgwas)
    mgwas = jp.compute_score.compute_sample_size(mgwas, diagnostic_folder, tag)
    # The reported duration stops here: the two save calls below are not timed.
    end = time.time()
    print("Preprocessing of {0} in {1}s".format(tag, end-start))
    jp.save_output.save_output_by_chromosome(mgwas, ImpG_output_Folder, tag)
    jp.save_output.save_output(mgwas, ldscore_format, tag)
def add_preprocessing_argument(parser=None):
    """
    Register the preprocessing command line options on a parser.

    Parameters
    ----------
    parser : argparse.ArgumentParser, optional
        Parser to extend. When omitted a new ArgumentParser is created, so a
        call without argument stays valid.

    Returns
    -------
    argparse.ArgumentParser
        The same parser, with all options added and its ``func`` default set
        to ``launch_preprocessing`` so that ``args.func(args)`` starts the
        preprocessing.
    """
    if parser is None:
        parser = argparse.ArgumentParser()
    parser.add_argument('--percent-sample-size', required=True, help= "the proportion of the 90th percentile of the sample size used to filter the SNPs")
    parser.add_argument('--gwas-info', required=True, help= "Path to the file describing the format of the individual GWASs files")
    parser.add_argument('--ref-folder', required=True, help= "reference panel location (used to determine which snp to impute)")
    parser.add_argument('--gwas-folder', required=True, help= " Path to the folder containing the GWASs summ stat files, must end by '/'")
    parser.add_argument('--output-folder', required=True, help= "Location of main ouput folder for preprocessed GWAS files (splitted by chromosome)")
    parser.add_argument('--output-folder-1-file', required=False, help= "optional location to store the preprocessing in one tabular file with one chromosome columns")
    parser.set_defaults(func=launch_preprocessing)
    # The caller rebinds its parser to this return value, so it must not be
    # None.
    return parser
def main():
    """
    Command line entry point.

    Builds the argument parser, parses the command line and calls the
    function stored in the parsed ``func`` attribute with the parsed options.
    """
    cli_parser = add_preprocessing_argument(argparse.ArgumentParser())
    options = cli_parser.parse_args()
    options.func(options)


if __name__ == "__main__":
    main()
......@@ -9,6 +9,14 @@ setup(name='jass_preprocessing',
license='MIT',
#package_dir = {'': 'jass_preprocessing'},
packages= ['jass_preprocessing'],
zip_safe=False)
zip_safe=False,
install_requires=[
'scipy', 'numpy', 'pandas', 'seaborn'
],
entry_points={
'console_scripts' : [
'jass_preprocessing = jass_preprocessing.__main__:main'
]
})
#, "jass_preprocessing.map_gwas","jass_preprocessing.dna_utils", "jass_preprocessing.map_reference","jass_preprocessing.compute_score", "jass_preprocessing.save_output"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment