Commit d0e5bc5c authored by hjulienne's avatar hjulienne
parents a65d314e f3be1a6a
......@@ -14,10 +14,10 @@ pages:
- yum install -y make
- pip3 install sphinx
- pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
- pip3 install -r impute_jass/requirements.txt
- cd impute_jass/doc
- pip3 install -r requirements.txt
- cd doc
- make html
- mv build/html/ ../../public
- mv build/html/ ../public
artifacts:
paths:
- public
......
......@@ -30,7 +30,7 @@ Installation
.. code-block:: shell
pip3 install git+https://gitlab.pasteur.fr/statistical-genetics/imputation_for_jass
pip3 install git+https://gitlab.pasteur.fr/statistical-genetics/raiss.git
Precomputation of LD-correlation
=================================
......@@ -99,8 +99,9 @@ Command Line Usage
==================
.. argparse::
:ref: impute_jass.__main__.add_chromosome_imputation_argument
:ref: raiss.__main__.add_chromosome_imputation_argument
:prog: raiss
Indices and tables
==================
......
......@@ -7,8 +7,10 @@
ld_matrix
stat_models
windows
filter_format_output
"""
import impute_jass.ld_matrix as LD
import impute_jass.stat_models as model
import impute_jass.windows
from impute_jass.imputation_launcher import ImputationLauncher
import raiss.ld_matrix as LD
import raiss.stat_models as model
import raiss.windows
import raiss.filter_format_output
from raiss.imputation_launcher import ImputationLauncher
import argparse
import pandas as pd
from impute_jass.imputation_launcher import ImputationLauncher
from raiss.filter_format_output import filter_output
from raiss.imputation_launcher import ImputationLauncher
def launch_chromosome_imputation(args):
......@@ -15,7 +16,7 @@ def launch_chromosome_imputation(args):
print("Imputation of {0} gwas for chromosome {1}".format(args.gwas, args.chrom))
# Imputer settings
imputer = ImputationLauncher( window_size=int(args.window_size), buf=int(args.buffer_size),
lamb= float(args.l2_regularization), pinv_rcond = float(args.eigen_treshold))
lamb= float(args.l2_regularization), pinv_rcond = float(args.eigen_threshold))
# Reading of inputs
z_file = "{0}/z_{1}_{2}.txt".format(args.zscore_folder, args.gwas, args.chrom)
......@@ -27,13 +28,14 @@ def launch_chromosome_imputation(args):
imputed_zscore = imputer.chromosome_imputation(args.chrom, zscore, ref_panel, args.ld_folder)
print("Imputation DONE")
# Saving results
#Formatting and filtering
# and Saving results
z_fo = "{0}/z_{1}_{2}.txt".format(args.output_folder, args.gwas, args.chrom)
imputed_zscore.to_csv(z_fo, sep='\t')
filter_output(imputed_zscore, z_fo, args.R2_threshold)
print("Save imputation done at {0}".format(z_fo))
def add_chromosome_imputation_argument():
parser = argparse.ArgumentParser()
parser.add_argument('--chrom', required=True, help= "chromosome to impute to the chr\d+ format")
parser.add_argument('--gwas', required=True, help= "GWAS to impute to the consortia_trait format")
......@@ -46,14 +48,17 @@ def add_chromosome_imputation_argument():
parser.add_argument('--window-size', help= "Size of the non overlapping window", default = 500000)
parser.add_argument('--buffer-size', help= "Size of the buffer around the imputation window", default = 125000)
parser.add_argument('--l2-regularization', help= "Size of the buffer around the imputation window", default = 0.1)
parser.add_argument('--eigen-treshold', help= "treshold under which eigen vectors are removed for the computation of the pseudo inverse", default = 0.1)
parser.add_argument('--eigen-threshold', help= "threshold under which eigen vectors are removed for the computation of the pseudo inverse", default = 0.1)
parser.add_argument('--R2-threshold', help= "R square (imputation quality) threshold bellow which SNPs are filtered from the output", default = 0.8)
parser.set_defaults(func=launch_chromosome_imputation)
return(parser)
def main():
    """
    Command-line entry point of the package.

    Builds the argument parser, parses the command line and dispatches to
    the handler stored in the parsed namespace under ``func``.
    """
    # The parser factory takes no argument: it creates the ArgumentParser
    # itself. Passing it a not-yet-defined `parser` raised a NameError.
    parser = add_chromosome_imputation_argument()
    args = parser.parse_args()
    # `func` is registered on the parser through set_defaults(func=...).
    args.func(args)
......
"""
Module to filter SNPs on imputation quality
and format output for JASS
"""
def filter_output(zscores, fout, R2_threshold = 0.8):
    """
    Filter imputed SNPs on imputation quality and save them in the JASS format.

    The index of `zscores` is written as the `snp_ids` column and the
    columns pos, A0, A1, Z are renamed to position, Ref_allele, Alt_allele
    and z_score. Only the rows whose `Var` is strictly lower than
    `1 - R2_threshold` are written. The input dataframe is left untouched.

    Args:
        zscores (pandas dataframe): imputed zscore
            (as output by imputation_launcher.chromosome_imputation).
            Must have an unnamed index and the columns
            pos, A0, A1, Z, Var and ld_score.
        fout (filename): filename (or writable buffer) where to save the
            formatted and filtered output, as a tab separated file
        R2_threshold (float): R square threshold below which SNPs are
            filtered from the output. Values given as strings (e.g. coming
            straight from the command line) are converted to float.
    """
    # Command-line values reach this function as strings; convert once so
    # that the subtraction below cannot fail with a TypeError.
    max_variance = 1 - float(R2_threshold)

    # Work on a copy: reset_index(inplace=True) would modify the caller's
    # dataframe as a side effect.
    flat = zscores.reset_index()

    # Insertion order of this mapping defines the column order of the output.
    jass_names = {
        'index': 'snp_ids',
        'pos': 'position',
        'A0': 'Ref_allele',
        'A1': 'Alt_allele',
        'Z': 'z_score',
        'Var': 'Var',
        'ld_score': 'ld_score',
    }
    formatted = flat[list(jass_names)].rename(columns=jass_names)

    well_imputed = formatted.loc[formatted['Var'] < max_variance]
    well_imputed.to_csv(fout, sep="\t", index=False)
......@@ -18,7 +18,7 @@ class ImputationLauncher(object):
lamb= 0.01, pinv_rcond = 0.01):
"""
Initialise the imputation object. Fix the windows size, the buffer size
and the king of imputation employed
and the kind of imputation employed
Args:
window_size (int): size of the imputation window in bp
......@@ -68,7 +68,6 @@ class ImputationLauncher(object):
zscore_results = realigned_zfiles_on_panel(ref_panel, zscore_results)
return zscore_results
def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
"""
Launch imputation on all chromosome for one trait by calling
......
......@@ -47,7 +47,7 @@ def prepare_zscore_for_imputation(ref_panel, zscore):
- filtering snps that are not present in the ref panel
- Adding columns that will contain information on imputation:
* Var : theoretical variance estimate of z
* Nsnp_to_impute : Number of known snp
* Nsnp_to_impute : Number of known snp used to perform imputation
* ld_score : the sum of the squared correlations of the snp with all other
known snps (gives an idea of whether we have enough information to compute a
precise z estimate)
......@@ -119,10 +119,12 @@ def empty_imputed_dataframe():
"correct_inversion", "Nsnp_to_impute"]
zscore_results = pd.DataFrame(columns = column_order)
return zscore_results
def impg_like_imputation(ld_file, ref_panel, zscore, window_size, buffer, lamb,
rcond, unknowns=pd.Series([])):
"""
Each missing Snp is imputed by known snp found in a window centered on the SNP to impute
Each missing Snp is imputed by known snps found in a window
Argument.
Args:
ld_file (str): Linkage disequilibrium matrix file
......
from setuptools import setup, find_packages
# Packaging metadata for the `raiss` distribution (formerly `impute_jass`).
setup(
    name='raiss',
    version='0.1',
    description='Imputation of summary statistics',
    url='https://gitlab.pasteur.fr/statistical-genetics/raiss',
    author='Hanna Julienne',
    author_email='hanna.julienne@pasteur.fr',
    install_requires=['scipy', 'pandas'],
    license='MIT',
    packages=['raiss'],
    # package_data maps a package name to a LIST of glob patterns; a bare
    # string is not a valid value.
    package_data={'raiss': ['./data/*.csv']},
    zip_safe=False,
    entry_points={
        'console_scripts': [
            # One entry per string, comma separated: adjacent string
            # literals without a comma are silently concatenated.
            'raiss = raiss.__main__:main',
        ],
    },
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment