Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
61926e2f
Commit
61926e2f
authored
Aug 22, 2018
by
Hanna JULIENNE
Browse files
enriched doc string
parent
e47676db
Changes
4
Hide whitespace changes
Inline
Side-by-side
impute_jass/doc/source/index.rst
View file @
61926e2f
...
...
@@ -13,6 +13,12 @@ Welcome to Peppa-PIG's documentation!
What is Peppa-PIG ?
===================
Dependencies
============
peppa-pig requires plink version
Installation
============
...
...
impute_jass/impute_jass/imputation_launcher.py
View file @
61926e2f
...
...
@@ -7,14 +7,12 @@ import pandas as pd
from
.windows
import
ld_region_centered_window_imputation
,
prepare_zscore_for_imputation
,
impg_like_imputation
,
realigned_zfiles_on_panel
class
ImputationLauncher
(
object
):
"""
Class perform imputation of snp from summary statistic
Class to perform imputation of snp from summary statistic
"""
def
__init__
(
self
,
window_size
=
10000
,
buf
=
2500
,
lamb
=
0.01
,
pinv_rcond
=
0.01
):
lamb
=
0.01
,
pinv_rcond
=
0.01
):
"""
Initialise the imputation object. Fix the windows size, the buffer size
and the kind of imputation employed
...
...
@@ -22,12 +20,11 @@ class ImputationLauncher(object):
Args:
window_size (int): size of the imputation window in bp
buffer (int): the size of the padding around the windows of
imputation (relevant only for batch imputation)
imputation (relevant only for batch imputation)
lamb (float): size of the increment added to snp correlation
matrices to make it less singular
matrices to make it less singular
pinv_rcond (float): the rcond scipy.linalg.pinv function argument.
The scipy.linalg.pinv is used to invert
the correlation matrices
The scipy.linalg.pinv is used to invert the correlation matrices
"""
self
.
window_size
=
window_size
...
...
@@ -42,12 +39,13 @@ class ImputationLauncher(object):
parameters
Args:
chrom : str specifying chromosome
zscore : known zscore
ref_panel : location of the folder of reference chromosome
ld_folder: location of linkage desiquilibrium matrices
Returns
Imputed zscore dataframe
chrom (str): chromosome "chr*"
zscore (pandas dataframe): known zscore
ref_panel (str): path of the folder of reference panel
ld_folder (str): path of the folder containing linkage disequilibrium matrices
Returns:
pandas dataframe: Imputed zscore dataframe
"""
pattern
=
"{0}/{1}_*.ld"
.
format
(
ld_folder
,
chrom
)
zscore
=
prepare_zscore_for_imputation
(
ref_panel
,
zscore
)
...
...
@@ -70,8 +68,15 @@ class ImputationLauncher(object):
def
genome_imputation
(
self
,
gwas_tag
,
ref_panel_folder
,
ld_folder
,
zscore_folder
,
folder_output
):
"""
Launch imputation on all chromosome for one trait
Launch imputation on all chromosomes for one trait by calling
chromosome_imputation for each chromosome
Args:
gwas_tag (str): a short string to annotate imputed GWAS files
ref_panel_folder (str): path of the folder of reference panel
ld_folder (str): path of the folder containing linkage disequilibrium matrices
zscore_folder (str): path of the folder for input GWAS files
folder_output (str): path of the folder for imputed GWAS files
"""
for
i
in
range
(
1
,
23
):
...
...
impute_jass/impute_jass/ld_matrix.py
View file @
61926e2f
...
...
@@ -3,7 +3,7 @@
Function set to compute LD correlation from a reference panel
in predefined Region
LD matrix are then
s
tored to the
scipy
sparse
matrix
format
LD matrices are then transformed to the pandas sparse format
"""
import
scipy
as
sc
...
...
@@ -16,7 +16,15 @@ import re
def
launch_plink_ld
(
startpos
,
endpos
,
chr
,
reffile
,
folder
):
"""
launch plink ld
launch plink linkage disequilibrium correlation and save
the output
Args:
startpos (int): position of the start of the window
endpos (int): position of the end of the window
chr (str): chromosome position
reffile (str): reference panel file
folder (str): output folder
"""
bimref
=
reffile
+
".bim"
ref_panel
=
pd
.
read_csv
(
bimref
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"nothing"
,
'pos'
,
'Ref_all'
,
'alt_all'
],
index_col
=
1
)
...
...
@@ -26,16 +34,23 @@ def launch_plink_ld(startpos, endpos, chr, reffile, folder):
fo
=
"{0}/chr{1}_{2}_{3}"
.
format
(
folder
,
chr
,
startpos
,
endpos
)
cmd
=
"p-link --noweb --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --ld-window-r2 0.4 --chr {1} --out {2}"
.
format
(
reffile
,
chr
,
fo
)
cmd
=
"plink --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --ld-window-r2 0.4 --chr {1} --out {2}"
.
format
(
reffile
,
chr
,
fo
)
sub
.
check_output
(
cmd
,
shell
=
True
)
def
generate_sparse_matrix
(
plink_ld
,
ref_chr_df
):
"""
Extract correlation matrix from the plink correlation
file generated by ld_matrix.launch_plink_ld
read plink results create a sparse dataframe LD-matrix
then save it to a zipped pickle
Args:
plink_ld (str): path to the plink correlation matrix file
ref_chr_df (str):
Returns:
pandas.SparseDataFrame : Linkage disequilibrium matrix
"""
plink_ld
=
pd
.
read_csv
(
plink_ld
,
sep
=
"\s+"
)
...
...
@@ -51,12 +66,16 @@ def generate_sparse_matrix(plink_ld, ref_chr_df):
mat_ld
=
mat_ld
.
loc
[
re_index
,
re_index
]
mat_ld
=
mat_ld
.
to_sparse
()
return
mat_ld
#mat_ld.to_pickle(path_ld_mat,, compression='gzip')
def
generate_genome_matrices
(
region_files
,
reffolder
,
folder_output
):
"""
go through the region file and compute the LD matrix for each region, then
transform and save the results in a pandas sparse dataframe
Args:
region_files (str) : region file containing beginning and end position
reffolder (str) : folder of reference panel
folder_output (str): folder to save plink LD correlation result files
"""
regions
=
pd
.
read_csv
(
region_files
)
for
reg
in
regions
.
iterrows
():
...
...
impute_jass/impute_jass/stat_models.py
View file @
61926e2f
...
...
@@ -21,6 +21,7 @@ import scipy.linalg
def
compute_mu
(
sig_i_t
,
sig_t_inv
,
zt
):
"""
Compute the estimation of z-score from neighboring SNPs
Args:
sig_i_t (matrix?) : correlation matrix with line corresponding to
unknown Snp (snp to impute) and column to known SNPs
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment