Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Statistical-Genetics
RAISS
Commits
da779b03
Commit
da779b03
authored
Mar 23, 2018
by
Hanna JULIENNE
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added comments
parent
f07a787a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
10 deletions
+31
-10
impute_jass/impute_jass/imputation_launcher.py
impute_jass/impute_jass/imputation_launcher.py
+18
-2
impute_jass/impute_jass/ld_matrix.py
impute_jass/impute_jass/ld_matrix.py
+0
-2
impute_jass/impute_jass/stat_models.py
impute_jass/impute_jass/stat_models.py
+3
-5
impute_jass/setup.py
impute_jass/setup.py
+10
-1
No files found.
impute_jass/impute_jass/imputation_launcher.py
View file @
da779b03
...
...
@@ -3,11 +3,27 @@ Function set to launch imputation on a complete chromosome or
on the genome
"""
import
glob
import
pandas
as
pd
from
.windows
import
ld_region_centered_window_imputation
,
impg_like_imputation
,
realigned_zfiles_on_panel
class
ImputationLauncher
(
object
):
"""
Class to perform imputation of SNPs from summary statistics
"""
def
__init__
(
self
,
window_size
=
10000
,
imputation_style
=
"online"
,
buf
=
2500
,
lamb
=
0.01
,
pinv_rcond
=
0.01
):
"""
Args:
window_size (int): size of the imputation window in bp
imputation_style (str): defines whether the windows will span the genome in a non-overlapping fashion ("batch") or
are centered on each SNP to impute ('online')
buf (int): the size of the padding around the imputation windows (relevant only for batch imputation)
lamb (float): size of the increment added to the SNP correlation matrices to make them less singular
pinv_rcond (float): the rcond argument of the scipy.linalg.pinv function. scipy.linalg.pinv is used to invert
the correlation matrices
"""
self
.
imputation_style
=
imputation_style
self
.
window_size
=
window_size
self
.
buffer
=
buf
...
...
@@ -54,11 +70,11 @@ class ImputationLauncher(object):
ref_panel
=
pd
.
read_csv
(
ref_panel_file
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"nothing"
,
'pos'
,
'Ref_all'
,
'alt_all'
],
index_col
=
1
)
known_zscore_file
=
zscore_folder
+
"/z_"
+
tag
+
"chr"
+
str
(
i
)
+
".txt"
known_zscore_file
=
zscore_folder
+
"/z_"
+
gwas_
tag
+
"
_
chr"
+
str
(
i
)
+
".txt"
known_zscore
=
pd
.
read_csv
(
known_zscore_file
,
index_col
=
0
,
sep
=
"
\t
"
)
chrom
=
"chr"
+
str
(
i
)
z_imp
=
self
.
chromosome_imputation
(
chrom
,
known_zscore
,
ref_panel
,
ld_folder
)
imputed_zscore
=
folder_output
+
"/z_"
+
tag
+
"chr"
+
str
(
i
)
+
".txt"
imputed_zscore
=
folder_output
+
"/z_"
+
gwas_
tag
+
"
_
chr"
+
str
(
i
)
+
".txt"
z_imp
.
to_csv
(
imputed_zscore
,
sep
=
"
\t
"
)
impute_jass/impute_jass/ld_matrix.py
View file @
da779b03
...
...
@@ -44,9 +44,7 @@ def generate_sparse_matrix(plink_ld, ref_chr_df):
mat_ld
=
mat_ld
.
reindex
(
index
=
un_index
,
columns
=
un_index
)
mat_ld
.
fillna
(
0
,
inplace
=
True
)
sym
=
np
.
maximum
(
mat_ld
.
values
,
mat_ld
.
values
.
transpose
())
np
.
fill_diagonal
(
sym
,
1.01
)
mat_ld
=
pd
.
DataFrame
(
sym
,
index
=
mat_ld
.
index
,
columns
=
mat_ld
.
columns
)
re_index
=
ref_chr_df
.
loc
[
mat_ld
.
index
].
sort_values
(
by
=
"pos"
).
index
...
...
impute_jass/impute_jass/stat_models.py
View file @
da779b03
...
...
@@ -26,10 +26,9 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
Argument:
zt : (vector) the vector of known Z scores
"""
snps
=
sig_t
.
columns
sig_t
=
sig_t
.
values
np
.
fill_diagonal
(
sig_t
,
(
1
+
lamb
))
sig_t_inv
=
sc
.
linalg
.
pinv
(
sig_t
,
rcond
=
rcond
)
sig_t_inv
=
sc
.
linalg
.
pinv
(
sig_t
)
#
, rcond=rcond)
if
batch
:
condition_number
=
np
.
array
([
np
.
linalg
.
cond
(
sig_t
)]
*
sig_i_t
.
shape
[
0
])
...
...
@@ -38,10 +37,9 @@ def impg_model(zt, sig_t, sig_i_t, lamb=0.01, rcond=0.01, batch=True):
condition_number
=
np
.
linalg
.
cond
(
sig_t
)
correct_inversion
=
check_inversion
(
sig_t
,
sig_t_inv
)
var
,
ld_score
=
compute_var
(
sig_i_t
,
sig_t_inv
,
lamb
,
batch
)
mu
=
compute_mu
(
sig_i_t
,
sig_t_inv
,
zt
)
if
np
.
any
(
mu
>
5
0
):
if
np
.
any
(
mu
>
3
0
):
print
(
"ABERANT SNP SNiP "
)
#mu = mu / (((1+lamb)-var)**0.5)
return
({
"var"
:
var
,
"mu"
:
mu
,
"ld_score"
:
ld_score
,
"condition_number"
:
condition_number
,
"correct_inversion"
:
correct_inversion
})
return
({
"var"
:
var
,
"mu"
:
mu
,
"ld_score"
:
ld_score
,
"condition_number"
:
condition_number
,
"correct_inversion"
:
correct_inversion
})
impute_jass/setup.py
View file @
da779b03
...
...
@@ -10,4 +10,13 @@ setup(name='impute_jass',
#package_dir = {'': 'jass_preprocessing'},
packages
=
[
'impute_jass'
],
package_data
=
{
'impute_jass'
:
'./data/*.csv'
},
zip_safe
=
False
)
zip_safe
=
False
entry_points
=
{
'console_scripts'
:
[
'impute_chromosome = '
]
}
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment