Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
RAISS
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Statistical-Genetics
RAISS
Commits
8b3491c6
Commit
8b3491c6
authored
7 years ago
by
Hanna JULIENNE
Browse files
Options
Downloads
Patches
Plain Diff
wrote centered window function
parent
36cb79b4
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
impute_jass/impute_jass/stat_models.py
+0
-1
0 additions, 1 deletion
impute_jass/impute_jass/stat_models.py
impute_jass/impute_jass/windows.py
+57
-3
57 additions, 3 deletions
impute_jass/impute_jass/windows.py
with
57 additions
and
4 deletions
impute_jass/impute_jass/stat_models.py
+
0
−
1
View file @
8b3491c6
"""
function for SNP imputation
"""
import
numpy
as
np
...
...
This diff is collapsed.
Click to expand it.
impute_jass/impute_jass/windows.py
+
57
−
3
View file @
8b3491c6
...
...
@@ -5,7 +5,10 @@ implement the imputation window is sliding along the genome:
- centered_window: A sliding window centered on the Snp to impute
"""
from
.stat_models
import
ImpG_model_batch
,
ImpG_model_snp
from
.ld_matrix
import
generate_sparse_matrix
import
pandas
as
pd
def
parse_region_position
(
LD_file
):
"""
...
...
@@ -17,9 +20,60 @@ def parse_region_position(LD_file):
(
chrom
,
startpos
,
endpos
)
=
LD_file
.
split
(
"
/
"
)[
-
1
].
split
(
"
.
"
)[
0
].
split
(
'
_
'
)
return
(
chrom
,
startpos
,
endpos
)
def
centered_window_imputation
(
LD_file
,
ref_panel_folder
,
Zfile
):
def realigned_zfiles_on_panel(ref_panel, Zscores):
    """
    Align the counted allele of the Z-scores on the reference panel.

    Check if the counted allele is the same in the reference panel and
    the Zscore files.
    If not, the coded and other allele are inverted and the Zscores sign
    is inverted also.

    Arguments:
        ref_panel: DataFrame indexed by SNP id with the columns
            'Ref_all' and 'alt_all'. Every SNP of Zscores is looked up
            in this index.
        Zscores: DataFrame indexed by SNP id with the columns 'A0',
            'A1' and 'Z'. It is modified in place.

    Returns:
        The same Zscores object, where for every realigned SNP
        A0 == Ref_all, A1 == alt_all and Z has the opposite sign.
    """
    # Restrict the panel to the SNPs present in the Z-score table so that
    # every Series below shares the Zscores index.
    panel_ref = ref_panel.loc[Zscores.index, 'Ref_all']
    panel_alt = ref_panel.loc[Zscores.index, 'alt_all']

    allele_inverted = (panel_ref != Zscores["A0"])

    # After realignment A0 must match the panel reference allele;
    # otherwise the labels would disagree with the flipped Z sign and a
    # second call would flip the sign again.
    # NOTE(review): any mismatch is treated as a simple allele swap;
    # strand flips or unrelated alleles are not detected here.
    Zscores.loc[allele_inverted, "A0"] = panel_ref[allele_inverted]
    Zscores.loc[allele_inverted, "A1"] = panel_alt[allele_inverted]
    Zscores.loc[allele_inverted, "Z"] = -Zscores.loc[allele_inverted, "Z"]

    return Zscores
def centered_window_imputation(LD_file, ref_panel_folder, Zfile, window_size):
    """
    Each missing Snp is imputed by known snp found in a window centered on the SNP to impute

    Arguments:
        LD_file: path of the LD file; its name is parsed by
            parse_region_position to get the chromosome.
        ref_panel_folder: folder containing the reference panel files
            named "<chrom>.eur.1pct.bim".
        Zfile: path of the tab separated Z-score file, indexed by its
            first column (presumably SNP ids with the columns 'pos',
            'A0', 'A1' and 'Z' -- TODO confirm against the input files).
        window_size: half width of the window, in the same unit as the
            'pos' column; typed SNPs strictly inside
            (pos - window_size, pos + window_size) are used.

    Returns:
        DataFrame with the typed SNPs (Var set to 1) and the imputed SNPs
        (pos, Z and Var filled), sorted by position.
    """
    import os

    chrom, _, _ = parse_region_position(LD_file)

    # The panel location comes from the ref_panel_folder argument rather
    # than a machine specific absolute path.
    ref_panel_file = os.path.join(
        ref_panel_folder, "{0}.eur.1pct.bim".format(chrom))
    print(ref_panel_file)
    ref_panel = pd.read_csv(
        ref_panel_file, sep="\t",
        names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'],
        index_col=1)

    LD_mat = generate_sparse_matrix(LD_file, ref_panel)

    Zscores = pd.read_csv(Zfile, index_col=0, sep="\t")
    Zscores = realigned_zfiles_on_panel(ref_panel, Zscores)
    Zscores['Var'] = 1

    # dispatch snp between typed and untyped
    unknowns = LD_mat.index.difference(Zscores.index)
    print("### Imputation of {0} snps ###".format(len(unknowns)))

    typed_pos = Zscores['pos']
    # Imputed values are collected apart from Zscores so that an imputed
    # SNP is never used as a "known" SNP for a later imputation.
    imputed = {}
    for snp_unknown in unknowns:
        # Boundary of the centered_window
        center = ref_panel.loc[snp_unknown, 'pos']
        start_ld_block = center - window_size
        end_ld_block = center + window_size

        in_window = (start_ld_block < typed_pos) & (typed_pos < end_ld_block)
        known = Zscores.loc[in_window].index

        if len(known) == 0:
            # No typed SNP in the window: nothing to impute from.
            imputed[snp_unknown] = {
                "pos": center, "Z": float("nan"), "Var": float("nan")}
            continue

        Sig_t = LD_mat.loc[known, known]
        Sig_i_t = LD_mat.loc[snp_unknown, known]
        Zt = Zscores.loc[known, 'Z']
        imp = ImpG_model_snp(Zt, Sig_t, Sig_i_t)

        imputed[snp_unknown] = {
            "pos": center, "Z": imp['mu'], "Var": imp['Var']}

    if not imputed:
        return Zscores.sort_values(by="pos")

    imputed_df = pd.DataFrame.from_dict(imputed, orient="index")
    return pd.concat([Zscores, imputed_df]).sort_values(by="pos")
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment