Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
hgcovid_imputation
Commits
691042d3
Commit
691042d3
authored
Mar 22, 2022
by
Hanna JULIENNE
Browse files
fixed position error due to liftover
parent
e2a0b1ae
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/models/imputation_test_real_data.py
View file @
691042d3
...
...
@@ -123,6 +123,7 @@ def compute_snp_alignement(mgwas):
def sorted_alleles(x):
    """Return the characters of *x* joined back together in sorted order.

    The result is the same whichever order the alleles are written in,
    e.g. both "TA" and "AT" give "AT".
    """
    ordered = sorted(x)
    return "".join(ordered)
if
__name__
==
'__main__'
:
signif_signal
=
pd
.
read_csv
(
"/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/external/result_df6_B2_compare.tsv"
,
sep
=
"
\t
"
)
...
...
@@ -136,7 +137,7 @@ if __name__ == '__main__':
Zscores_col
=
[
zscore
for
zscore
in
eur_filled_out
.
columns
if
re
.
search
(
"_Z$"
,
zscore
)]
loci_id
=
14
for
loci_id
in
Loci_dict
.
index
:
try
:
print
(
"PROCESSIN LOCI "
)
...
...
@@ -145,19 +146,31 @@ if __name__ == '__main__':
ld_file
=
Loci_dict
.
loc
[
loci_id
,
'LD_matrix'
]
ref_panel
=
pd
.
read_csv
(
"/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/raw/ref_panel/ref_panel_chr{}.bim"
.
format
(
int
(
Loci_dict
.
loc
[
loci_id
,
'CHR'
])),
sep
=
"
\t
"
,
names
=
[
'chr'
,
"nothing"
,
'pos'
,
'Ref_all'
,
'alt_all'
],
index_col
=
1
)
LD_matrix
=
raiss
.
ld_matrix
.
load_sparse_matrix
(
"/pasteur/zeus/projets/p02/GGS_WKD/PROJECT_imputation_covidhg/hgcovid_imputation/data/raw/LD_matrices/nfe/{}"
.
format
(
ld_file
),
ref_panel
)
ref_panel
.
reset_index
(
inplace
=
True
)
ref_panel
=
ref_panel
.
loc
[
~
(
ref_panel
.
Ref_all
+
ref_panel
.
alt_all
).
isin
([
"AT"
,
"TA"
,
'CG'
,
'GC'
])]
ref_panel
[
"positional_index"
]
=
ref_panel
.
chr
.
apply
(
str
)
+
ref_panel
.
pos
.
apply
(
str
)
+
(
ref_panel
.
Ref_all
+
ref_panel
.
alt_all
).
apply
(
sorted_alleles
)
ref_panel
.
set_index
(
"positional_index"
,
inplace
=
True
)
ref_panel
.
shape
eur_filled_out
.
shape
eur_filled_out
.
loc
[(
eur_filled_out
[
"loc"
]
==
14
).
values
].
shape
eur_filled_out
.
loc
[(
eur_filled_out
[
"loc"
]
==
14
).
values
].
index
.
difference
(
ref_panel
.
index
)
ref_panel
.
pos
[
ref_panel
.
pos
>
61455328
]
mgwas
=
pd
.
merge
(
ref_panel
,
eur_filled_out
,
left_index
=
True
,
right_index
=
True
)
mgwas
=
compute_snp_alignement
(
mgwas
)
mgwas
.
shape
col_to_flip
=
[
zscore
for
zscore
in
mgwas
.
columns
if
re
.
search
(
"_Z$|_beta$"
,
zscore
)]
mgwas
.
loc
[
mgwas
.
sign_flip
==-
1
,
col_to_flip
]
=
-
mgwas
.
loc
[
mgwas
.
sign_flip
==-
1
,
col_to_flip
]
loci
=
mgwas
.
loc
[(
mgwas
[
'loc'
]
==
loci_id
)]
loci
.
set_index
(
"index"
,
inplace
=
True
)
loci
=
loci
.
loc
[
loci
.
index
.
intersection
(
LD_matrix
.
index
)]
loci
.
shape
# Mask one tenth of the SNPs of this locus. Sample WITHOUT replacement:
# the numpy default (replace=True) can pick the same SNP several times, which
# masks fewer distinct SNPs than requested and inflates len(to_mask_globally).
to_mask_globally = np.random.choice(loci.index, int(loci.shape[0] / 10), replace=False)
# Every SNP of the locus that was not drawn for masking stays observed.
known = loci.index.difference(to_mask_globally)
...
...
@@ -165,14 +178,16 @@ if __name__ == '__main__':
print
(
'SNP masked : {0}, SNP known : {1}, SNP IMPUTED : {2}'
.
format
(
len
(
to_mask_globally
),
len
(
known
),
len
(
unknown
)))
print
(
"PROCESS GLOBAL MASKING"
)
for
study
in
Zscores_col
:
#
print(study)
print
(
study
)
# Build the per-study table in the rsID / pos / A0 / A1 / Z layout assigned below.
# A1 is taken from "alt_all": selecting "Ref_all" twice made A0 and A1 identical.
Zscore = loci[['#CHR', "POS", "Ref_all", "alt_all", study]]
Zscore.columns = ['rsID', "pos", "A0", "A1", "Z"]
print
(
Zscore
.
head
())
Z_masked
=
loci
[
study
].
copy
(
deep
=
True
)
Z_masked
.
loc
[
to_mask_globally
]
=
np
.
nan
print
(
Z_masked
.
head
())
imp
=
raiss
.
stat_models
.
raiss_model
(
Zscore
.
loc
[
known
,
"Z"
],
LD_matrix
.
loc
[
known
,
known
],
LD_matrix
.
loc
[
unknown
,
known
],
rcond
=
0.000001
)
Z_imputed
=
format_result_df
(
imp
,
unknown
,
Z_masked
.
loc
[
known
],
known
)
...
...
@@ -187,7 +202,7 @@ if __name__ == '__main__':
loci
=
loci
.
loc
[
loci
.
index
.
intersection
(
LD_matrix
.
index
)]
print
(
"PROCESS RANDOM MASKING"
)
for
study
in
Zscores_col
:
#
print(study)
print
(
study
)
# Draw a fresh tenth of the locus SNPs to mask for this study. Sample WITHOUT
# replacement so that no SNP is drawn twice (numpy defaults to replace=True).
to_mask_in_study = np.random.choice(loci.index, int(loci.shape[0] / 10), replace=False)
# Build the per-study table in the rsID / pos / A0 / A1 / Z layout assigned below.
# A1 is taken from "alt_all": selecting "Ref_all" twice made A0 and A1 identical.
Zscore = loci[['#CHR', "POS", "Ref_all", "alt_all", study]]
Zscore.columns = ['rsID', "pos", "A0", "A1", "Z"]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment