Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
jass_preprocessing
Commits
8f3a98d9
Commit
8f3a98d9
authored
Oct 01, 2021
by
Hanna JULIENNE
Browse files
FIX issue
#8
parent
3ecc09f7
Changes
2
Hide whitespace changes
Inline
Side-by-side
jass_preprocessing/map_gwas.py
View file @
8f3a98d9
...
...
@@ -153,6 +153,12 @@ def read_gwas( gwas_internal_link, column_map, imputation_treshold=None):
'NA'
,
'NULL'
,
'NaN'
,
'nan'
,
'na'
,
'.'
,
'-'
],
dtype
=
{
"snpid"
:
str
,
"a1"
:
str
,
"a2"
:
str
,
"freq"
:
float
,
"z"
:
float
,
"se"
:
float
,
"pval"
:
float
})
print
(
fullGWAS
.
head
())
#Ensure that allele are written in upper cases:
fullGWAS
.
a1
=
fullGWAS
.
a1
.
str
.
upper
()
fullGWAS
.
a2
=
fullGWAS
.
a2
.
str
.
upper
()
def
sorted_alleles
(
x
):
return
""
.
join
(
sorted
(
x
))
# either rs ID or full position must be available:
...
...
jass_preprocessing/map_reference.py
View file @
8f3a98d9
...
...
@@ -21,7 +21,7 @@ def read_reference(gwas_reference_panel, mask_MHC=False, minimum_MAF=None, regio
"""
ref
=
pd
.
read_csv
(
gwas_reference_panel
,
header
=
None
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"snp_id"
,
"MAF"
,
"pos"
,
"ref"
,
"alt"
],
dtype
=
{
"chr"
:
str
,
"snp_id"
:
str
,
"MAF"
:
np
.
float
,
"pos"
:
np
.
int
,
"ref"
:
str
,
"alt"
:
str
},
dtype
=
{
"chr"
:
str
,
"snp_id"
:
str
,
"MAF"
:
np
.
float
,
"pos"
:
np
.
int
,
"ref"
:
str
,
"alt"
:
str
},
index_col
=
"snp_id"
)
def
sorted_alleles
(
x
):
...
...
@@ -81,10 +81,12 @@ def map_on_ref_panel(gw_df , ref_panel, index_type="rsid"):
merge_GWAS
.
set_index
(
"snp_id"
,
inplace
=
True
)
else
:
raise
ValueError
(
"index_type can take only two values: 'rsid' or 'positional'"
)
if
((
merge_GWAS
.
pos
==
merge_GWAS
.
POS
).
mean
()
>
0.95
):
merge_GWAS
=
merge_GWAS
.
loc
[(
merge_GWAS
.
pos
==
merge_GWAS
.
POS
)]
else
:
raise
ValueError
(
"SNP positions in reference panel and in Summary statistic are different! Different assembly?"
)
if
((
"pos"
in
merge_GWAS
.
columns
)
and
(
"POS"
in
merge_GWAS
.
columns
))
if
(
merge_GWAS
.
pos
==
merge_GWAS
.
POS
).
mean
()
>
0.95
):
merge_GWAS
=
merge_GWAS
.
loc
[(
merge_GWAS
.
pos
==
merge_GWAS
.
POS
)]
else
:
raise
ValueError
(
"SNP positions in reference panel and in Summary statistic are different! Different assembly?"
)
print
(
"before filter"
)
print
(
merge_GWAS
.
shape
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment