Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
jass_preprocessing
Commits
b8b082b3
Commit
b8b082b3
authored
Nov 07, 2019
by
Hanna JULIENNE
Browse files
add zipped GWAS support
parent
845dfd26
Pipeline
#17788
passed with stages
in 47 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
jass_preprocessing/map_gwas.py
View file @
b8b082b3
...
...
@@ -10,6 +10,9 @@ import os
import
sys
import
pandas
as
pd
import
numpy
as
np
import
gzip
import
re
def
walkfs
(
startdir
,
findfile
):
"""
...
...
@@ -96,15 +99,23 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
reference_label
=
column_dict
.
columns
.
tolist
()
# labels in the GWAS files
target_lab
=
pd
.
Index
(
my_labels
.
values
.
tolist
())
f
=
open
(
gwas_internal_link
)
is_gzipped
=
re
.
search
(
r
".gz$"
,
gwas_internal_link
)
if
is_gzipped
:
f
=
gzip
.
open
(
gwas_internal_link
)
line
=
f
.
readline
()
line
=
line
.
decode
(
'utf-8'
)
else
:
f
=
open
(
gwas_internal_link
)
line
=
f
.
readline
()
count_line
=
0
line
=
f
.
readline
()
header
=
pd
.
Index
(
line
.
split
())
def
get_position
(
I
,
x
):
try
:
return
I
.
get_loc
(
x
)
except
KeyError
:
return
np
.
nan
label_position
=
[
get_position
(
header
,
i
)
for
i
in
target_lab
]
mapgw
=
pd
.
Series
(
label_position
,
index
=
reference_label
)
...
...
@@ -128,9 +139,18 @@ def read_gwas( gwas_internal_link, column_map):
"""
print
(
"Reading file:"
)
print
(
gwas_internal_link
)
is_gzipped
=
re
.
search
(
r
".gz$"
,
gwas_internal_link
)
if
is_gzipped
:
compression
=
'gzip'
else
:
compression
=
None
print
(
column_map
.
values
)
print
(
column_map
.
index
)
fullGWAS
=
pd
.
read_csv
(
gwas_internal_link
,
delim_whitespace
=
True
,
usecols
=
column_map
.
values
,
#column_dict['label_position'].keys(),
usecols
=
column_map
.
values
,
compression
=
compression
,
#column_dict['label_position'].keys(),
names
=
column_map
.
index
,
index_col
=
0
,
header
=
0
,
na_values
=
[
''
,
'#N/A'
,
'#N/A'
,
'N/A'
,
'#NA'
,
'-1.#IND'
,
'-1.#QNAN'
,
...
...
jass_preprocessing/save_output.py
View file @
b8b082b3
...
...
@@ -15,11 +15,11 @@ def save_output_by_chromosome(mgwas, ImpG_output_Folder, my_study):
mgwas_chr
=
pd
.
DataFrame
({
'rsID'
:
mgwas_copy
.
loc
[
chrom
].
snp_id
,
'pos'
:
mgwas_copy
.
loc
[
chrom
].
pos
,
'A
1
'
:
mgwas_copy
.
loc
[
chrom
].
ref
,
'A
2
'
:
mgwas_copy
.
loc
[
chrom
].
alt
,
'A
0
'
:
mgwas_copy
.
loc
[
chrom
].
ref
,
'A
1
'
:
mgwas_copy
.
loc
[
chrom
].
alt
,
'Z'
:
mgwas_copy
.
loc
[
chrom
].
computed_z
,
'P'
:
mgwas_copy
.
loc
[
chrom
].
pval
},
columns
=
[
'rsID'
,
'pos'
,
'A
1
'
,
"A
2
"
,
"Z"
,
"P"
])
},
columns
=
[
'rsID'
,
'pos'
,
'A
0
'
,
"A
1
"
,
"Z"
,
"P"
])
impg_output_file
=
ImpG_output_Folder
+
'z_'
+
my_study
+
'_chr'
+
str
(
chrom
)
+
".txt"
print
(
"WRITING CHR {} results for {} to: {}"
.
format
(
chrom
,
my_study
,
ImpG_output_Folder
))
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment