Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
jass_preprocessing
Commits
6f72006d
Commit
6f72006d
authored
Nov 19, 2019
by
Hanna JULIENNE
Browse files
Corrected the index system to correctly handle GWAS that share the same summary statistics file
parent
f28feeee
Pipeline
#18645
passed with stages
in 1 minute
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
jass_preprocessing/__main__.py
View file @
6f72006d
...
...
@@ -32,7 +32,7 @@ def launch_preprocessing(args):
gwas_map
=
pd
.
read_csv
(
args
.
gwas_info
,
sep
=
"
\t
"
)
#define a unique tag
gwas_map
[
'tag'
]
=
gwas_map
.
Consortium
+
"_"
+
D
.
Outcome
gwas_map
[
'tag'
]
=
gwas_map
.
Consortium
+
"_"
+
gwas_map
.
Outcome
if
gwas_map
.
tag
.
duplicated
().
any
():
raise_duplicated_index
(
gwas_map
.
tag
)
...
...
@@ -41,7 +41,7 @@ def launch_preprocessing(args):
for
tag
in
gwas_map
.
index
:
gwas_filename
=
D
.
loc
[
tag
,
"filename"
]
gwas_filename
=
gwas_map
.
loc
[
tag
,
"filename"
]
print
(
'processing GWAS: {}'
.
format
(
tag
))
start
=
time
.
time
()
...
...
jass_preprocessing/map_gwas.py
View file @
6f72006d
...
...
@@ -76,7 +76,7 @@ def convert_missing_values(df):
return
df
.
replace
(
def_missing
,
nan_vec
)
def
map_columns_position
(
gwas_internal_link
,
my_labels
):
def
map_columns_position
(
gwas_internal_link
,
column_dict
):
"""
Find column position for each specific Gwas
...
...
@@ -87,16 +87,14 @@ def map_columns_position(gwas_internal_link, my_labels):
Return:
pandas Series with column position and column names as index
"""
print
(
gwas_internal_link
)
gwas_file
=
gwas_internal_link
.
split
(
'/'
)[
-
1
]
my_labels
=
column_dict
.
loc
[
gwas_file
]
#Our standard labels:
reference_label
=
column_dict
.
columns
.
tolist
()
reference_label
=
column_dict
.
index
.
tolist
()
print
(
reference_label
)
# labels in the GWAS files
target_lab
=
pd
.
Index
(
my_labels
.
values
.
tolist
())
target_lab
=
pd
.
Index
(
column_dict
.
values
.
tolist
())
is_gzipped
=
re
.
search
(
r
".gz$"
,
gwas_internal_link
)
if
is_gzipped
:
f
=
gzip
.
open
(
gwas_internal_link
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment