Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
jass_preprocessing
Commits
0ca598c3
Commit
0ca598c3
authored
Sep 16, 2019
by
hjulienne
Browse files
change main
parent
5fc49314
Pipeline
#15317
passed with stages
in 55 seconds
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
jass_preprocessing/__main__.py
View file @
0ca598c3
...
...
@@ -28,18 +28,21 @@ def launch_preprocessing(args):
Preprocessing GWAS dataset
"""
gwas_map
=
pd
.
read_csv
(
args
.
gwas_info
,
sep
=
"
\t
"
,
index_col
=
0
)
print
(
gwas_map
.
head
())
for
gwas_filename
in
gwas_map
[[
'filename'
]]:
for
gwas_filename
in
gwas_map
.
index
:
print
(
gwas_filename
)
print
(
gwas_map
.
columns
)
tag
=
"{0}_{1}"
.
format
(
gwas_map
.
loc
[
gwas_filename
,
'consortia'
],
gwas_map
.
loc
[
gwas_filename
,
'outcome'
])
print
(
'processing GWAS: {}'
.
format
(
tag
))
start
=
time
.
time
()
GWAS_link
=
jp
.
map_gwas
.
walkfs
(
args
.
gwas_folder
,
gwas_filename
)[
2
]
GWAS_link
=
jp
.
map_gwas
.
walkfs
(
args
.
input_folder
,
gwas_filename
)[
2
]
mapgw
=
jp
.
map_gwas
.
map_columns_position
(
GWAS_link
,
args
.
gwas_info
)
gw_df
=
jp
.
map_gwas
.
read_gwas
(
GWAS_link
,
mapgw
)
ref
=
pd
.
read_csv
(
args
.
ref_path
,
header
=
None
,
sep
=
"
\t
"
,
names
=
[
'chr'
,
"pos"
,
"snp_id"
,
"ref"
,
"alt"
,
"MAF"
],
index_col
=
"snp_id"
)
...
...
jass_preprocessing/map_gwas.py
View file @
0ca598c3
...
...
@@ -112,7 +112,7 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
mapgw
=
pd
.
Series
(
label_position
,
index
=
reference_label
)
mapgw
=
mapgw
.
loc
[
~
mapgw
.
isna
()].
astype
(
int
)
mapgw
.
sort_values
(
inplace
=
True
)
print
(
mapgw
)
f
.
close
()
return
mapgw
...
...
@@ -128,6 +128,8 @@ def read_gwas( gwas_internal_link, column_map):
Return:
a pandas dataframe with missing value all equal to np.nan
"""
print
(
"Reading file:"
)
print
(
gwas_internal_link
)
fullGWAS
=
pd
.
read_csv
(
gwas_internal_link
,
delim_whitespace
=
True
,
usecols
=
column_map
.
values
,
#column_dict['label_position'].keys(),
...
...
@@ -140,6 +142,7 @@ def read_gwas( gwas_internal_link, column_map):
'NA'
,
'NULL'
,
'NaN'
,
'nan'
,
'na'
,
'.'
])
fullGWAS
=
fullGWAS
[
~
fullGWAS
.
index
.
duplicated
(
keep
=
'first'
)]
#fullGWAS = convert_missing_values(fullGWAS)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment