Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
jass_preprocessing
Commits
4018e364
Commit
4018e364
authored
Sep 27, 2019
by
Hanna JULIENNE
Browse files
updated requirements
parent
28fe2b23
Pipeline
#15333
failed with stage
in 38 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
4018e364
...
...
@@ -14,7 +14,7 @@ pages:
-
yum install -y make
-
pip3 install sphinx
-
pip3 install sphinxcontrib-bibtex sphinx_rtd_theme sphinx-argparse
-
pip3 install -r
jass_preprocessing/
requirements.txt
-
pip3 install -r requirements.txt
-
cd doc
-
sphinx-apidoc -f -o ./source/_autosummary/ ../jass_preprocessing/
-
make html
...
...
jass_preprocessing/__main__.py
View file @
4018e364
...
...
@@ -22,24 +22,20 @@ import argparse
#| ImpG_output_Folder | main ouput folder | netPath+ 'PCMA/1._DATA/preprocessing_test/' |
def
launch_preprocessing
(
args
):
"""
Preprocessing GWAS dataset
"""
gwas_map
=
pd
.
read_csv
(
args
.
gwas_info
,
sep
=
"
\t
"
,
index_col
=
0
)
print
(
gwas_map
.
head
()
)
gwas_map
=
pd
.
read_csv
(
args
.
gwas_info
,
sep
=
"
\t
"
)
gwas_map
.
set_index
(
"filename"
,
inplace
=
True
)
for
gwas_filename
in
gwas_map
.
index
:
print
(
gwas_filename
)
print
(
gwas_map
.
columns
)
tag
=
"{0}_{1}"
.
format
(
gwas_map
.
loc
[
gwas_filename
,
'consortia'
],
gwas_map
.
loc
[
gwas_filename
,
'outcome'
])
tag
=
"{0}_{1}"
.
format
(
gwas_map
.
loc
[
gwas_filename
,
'Consortium'
],
gwas_map
.
loc
[
gwas_filename
,
'Outcome'
])
print
(
'processing GWAS: {}'
.
format
(
tag
))
start
=
time
.
time
()
GWAS_link
=
jp
.
map_gwas
.
walkfs
(
args
.
input_folder
,
gwas_filename
)[
2
]
mapgw
=
jp
.
map_gwas
.
map_columns_position
(
GWAS_link
,
args
.
gwas_info
)
gw_df
=
jp
.
map_gwas
.
read_gwas
(
GWAS_link
,
mapgw
)
...
...
jass_preprocessing/map_gwas.py
View file @
4018e364
...
...
@@ -84,23 +84,22 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
Return:
pandas Series with column position and column names as index
"""
column_dict
=
pd
.
read_csv
(
GWAS_labels
,
sep
=
'
\t
'
,
na_values
=
'na'
,
index_col
=
0
)
gwas_file
=
gwas_internal_link
.
split
(
'/'
)[
-
1
]
column_dict
=
pd
.
read_csv
(
GWAS_labels
,
sep
=
'
\t
'
,
na_values
=
'na'
)
column_dict
.
set_index
(
"filename"
,
inplace
=
True
)
gwas_file
=
gwas_internal_link
.
split
(
'/'
)[
-
1
]
my_labels
=
column_dict
.
loc
[
gwas_file
]
#Our standart labels:
reference_label
=
column_dict
.
columns
.
tolist
()
# labels in the GWAS files
target_lab
=
pd
.
Index
(
my_labels
.
values
.
tolist
())
f
=
open
(
gwas_internal_link
)
count_line
=
0
line
=
f
.
readline
()
print
(
line
)
header
=
pd
.
Index
(
line
.
split
())
def
get_position
(
I
,
x
):
try
:
return
I
.
get_loc
(
x
)
...
...
@@ -108,7 +107,6 @@ def map_columns_position(gwas_internal_link, GWAS_labels):
return
np
.
nan
label_position
=
[
get_position
(
header
,
i
)
for
i
in
target_lab
]
mapgw
=
pd
.
Series
(
label_position
,
index
=
reference_label
)
mapgw
=
mapgw
.
loc
[
~
mapgw
.
isna
()].
astype
(
int
)
mapgw
.
sort_values
(
inplace
=
True
)
...
...
@@ -137,8 +135,7 @@ def read_gwas( gwas_internal_link, column_map):
index_col
=
0
,
header
=
0
,
na_values
=
[
''
,
'#N/A'
,
'#N/A'
,
'N/A'
,
'#NA'
,
'-1.#IND'
,
'-1.#QNAN'
,
'-NaN'
,
'-nan'
,
'1.#IND'
,
'1.#QNAN'
,
'N/A'
,
'-NaN'
,
'-nan'
,
'1.#IND'
,
'1.#QNAN'
,
'N/A'
,
'NA'
,
'NULL'
,
'NaN'
,
'nan'
,
'na'
,
'.'
])
...
...
jass_preprocessing/requirements.txt
deleted
100644 → 0
View file @
28fe2b23
scipy
pandas
numpy
seaborn
matplotlib
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment