Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
2193ea22
Commit
2193ea22
authored
Mar 01, 2018
by
Hanna JULIENNE
Browse files
Implement function to compute LD-matrices
parents
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
impute_jass/data/Region_LD.csv
0 → 100644
View file @
2193ea22
This diff is collapsed.
Click to expand it.
impute_jass/impute_jass/__init__.py
0 → 100644
View file @
2193ea22
impute_jass/impute_jass/ld_matrix.py
0 → 100644
View file @
2193ea22
"""
Function set to compute LD correlation from a reference panel
in predefined Region
LD matrix are then stored to the scipy sparse matrix format
"""
import
scipy
as
sc
import
pandas
as
pd
import
subprocess
as
sub
# Runs `pwd` once at import time; the returned output is not used.
sub.check_output("pwd")

# Region table loaded at import time (presumably the LD region definitions).
# The path is relative to the current working directory, not to this module.
_REGION_LD_CSV = './impute_for_jass/Imputation_for_jass/impute_jass/data/Region_LD.csv'
LD_region = pd.read_csv(_REGION_LD_CSV)
def launch_plink_ld(startpos, endpos, chr, reffile, folder):
    """
    Run plink to compute pairwise LD correlations (``--r``) in one region.

    The command is handed to the OS as an argument list, without a shell,
    so paths containing spaces or shell metacharacters are passed verbatim
    (no quoting issues, no shell expansion).

    Args:
        startpos: first base-pair position of the region (``--from-bp``).
        endpos: last base-pair position of the region (``--to-bp``).
        chr: chromosome identifier (``--chr``). The name shadows the
            builtin but is kept so keyword callers keep working.
        reffile: prefix of the plink binary fileset (``--bfile``).
        folder: directory receiving the plink output files.

    Raises:
        subprocess.CalledProcessError: if plink exits with a non-zero status.
        OSError: if the ``p-link`` executable cannot be started.
    """
    # Output prefix handed to --out: "<folder>/<chr>_<startpos>_<endpos>".
    out_prefix = "{0}/{1}_{2}_{3}".format(folder, chr, startpos, endpos)

    # Every argument must be a string when no shell is involved.
    cmd = [
        "p-link", "--noweb",
        "--bfile", str(reffile),
        "--r",
        "--ld-window-r2", "0",
        "--from-bp", str(startpos),
        "--to-bp", str(endpos),
        "--chr", str(chr),
        "--out", out_prefix,
    ]
    sub.check_output(cmd)
def generate_sparse_matrix(plink_ld, path_ld_mat):
    """
    Read a plink LD report, build a sparse LD matrix and pickle it.

    The report is pivoted into a SNP_A x SNP_B DataFrame holding the ``R``
    values, stored sparsely with 0 as the fill value, and written with
    ``DataFrame.to_pickle``. No compression is requested explicitly here.

    Args:
        plink_ld: path to the whitespace-delimited plink report; it must
            contain the columns ``SNP_A``, ``SNP_B`` and ``R``.
        path_ld_mat: destination path of the pickle file.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    ld_table = pd.read_csv(plink_ld, sep=r"\s+")
    dense_ld = ld_table.pivot(index='SNP_A', columns='SNP_B', values='R')

    if hasattr(dense_ld, "to_sparse"):
        # Older pandas releases: keep the historical code path.
        mat_ld = dense_ld.to_sparse(fill_value=0)
    else:
        # DataFrame.to_sparse was removed in pandas 1.0; casting to a
        # sparse dtype with the same fill value is its replacement.
        mat_ld = dense_ld.astype(pd.SparseDtype(float, 0))

    mat_ld.to_pickle(path_ld_mat)
def generate_genome_matrices(region_files, reffolder, folder_output):
    """
    Compute and store the LD matrix of every region listed in a CSV file.

    For each row of the region table, plink is run on the reference panel
    of the row's chromosome, then the resulting ``.ld`` report is converted
    to a pickled sparse matrix written next to it.

    Args:
        region_files: path to a CSV file with one region per row and the
            columns ``chr``, ``start`` and ``stop``.
        reffolder: directory holding the reference panel filesets, named
            ``<chr>.eur.1pct``.
        folder_output: directory receiving the plink reports (``.ld``)
            and the matrices (``.mat``).
    """
    regions = pd.read_csv(region_files)

    # Iterate over the parsed table; the path string has no iterrows().
    for row_label, region in regions.iterrows():
        print(row_label)

        chrom = region['chr']
        # NOTE(review): assumes the region table names its bounds
        # 'start'/'stop', as used for the plink call - confirm against
        # Region_LD.csv.
        start = region['start']
        stop = region['stop']

        # input reference panel file
        fi_ref = "{0}/{1}.eur.1pct".format(reffolder, chrom)

        # Compute the LD correlation with plink
        launch_plink_ld(start, stop, chrom, fi_ref, folder_output)

        # Must match the "<folder>/<chr>_<startpos>_<endpos>" prefix that
        # launch_plink_ld passes to --out, otherwise the report is not found.
        region_prefix = "{0}/{1}_{2}_{3}".format(folder_output, chrom, start, stop)
        fi_plink = region_prefix + ".ld"
        fo_mat = region_prefix + ".mat"

        # transform the plink output into a pickled sparse matrix
        generate_sparse_matrix(fi_plink, fo_mat)
impute_jass/setup.py
0 → 100644
View file @
2193ea22
from
setuptools
import
setup
,
find_packages
# Package metadata and build configuration handed to setuptools.
setup(
    name='jass_preprocessing',
    version='0.1',
    description='Preprocess GWAS summary statistic for JASS',
    # A URL carries a single scheme; the previous value had two
    # ("http:https://").
    url='https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing',
    author='Hanna Julienne',
    author_email='hanna.julienne@pasteur.fr',
    license='MIT',
    #package_dir = {'': 'jass_preprocessing'},
    packages=['impute_jass'],
    # setuptools expects a list of glob patterns per package, not a bare
    # string.
    package_data={'impute_jass': ['data/*.csv']},
    zip_safe=False,
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment