Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Statistical-Genetics
RAISS
Commits
fa8b3139
Commit
fa8b3139
authored
Mar 02, 2018
by
Hanna JULIENNE
Browse files
Test compute LD matrix + write Impg model function
parent
2193ea22
Changes
3
Hide whitespace changes
Inline
Side-by-side
impute_jass/impute_jass/__init__.py
View file @
fa8b3139
import
impute_jass.ld_matrix
as
LD
impute_jass/impute_jass/ld_matrix.py
View file @
fa8b3139
# coding: utf-8
"""
Function set to compute LD correlation from a reference panel
in predefined Region
...
...
@@ -8,10 +9,9 @@
import
scipy
as
sc
import
pandas
as
pd
import
subprocess
as
sub
sub
.
check_output
(
"pwd"
)
LD_region
=
pd
.
read_csv
(
'./impute_for_jass/Imputation_for_jass/impute_jass/data/Region_LD.csv'
)
import
pkg_resources
import
numpy
as
np
import
re
def launch_plink_ld(startpos, endpos, chr, reffile, folder):
    """
    Launch plink to compute the LD correlation of one genomic region.

    The ``p-link`` executable is run on the binary fileset ``reffile`` with
    ``--r --ld-window-r2 0``, restricted to the base-pair interval
    ``startpos``..``endpos`` on chromosome ``chr``. Plink writes its results
    under the output prefix ``<folder>/chr<chr>_<startpos>_<endpos>``.

    :param startpos: value passed to plink ``--from-bp``
    :param endpos: value passed to plink ``--to-bp``
    :param chr: value passed to plink ``--chr`` (also used in the output name)
    :param reffile: prefix of the reference panel, passed to ``--bfile``
    :param folder: folder in which the plink output files are written
    :raises subprocess.CalledProcessError: if plink exits with a non-zero status
    :raises FileNotFoundError: if the ``p-link`` executable cannot be found
    """
    fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, startpos, endpos)

    # Pass an argument list instead of a shell string: paths containing
    # spaces or shell metacharacters can no longer break (or hijack) the call.
    cmd = [
        "p-link", "--noweb",
        "--bfile", str(reffile),
        "--r",
        "--ld-window-r2", "0",
        "--from-bp", str(startpos),
        "--to-bp", str(endpos),
        "--chr", str(chr),
        "--out", fo,
    ]
    sub.check_output(cmd)
def generate_sparse_matrix(plink_ld):
    """
    Read plink LD results and return them as a symmetric sparse LD matrix.

    The plink table is pivoted into a SNP_A x SNP_B frame of ``R`` values,
    re-indexed on the union of both SNP sets so that it becomes square,
    missing pairs are set to 0 and the matrix is made symmetric by adding
    its transpose.

    :param plink_ld: path (or file-like object) of a whitespace separated
        plink ``.ld`` table holding at least the columns ``SNP_A``, ``SNP_B``
        and ``R``
    :return: square pandas sparse DataFrame indexed by SNP id on both axes
    """
    # Raw string: "\s" is an invalid escape sequence in a normal literal.
    ld_pairs = pd.read_csv(plink_ld, sep=r"\s+")

    half_ld = ld_pairs.pivot(index='SNP_A', columns='SNP_B', values='R')
    snp_ids = half_ld.index.union(half_ld.columns)
    half_ld = half_ld.reindex(index=snp_ids, columns=snp_ids).fillna(0)

    # NOTE(review): summing with the transpose assumes each SNP pair is listed
    # in one orientation only; a pair present both ways would be doubled.
    sym = half_ld.values + half_ld.values.transpose()
    # NOTE(review): the diagonal is set to 1.01 rather than 1 -- presumably on
    # purpose (regularisation?); confirm with the imputation model.
    np.fill_diagonal(sym, 1.01)

    mat_ld = pd.DataFrame(sym, index=snp_ids, columns=snp_ids)

    # DataFrame.to_sparse() was removed in pandas 1.0; fall back to the
    # equivalent sparse dtype (same NaN fill value) on recent versions.
    if hasattr(mat_ld, "to_sparse"):
        return mat_ld.to_sparse()
    return mat_ld.astype(pd.SparseDtype(float, np.nan))
def generate_genome_matrices(region_files, reffolder, folder_output):
    """
    Go through the region file and launch the plink LD computation for
    each region.

    :param region_files: path of a csv file with one region per row and at
        least the columns ``chr``, ``start`` and ``stop``
    :param reffolder: folder holding the reference panel; the fileset used
        for a region is ``<reffolder>/<chr>.eur.1pct``
    :param folder_output: folder in which plink writes its results
    :raises ValueError: if the ``chr`` value of a region contains no digit
    """
    regions = pd.read_csv(region_files)
    chr_number = re.compile('([0-9]{1,2})')

    for reg_id, region in regions.iterrows():
        print(reg_id)

        # input reference panel file
        fi_ref = "{0}/{1}.eur.1pct".format(reffolder, region['chr'])

        # plink expects the bare chromosome number, not the full label
        found = chr_number.search(str(region['chr']))
        if found is None:
            raise ValueError(
                "No chromosome number found in {0!r} (region {1})".format(
                    region['chr'], reg_id))
        chr_int = found.group()

        # Compute the LD correlation with plink
        launch_plink_ld(region['start'], region['stop'], chr_int,
                        fi_ref, folder_output)

        # TODO: the conversion of the plink output into a sparse matrix is
        # disabled for now. When it is restored, note that the region columns
        # are 'start'/'stop' (not 'startpos'/'endpos'), that plink writes to
        # "<folder_output>/chr<chr>_<start>_<stop>.ld", and that
        # generate_sparse_matrix only takes the plink file as argument.
impute_jass/setup.py
View file @
fa8b3139
from
setuptools
import
setup
,
find_packages
setup
(
name
=
'
jass_preprocessing
'
,
setup
(
name
=
'
impute_jass
'
,
version
=
'0.1'
,
description
=
'Preprocess GWAS summary statistic for JASS'
,
url
=
'http:https://gitlab.pasteur.fr/statistical-genetics/JASS_Pre-processing'
,
...
...
@@ -9,5 +9,5 @@ setup(name='jass_preprocessing',
license
=
'MIT'
,
#package_dir = {'': 'jass_preprocessing'},
packages
=
[
'impute_jass'
],
package_data
=
{
'impute_jass'
:
'data/*.csv'
},
package_data
=
{
'impute_jass'
:
'
./
data/*.csv'
},
zip_safe
=
False
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment