Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bis-aria
ariaec
Commits
402cdec9
Commit
402cdec9
authored
Feb 03, 2016
by
fabrice
Browse files
pep8tized v3
parent
dc8fc3f1
Changes
13
Hide whitespace changes
Inline
Side-by-side
ariaec/__init__.py
View file @
402cdec9
"""
Aria EC module
"""
ariaec/__init__.pyc
View file @
402cdec9
No preview for this file type
ariaec/base.py
View file @
402cdec9
...
...
@@ -102,9 +102,9 @@ def format_str(string):
:param string:
:return:
"""
if
re
.
search
(
r
"^\s*true\s*$"
,
string
,
re
.
I
):
if
re
.
search
(
r
"^\s*
(
true
|yes)
\s*$"
,
string
,
re
.
I
):
return
True
elif
re
.
search
(
r
"^\s*false\s*$"
,
string
,
re
.
I
):
elif
re
.
search
(
r
"^\s*
(
false
|no)
\s*$"
,
string
,
re
.
I
):
return
False
elif
re
.
search
(
r
"^\s*\d+\s*$"
,
string
):
return
int
(
string
)
...
...
@@ -114,7 +114,7 @@ def format_str(string):
return
string
.
split
(
','
)
elif
"+"
in
string
:
return
string
.
split
(
'+'
)
elif
re
.
search
(
r
"[/\
\
w]+"
,
string
):
elif
re
.
search
(
r
"[/\w]+"
,
string
):
return
string
else
:
if
string
:
...
...
@@ -133,12 +133,23 @@ def format_str(string):
def
format_dict
(
indict
):
"""
:param indict:
:return:
"""
for
key
in
indict
:
indict
[
key
]
=
format_str
(
indict
[
key
])
return
indict
def
ppdict
(
indict
,
indent
=
2
):
"""
:param indict:
:param indent:
:return:
"""
return
json
.
dumps
(
indict
,
indent
=
indent
)
...
...
@@ -197,6 +208,12 @@ class CustomLogging:
default_file
=
"conf/logging.json"
def
__init__
(
self
,
level
=
logging
.
INFO
,
desc
=
None
):
"""
:param level:
:param desc:
:return:
"""
# TODO: detect path log filenames and makedirs if not exists
logging
.
basicConfig
(
level
=
level
)
if
desc
:
...
...
@@ -206,6 +223,11 @@ class CustomLogging:
self
.
config
=
self
.
default_config
()
def
update_msg
(
self
,
desc
):
"""
:param desc:
:return:
"""
if
type
(
self
.
msg
)
==
list
:
self
.
msg
+=
desc
self
.
msg
=
" - "
.
join
(
self
.
msg
)
...
...
@@ -213,6 +235,10 @@ class CustomLogging:
self
.
msg
=
" - "
.
join
((
self
.
msg
,
desc
.
capitalize
()))
def
default_config
(
self
):
"""
:return:
"""
# with open(self.default_file, 'rt') as f:
with
pkgr
.
resource_stream
(
__name__
,
self
.
default_file
)
as
f
:
config
=
json
.
load
(
f
)
...
...
@@ -237,6 +263,10 @@ class CustomLogging:
logging
.
config
.
dictConfig
(
self
.
config
)
def
welcome
(
self
):
"""
:return:
"""
desc
=
'''
================================================================================
...
...
@@ -249,6 +279,10 @@ class CustomLogging:
class
Capturing
(
list
):
def
__enter__
(
self
):
"""
:return:
"""
# Stock default stdout and redirect current stdout to this class
self
.
_stdout
=
sys
.
stdout
# All print calls are saved into self._stringio
...
...
@@ -256,6 +290,11 @@ class Capturing(list):
return
self
def
__exit__
(
self
,
*
args
):
"""
:param args:
:return:
"""
self
.
extend
(
"
\n
"
.
join
(
self
.
_stringio
.
getvalue
().
splitlines
()))
sys
.
stdout
=
self
.
_stdout
...
...
ariaec/base.pyc
View file @
402cdec9
No preview for this file type
ariaec/econtacts.py
View file @
402cdec9
...
...
@@ -3,13 +3,13 @@
"""
from
__future__
import
absolute_import
,
division
,
print_function
import
math
import
re
import
numpy
as
np
import
pandas
as
pd
#
import
re
#
import
math
#
import numpy as np
#
import pandas as pd
from
collections
import
namedtuple
from
.base
import
cart_dist
#
from .base import cart_dist
Contact
=
namedtuple
(
"Contact"
,
[
"resid1"
,
"resid2"
])
...
...
@@ -18,154 +18,154 @@ Atom = namedtuple("Atom", ["name", "coords"])
# BBcontacts scripts #
# ---------------------------------------------------------------------------- #
def
diversityvalue
(
msa
,
l
):
"""
Compute diversityvalue for bbcontacts.py : sqrt(n)/L where n is the
number of sequence in MSA and L the protein length
:param msa: filepath (fasta format)
:param l: protein length
:return: diversityvalue (float)
"""
#
Compute n value
msa_reg
=
re
.
compile
(
r
"^>[A-Za-z0-9]+_[A-Za-z0-9]+"
)
n
=
0
with
open
(
msa
)
as
f
:
for
index
,
line
in
enumerate
(
f
):
match
=
msa_reg
.
match
(
line
)
if
match
and
"deselect"
not
in
line
:
n
+=
1
return
math
.
sqrt
(
n
)
/
float
(
l
)
def
indexup_bbcontact
(
bbcontact_dict
,
startind
,
out
,
prefix
):
"""
Index update if startind of coupling matrix is not eq 0
:param out:
:param prefix:
:param bbcontact_dict:
:param startind:
:return:
"""
up
=
int
(
startind
)
-
1
col
=
(
'identifier'
,
'diversity'
,
'direction'
,
'viterbiscore'
,
'indexpred'
,
'state'
,
'res1'
,
'res2'
)
with
open
(
'%s/%s.filteredoutput_up.txt'
%
(
out
,
prefix
),
'w'
)
as
f
:
f
.
write
(
"#{:>14}
\t
{:>10}
\t
{:>15}
\t
{:>15}
\t
{:>10}
\t
{:>10}
\t
{:>5}
\t
{"
":>5}"
.
format
(
*
col
))
for
contact
in
bbcontact_dict
:
resid1
=
int
(
bbcontact_dict
[
contact
][
'res1_nb'
])
resid2
=
int
(
bbcontact_dict
[
contact
][
'res2_nb'
])
bbcontact_dict
[
contact
][
'res1'
]
=
resid1
+
up
bbcontact_dict
[
contact
][
'res1_nb'
]
=
resid1
+
up
bbcontact_dict
[
contact
][
'res2'
]
=
resid2
+
up
bbcontact_dict
[
contact
][
'res2_nb'
]
=
resid2
+
up
line
=
[
bbcontact_dict
[
contact
][
x
]
for
x
in
col
]
f
.
write
(
"
\n
{:>15}
\t
{:>10}
\t
{:>15}
\t
{:>15}
\t
{:>10}
\t
{:>10}
\t
{"
":>5}
\t
{:>5}"
.
format
(
*
line
))
def
contact_insertion
(
contacts_dict
,
insert_list
,
insertvalue
,
seq
):
"""
Insert
:param contacts_dict:
:param insert_list:
:param insertvalue:
:param seq:
:return:
"""
maxind
=
max
(
contacts_dict
.
keys
())
for
insert
in
insert_list
:
if
insert
not
in
contacts_dict
:
contacts_dict
[
insert
]
=
{
'resname'
:
seq
[
insert
-
1
]}
tmp
=
{}
#
Init all contacts related to insert
for
contact
in
range
(
insert
+
1
,
maxind
+
1
):
tmp
[
contact
]
=
float
(
insertvalue
)
contacts_dict
[
insert
].
update
(
tmp
)
#
def diversityvalue(msa, l):
#
"""
#
Compute diversityvalue for bbcontacts.py : sqrt(n)/L where n is the
#
number of sequence in MSA and L the protein length
#
:param msa: filepath (fasta format)
#
:param l: protein length
#
:return: diversityvalue (float)
#
"""
#
#
Compute n value
#
msa_reg = re.compile(r"^>[A-Za-z0-9]+_[A-Za-z0-9]+")
#
n = 0
#
#
with open(msa) as f:
#
for index, line in enumerate(f):
#
match = msa_reg.match(line)
#
if match and "deselect" not in line:
#
n += 1
#
#
return math.sqrt(n) / float(l)
#
def indexup_bbcontact(bbcontact_dict, startind, out, prefix):
#
"""
#
Index update if startind of coupling matrix is not eq 0
#
:param out:
#
:param prefix:
#
:param bbcontact_dict:
#
:param startind:
#
:return:
#
"""
#
up = int(startind) - 1
#
col = ('identifier', 'diversity', 'direction', 'viterbiscore',
#
'indexpred', 'state', 'res1', 'res2')
#
with open('%s/%s.filteredoutput_up.txt' % (out, prefix), 'w') as f:
#
f.write("#{:>14}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{:>5}\t{"
#
":>5}".format(*col))
#
#
for contact in bbcontact_dict:
#
resid1 = int(bbcontact_dict[contact]['res1_nb'])
#
resid2 = int(bbcontact_dict[contact]['res2_nb'])
#
bbcontact_dict[contact]['res1'] = resid1 + up
#
bbcontact_dict[contact]['res1_nb'] = resid1 + up
#
bbcontact_dict[contact]['res2'] = resid2 + up
#
bbcontact_dict[contact]['res2_nb'] = resid2 + up
#
line = [bbcontact_dict[contact][x] for x in col]
#
f.write("\n{:>15}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{"
#
":>5}\t{:>5}".format(*line))
#
def contact_insertion(contacts_dict, insert_list, insertvalue, seq):
#
"""
#
Insert
#
:param contacts_dict:
#
:param insert_list:
#
:param insertvalue:
#
:param seq:
#
:return:
#
"""
#
maxind = max(contacts_dict.keys())
#
for insert in insert_list:
#
if insert not in contacts_dict:
#
contacts_dict[insert] = {'resname': seq[insert - 1]}
#
tmp = {}
#
Init all contacts related to insert
#
for contact in range(insert + 1, maxind + 1):
#
tmp[contact] = float(insertvalue)
#
contacts_dict[insert].update(tmp)
# Contacts scripts #
# ---------------------------------------------------------------------------- #
def
pdb_to_contact
(
pdbpath
,
threshold
):
"""
List of contacts inside given pdb file
:param threshold:
:author: bardiaux
:param pdbpath:
:return:
"""
mapy
=
[]
descmap
=
{}
with
open
(
pdbpath
,
'r'
)
as
pdb
:
coords
=
{}
while
1
:
l
=
pdb
.
readline
()
if
not
l
:
break
elif
l
.
startswith
(
'ATOM'
):
#
Foreach atom
fields
=
l
atom
=
fields
[
12
:
16
].
strip
()
resid
=
int
(
fields
[
23
:
26
].
strip
())
if
atom
[
0
]
!=
"H"
:
coords
.
setdefault
(
resid
,
[])
coord
=
np
.
array
([
float
(
a
)
for
a
in
[
fields
[
30
:
38
],
fields
[
38
:
46
],
fields
[
46
:
54
]]])
coords
[
resid
].
append
(
Atom
(
name
=
atom
,
coords
=
coord
))
k
=
coords
.
keys
()
k
.
sort
()
# sorted resid list
for
ii
in
range
(
0
,
len
(
k
)):
#
Foreach res i
i
=
k
[
ii
]
at1
=
coords
[
i
]
# (x, y, z) coords list (N, CA, C, ...)
for
jj
in
range
(
ii
,
len
(
k
)):
#
foreach res j (i+1, i+2, ...)
j
=
k
[
jj
]
found
=
False
at2
=
coords
[
j
]
for
a
in
at1
:
#
foreach atm of res i
for
b
in
at2
:
#
foreach atm of res j
if
not
found
and
cart_dist
(
a
.
coords
,
b
.
coords
)
<
\
float
(
threshold
):
#
If no contact found between 2 res and cart dist < t
found
=
True
if
(
i
,
j
)
not
in
mapy
:
#
If contact not already found
mapy
.
append
((
i
,
j
))
descmap
[
Contact
(
resid1
=
i
,
resid2
=
j
)]
=
{
"dist"
:
"%.2f"
%
cart_dist
(
a
.
coords
,
b
.
coords
),
"atoms"
:
(
a
.
name
,
b
.
name
)}
return
mapy
,
descmap
def
contact2dataframe
(
contacts
,
refseq
):
dim
=
len
(
refseq
)
matrix
=
np
.
zeros
(
shape
=
(
dim
,
dim
))
matrix
=
pd
.
DataFrame
(
matrix
)
#
print matrix
#
print contacts
for
contact
in
contacts
:
#
Contactmatrix humanidx starts at 0 and input contacts at 1
matrix
.
iloc
[
int
(
contact
[
0
])
-
1
,
int
(
contact
[
1
])
-
1
]
=
1
matrix
.
iloc
[
int
(
contact
[
1
])
-
1
,
int
(
contact
[
0
])
-
1
]
=
1
return
matrix
#
def pdb_to_contact(pdbpath, threshold):
#
"""
#
List of contacts inside given pdb file
#
:param threshold:
#
:author: bardiaux
#
:param pdbpath:
#
:return:
#
"""
#
mapy = []
#
descmap = {}
#
#
with open(pdbpath, 'r') as pdb:
#
coords = {}
#
#
while 1:
#
#
l = pdb.readline()
#
#
if not l:
#
break
#
#
elif l.startswith('ATOM'):
#
Foreach atom
#
#
fields = l
#
atom = fields[12:16].strip()
#
resid = int(fields[23:26].strip())
#
#
if atom[0] != "H":
#
#
coords.setdefault(resid, [])
#
coord = np.array([float(a) for a in [fields[30:38],
#
fields[38:46],
#
fields[46:54]]])
#
coords[resid].append(Atom(name=atom, coords=coord))
#
#
k = coords.keys()
#
k.sort() # sorted resid list
#
#
for ii in range(0, len(k)):
#
Foreach res i
#
i = k[ii]
#
at1 = coords[i] # (x, y, z) coords list (N, CA, C, ...)
#
for jj in range(ii, len(k)):
#
foreach res j (i+1, i+2, ...)
#
j = k[jj]
#
found = False
#
at2 = coords[j]
#
#
for a in at1:
#
foreach atm of res i
#
for b in at2:
#
foreach atm of res j
#
if not found and cart_dist(a.coords, b.coords) < \
#
float(
#
threshold):
#
If no contact found between 2 res and cart dist < t
#
found = True
#
if (i, j) not in mapy:
#
If contact not already found
#
mapy.append((i, j))
#
descmap[Contact(resid1=i, resid2=j)] = {"dist": "%.2f" % cart_dist(a.coords, b.coords),
#
"atoms": (a.name, b.name)}
#
#
return mapy, descmap
#
def contact2dataframe(contacts, refseq):
#
dim = len(refseq)
#
matrix = np.zeros(shape=(dim, dim))
#
matrix = pd.DataFrame(matrix)
#
print matrix
#
print contacts
#
for contact in contacts:
#
Contactmatrix humanidx starts at 0 and input contacts at 1
#
matrix.iloc[int(contact[0]) - 1, int(contact[1]) - 1] = 1
#
matrix.iloc[int(contact[1]) - 1, int(contact[0]) - 1] = 1
#
#
return matrix
ariaec/econverter.pyc
View file @
402cdec9
No preview for this file type
ariaec/protein.py
View file @
402cdec9
...
...
@@ -13,8 +13,6 @@ import aria.legacy.SequenceList as SequenceList
import
aria.legacy.AminoAcid
as
AmnAcd
from
.base
import
(
reg_load
,
Capturing
,
ppdict
)
# import skbio.Protein as skprot
# TODO: interface skbio ??
...
...
@@ -38,6 +36,11 @@ class SsList:
ss_dist_reg
=
re
.
compile
(
r
"\s+(\d+\.\d+) \( (\d+\.\d+)\)"
)
def
__init__
(
self
,
sett
):
"""
:param sett:
:return:
"""
self
.
settings
=
sett
self
.
ss_matrix
=
[]
self
.
ssdict
=
{}
...
...
@@ -46,6 +49,10 @@ class SsList:
@
property
def
index
(
self
):
"""
:return:
"""
if
self
.
ss_matrix
:
# Assuming human idx (1 ...) correspond to ss_matrix first column !!!
return
[
int
(
_
)
for
_
in
zip
(
*
self
.
ss_matrix
)[
0
]]
...
...
@@ -53,10 +60,21 @@ class SsList:
return
[]
def
check_filetype
(
self
,
filename
):
"""
:param filename:
:return:
"""
self
.
filetype
=
os
.
path
.
splitext
(
filename
)[
1
][
1
:]
# TODO: check if given file is supported
def
read
(
self
,
filename
,
sequence
=
''
):
"""
:param filename:
:param sequence:
:return:
"""
self
.
check_filetype
(
filename
)
logger
.
info
(
"Reading secondary structure file %s [%s]"
%
(
filename
,
...
...
@@ -73,6 +91,11 @@ class SsList:
self
.
seq_sublist
(
sequence
)
def
read_psipred
(
self
,
filename
):
"""
:param filename:
:return:
"""
self
.
ssdict
=
reg_load
(
self
.
psipred_reg
,
filename
)
# TODO: supprimer psipred_list dans les futures implementations
...
...
@@ -95,11 +118,22 @@ class SsList:
self
.
ssdict
[
line_id
][
'ss_conf'
]])
def
write_ssfasta
(
self
,
filename
,
desc
=
"pdbid"
):
"""
:param filename:
:param desc:
:return:
"""
with
open
(
filename
,
'w'
)
as
psipred
:
psipred
.
write
(
"> %s
\n
"
%
desc
)
psipred
.
write
(
""
.
join
([
_
[
0
]
for
_
in
zip
(
*
self
.
ss_matrix
)[
2
]]))
def
read_indextableplus
(
self
,
filename
):
"""
:param filename:
:return:
"""
c
=
0
error_list
=
[]
ss_index_dict
=
{
'H'
:
1
,
'C'
:
1
,
'E'
:
1
}
...
...
@@ -134,6 +168,12 @@ class SsList:
str
(
error_list
)))
def
_read_ssdist
(
self
,
infile
,
filename
=
''
):
"""
:param infile:
:param filename:
:return:
"""
logger
.
info
(
"Reading distance file {0}"
.
format
(
filename
))
c
=
0
atom_list
=
[]
...
...
@@ -228,6 +268,13 @@ class AminoAcidSequence(SequenceList.SequenceList):
}
def
__init__
(
self
,
topologyfile
,
*
args
,
**
kwargs
):
"""
:param topologyfile:
:param args:
:param kwargs:
:return:
"""
SequenceList
.
SequenceList
.
__init__
(
self
,
*
args
,
**
kwargs
)
self
.
_topfile
=
topologyfile
self
.
_topology
=
None
...
...
@@ -236,6 +283,10 @@ class AminoAcidSequence(SequenceList.SequenceList):
@
property
def
humanidx
(
self
):
"""
:return:
"""
return
range
(
1
,
len
(
self
.
sequence
)
+
1
)
@
property
...
...
@@ -254,13 +305,26 @@ class AminoAcidSequence(SequenceList.SequenceList):
return
self
.
_topology
def
__getitem__
(
self
,
key
):
"""
:param key:
:return:
"""
return
self
.
aalist
[
key
]
def
__repr__
(
self
):
"""
:return:
"""
return
"Amino Acid sequence %s (%d)"
%
(
self
.
sequence
,
len
(
self
.
sequence
))
def
readtopo
(
self
):
"""
:return:
"""
topo
=
{}
with
pkgr
.
resource_stream
(
__name__
,
self
.
_topfile
)
as
tpf
:
res_flag
=
False
...
...
@@ -302,6 +366,11 @@ class AminoAcidSequence(SequenceList.SequenceList):
return
topo
def
read
(
self
,
filename
):
"""
:param filename:
:return:
"""
# TODO: smarter reader checking type of file (fasta, etc ...)
with
Capturing
()
as
output
:
self
.
ReadFasta
(
filename
)
...
...
@@ -315,6 +384,11 @@ class AminoAcidSequence(SequenceList.SequenceList):
class
Protein
:
def
__init__
(
self
,
sett
):
"""
:param sett:
:return:
"""
self
.
aa_sequence
=
AminoAcidSequence
(
sett
.
TOPO
)
self
.
sec_struct
=
SsList
(
sett
)
self
.
index
=
[]
# Index starting from 1
...
...
@@ -345,9 +419,19 @@ class Protein:
@
property
def
topology
(
self
):
"""
:return:
"""
return
self
.
aa_sequence
.
topology
def
set_aa_sequence
(
self
,
filename
,
ssidx
=
False
):
"""
:param filename:
:param ssidx:
:return:
"""
self
.
aa_sequence
.
read
(
filename
)
self
.
index
=
range
(
1
,
len
(
self
.
aa_sequence
.
sequence
)
+
1
)
if
self
.
sec_struct
.
ss_matrix
:
...
...
@@ -359,6 +443,13 @@ class Protein: