Commit 6352972f authored by Fabrice Allain
Browse files

pep revision

parent a1c17f2d
# coding=utf-8
"""
Basic tools aria_ec
"""
......@@ -123,7 +124,7 @@ def format_str(string):
ev_str = ast.literal_eval(string)
except ValueError:
LOG.error("Don't understand given string %s. Please check "
"format.", string)
"format.", string)
return None
except SyntaxError:
LOG.error("Given string %s is not a valid expression", string)
......@@ -203,9 +204,12 @@ def tickrot(axes, figure, rotype='horizontal', x=True, y=True):
figure.canvas.draw()
class CustomLogging:
class CustomLogging(object):
# default_file = os.path.join(os.path.abspath(os.path.dirname(__file__)),
# "conf/logging.json")
"""
custom logging
"""
default_file = "conf/logging.json"
def __init__(self, level=logging.INFO, desc=None):
......@@ -292,6 +296,9 @@ class CustomLogging:
class Capturing(list):
"""
Capture output
"""
def __enter__(self):
"""
......
# coding=utf-8
"""
Input/Output aria_ec scripts
"""
......@@ -29,6 +30,10 @@ LOG = logging.getLogger(__name__)
class AriaEcBbConverter(object):
"""
Bbcontacts convert main class
"""
def __init__(self, settings):
# TODO: check_type settings (AriaEcSettings)
self.settings = settings
......@@ -38,6 +43,9 @@ class AriaEcBbConverter(object):
def run(self):
# Check input
"""
main method
"""
LOG.debug("Settings:\n" + json.dumps(self.settings.setup.config,
indent=4))
LOG.debug("Args:\n" + json.dumps(self.settings.setup.args,
......@@ -81,6 +89,12 @@ class AriaEcBbConverter(object):
@staticmethod
def compute_diversityvalue(msa, l):
# Compute n value
"""
:param msa:
:param l:
:return:
"""
msa_reg = re.compile(r"^>[A-Za-z0-9]+_[A-Za-z0-9]+")
n = 0
......@@ -94,6 +108,10 @@ class AriaEcBbConverter(object):
class AriaXMLConverter(Converter, object):
"""
XML converter for aria files
"""
def __init__(self, settings):
Converter.__init__(self)
self._mol_set = MoleculeSettings()
......@@ -103,6 +121,10 @@ class AriaXMLConverter(Converter, object):
self.molecule = None
def load_molecule(self, seqpath):
"""
:param seqpath:
"""
self._mol_set['format'] = 'seq'
self._mol_set['input'] = seqpath
self._mol_set['output'] = os.path.join(self.settings.infra["xml"],
......@@ -142,11 +164,22 @@ class AriaXMLConverter(Converter, object):
@staticmethod
def deff(distance_list, dpow=6):
    """
    Compute the effective distance of a set of distances.

    The result is ``(sum(d ** -p)) ** (-1 / p)`` with ``p = int(dpow)``.

    :param distance_list: iterable of distances (numbers)
    :param dpow: power used for the averaging; truncated with ``int``
    :return: effective distance as a float
    :raises ValueError: raised by ``math.pow`` when a distance is zero or
        when ``distance_list`` is empty
    """
    power = int(dpow)
    inverse_sum = sum(math.pow(dist, -power) for dist in distance_list)
    # The outer exponent must be the inverse of the inner one. A literal
    # ``-1 / 6`` ignores ``dpow`` and is integer division (-1) on Python 2.
    return math.pow(inverse_sum, -1.0 / power)
@staticmethod
def write_dist_xml(restraint_dict, outfile):
"""
:param restraint_dict:
:param outfile:
"""
xml_file = open(outfile, "w")
xml_file.write('''\
<!DOCTYPE distance_restraint_list SYSTEM "distance_restraint1.0.dtd">
......@@ -305,6 +338,12 @@ assign (resid {res1} and name o) (resid {res2} and name hn) 1.8 {dminus} {dplus
def write_ssdist_tbl(sec_struct, ss_dist, outfile):
# Build global secondary structure distance restraints (H-H+4, E-E+1,
# ...)
"""
:param sec_struct:
:param ss_dist:
:param outfile:
"""
with open(outfile, 'w') as outfile:
for a in range(len(sec_struct) - 1):
for b in range(a + 1, len(sec_struct)):
......@@ -329,6 +368,13 @@ assign (resid {res1} and name {atm1}) (resid {res2} and name {atm2}) {dist} {sd
@staticmethod
def write_dihedral_tbl(sec_struct, outfile, k=0.5, e=2):
# Build dihedral angle restraints (phi, psi)
"""
:param sec_struct:
:param outfile:
:param k:
:param e:
"""
with open(outfile, "w") as outfile:
for i in range(len(sec_struct)):
if (i + 1) != len(sec_struct):
......@@ -359,6 +405,11 @@ assign (resid {res1} and name n) (resid {res1} and name ca) (resid {res1} and na
@staticmethod
def unambig_dist_xmltag(unambigous_dict):
"""
:param unambigous_dict:
:return:
"""
res = ""
for unambig in unambigous_dict:
res += '''
......@@ -377,6 +428,11 @@ assign (resid {res1} and name n) (resid {res1} and name ca) (resid {res1} and na
@staticmethod
def ambig_dist_xmltag(ambigous_dict):
"""
:param ambigous_dict:
:return:
"""
res = ""
for ambig in ambigous_dict:
res += '''
......@@ -395,11 +451,13 @@ assign (resid {res1} and name n) (resid {res1} and name ca) (resid {res1} and na
class AriaEcXMLConverter(AriaXMLConverter):
"""
XML converter ariaec to aria
"""
@staticmethod
def _write_hbmap_tbl(hbmap, outfile, dminus, dplus, n_hb=None,
                     hb_type="main", topo=None):
    """
    Write the hydrogen bond table by delegating to
    ``AriaXMLConverter._write_hbmap_tbl``.

    :param hbmap: forwarded unchanged to the parent implementation
    :param outfile: forwarded unchanged to the parent implementation
    :param dminus: forwarded unchanged to the parent implementation
    :param dplus: forwarded unchanged to the parent implementation
    :param n_hb: forwarded as keyword argument (default ``None``)
    :param hb_type: forwarded as keyword argument (default ``"main"``)
    :param topo: forwarded as keyword argument (default ``None``)
    """
    # Delegate exactly once and pass the caller's optional arguments on
    # instead of silently replacing them with the defaults.
    # NOTE(review): assumes the parent honours these keywords -- confirm.
    AriaXMLConverter._write_hbmap_tbl(hbmap, outfile, dminus, dplus,
                                      n_hb=n_hb, hb_type=hb_type,
                                      topo=topo)
def __init__(self, *args, **kwargs):
self.restraint_list = []
......@@ -407,16 +465,38 @@ class AriaEcXMLConverter(AriaXMLConverter):
def atm_product(self, idx1, res1, idx2, res2, prod_type="min"):
"""
:param idx1:
:param res1:
:param idx2:
:param res2:
:param prod_type:
:return:
"""
def resname(res):
    """
    Convert a residue name with ``AminoAcid.AminoAcid``.

    :param res: residue name accepted by ``AminoAcid.AminoAcid``
    :return: first element of the conversion result (presumably the
        one-letter code -- TODO confirm against AminoAcid)
    """
    converted = AminoAcid.AminoAcid(res)
    return converted[0]
def min_atms(aa1, aa2, atms):
    """
    Minimize the atom pair list between residues ``aa1`` and ``aa2``.

    Only pairs equal to ``('CA', 'CA')``, ``('CB', 'CB')`` or the entry
    stored in ``self.settings.scsc_min`` for the two residues are kept.

    :param aa1: first residue name, converted with ``resname``
    :param aa2: second residue name, converted with ``resname``
    :param atms: iterable of atom pairs to filter
    :return: list of the atom pairs that were kept, in input order
    """
    return [
        atmpair for atmpair in atms if atmpair in (
            ('CA', 'CA'),
            ('CB', 'CB'),
            self.settings.scsc_min[resname(aa1)][resname(aa2)])]
atms1 = self.molecule.get_chains()[0].getResidues()[idx1].atoms.keys()
atms2 = self.molecule.get_chains()[0].getResidues()[idx2].atoms.keys()
if prod_type == "min":
......@@ -432,6 +512,14 @@ class AriaEcXMLConverter(AriaXMLConverter):
return min_atms(res1, res2, list(itertools.product(atms1, atms2)))
def targetdistmap(self, distype, sequence, distfile=None, groupby=None):
"""
:param distype:
:param sequence:
:param distfile:
:param groupby:
:return:
"""
# TODO: make distfile default to the file containing the fixed
# information
# ! target is a ResAtmMap for the 20 aa, otherwise it is a
......@@ -530,12 +618,28 @@ class AriaEcXMLConverter(AriaXMLConverter):
nat_reliable = self.settings.setup.config["native_reliable"]
def min_ind(ind):
    """
    Clamp an index from below at zero.

    :param ind: index to clamp
    :return: ``ind`` when it is greater than or equal to 0, otherwise 0
    """
    if ind >= 0:
        return ind
    return 0
def max_ind(ind, max_idx):
    """
    Clamp an index from above at ``max_idx``.

    :param ind: index to clamp
    :param max_idx: largest value that may be returned
    :return: ``ind`` when it does not exceed ``max_idx``, otherwise
        ``max_idx``
    """
    if ind <= max_idx:
        return ind
    return max_idx
def resname_3l(residx):
    """
    Convert the contact map index label at position ``residx``.

    The last three characters of the label are passed to
    ``AminoAcid.AminoAcid``.

    :param residx: position in ``contactmap.index.values``
    :return: second element of the conversion result (presumably the
        three-letter residue name -- TODO confirm against AminoAcid)
    """
    label = contactmap.index.values[residx]
    converted = AminoAcid.AminoAcid(label[-3:])
    return converted[1]
max_seqidx = len(contactmap.sequence)
......@@ -667,6 +771,12 @@ class AriaEcXMLConverter(AriaXMLConverter):
return xml_file, pair_list
def write_maplist_restraints(self, maplist, targetmap):
"""
:param maplist:
:param targetmap:
:return:
"""
out = ([], [])
for maptype in maplist:
......@@ -710,6 +820,10 @@ class AriaEcXMLConverter(AriaXMLConverter):
return {'hbond': hb_file, 'dihed': dihed_file, 'ssdist': ssdist_file}
def write_xmlseq(self):
"""
:return:
"""
try:
......@@ -739,7 +853,7 @@ class AriaEcXMLConverter(AriaXMLConverter):
module_directory="/tmp/mako_modules")
try:
with open(templatepath, 'r'):
with open(templatepath):
pass
except Exception as msg:
LOG.critical("""Can't open template file "%s". %s""" %
......
# coding=utf-8
"""
Network deconvolution tool
"""
......@@ -110,6 +111,7 @@ def net_deconv(npmat, beta=0.99, alpha=1, control=0):
'''
print('Decomposition and deconvolution...')
# noinspection PyTypeChecker
dv, u = la.eigh(mat_th)
d = np.diag(dv)
lam_n = np.abs(np.min(np.min(np.diag(d)), 0))
......
# coding=utf-8
"""
Secondary structure list
"""
......@@ -20,8 +21,10 @@ from .base import (reg_load, ppdict)
LOG = logging.getLogger(__name__)
class SsList:
class SsList(object):
"""
Secondary structure prediction class
"""
psipred_reg = re.compile(r'^(?P<up_index>\d+)'
r'\s+(?P<up_residue>[AC-IK-NP-TVWYZ])'
r'\s+(?P<ss_pred>[HEC])'
......@@ -29,11 +32,11 @@ class SsList:
indxplus_reg = re.compile(
r'^(?P<up_index>\d+)\s+(?P<up_residue>[AC-IK-NP-TVWYZ])\s+'
r'(?P<ss_pred>[CEH])\s+(?P<ss_conf>\d)\s+(?P<msa_index>[\d\-]+)\s+'
r'(?P<msa_consper>[\d\-]+)\s+(?P<msa_cons>[\*~\-])\s+'
r'(?P<in_const>[\*~\-])\s+(?P<pdb_atom>[\d\-]+)\s+'
r'(?P<msa_consper>[\d\-]+)\s+(?P<msa_cons>[*~\-])\s+'
r'(?P<in_const>[*~\-])\s+(?P<pdb_atom>[\d\-]+)\s+'
r'(?P<pdb_chain>[\-\w])\s+(?P<pdb_index>[\d\-]+\w?)\s+'
r'(?P<pdb_residue>[AC-IK-NP-TVWYZ\-])\s+(?P<pdb_x_pos>[\d\.\-]+)\s+'
r'(?P<pdb_y_pos>[\d\.\-]+)\s+(?P<pdb_z_pos>[\d\-\.]+)')
r'(?P<pdb_residue>[AC-IK-NP-TVWYZ\-])\s+(?P<pdb_x_pos>[\d.\-]+)\s+'
r'(?P<pdb_y_pos>[\d.\-]+)\s+(?P<pdb_z_pos>[\d\-.]+)')
ss_dist_reg = re.compile(r"\s+(\d+\.\d+) \( (\d+\.\d+)\)")
def __init__(self, sett):
......@@ -85,7 +88,7 @@ class SsList:
else:
self.read_psipred(filename)
LOG.debug("Secondary structure matrix:\n%s\n"
"Secondary structure dict:\n%s", self.ss_matrix,
"Secondary structure dict:\n%s", self.ss_matrix,
self.ssdict)
if sequence:
self.seq_sublist(sequence)
......@@ -109,8 +112,9 @@ class SsList:
# ss_index_dict
ss_index_dict[self.ssdict[line_id - 1]['ss_pred'][0]] += 1
self.ssdict[line_id]['ss_pred'] = "".join((self.ssdict[line_id]['ss_pred'],
str(ss_index_dict[self.ssdict[line_id]['ss_pred']])))
self.ssdict[line_id]['ss_pred'] = "".join(
(self.ssdict[line_id]['ss_pred'],
str(ss_index_dict[self.ssdict[line_id]['ss_pred']])))
self.ss_matrix.append([self.ssdict[line_id]['up_index'],
self.ssdict[line_id]['up_residue'],
......@@ -164,7 +168,8 @@ class SsList:
except AssertionError:
sys.exit('Uneven number of matching lines (%d) '
'vs up_index (%d) : lines %s' % (c,
int(self.ss_matrix[-1][0]),
int(self.ss_matrix[
-1][0]),
str(error_list)))
def _read_ssdist(self, infile, filename=''):
......@@ -238,12 +243,14 @@ class SsList:
self.ss_matrix = self.ss_matrix[imin:imax]
LOG.debug("Secondary structure matrix relative to given "
"sequence:\n%s", self.ss_matrix)
# TODO: read_dssp
"sequence:\n%s", self.ss_matrix)
# TODO: read_dssp
class AminoAcidSequence(SequenceList.SequenceList, object):
"""
Amino acid sequence
"""
startres_reg = re.compile(r"^\s*residue\s+(?P<name>[A-Za-z]{1,4})", flags=re.I)
end_reg = re.compile(r"^\s*end", flags=re.I)
restatement_reg = {
......@@ -384,7 +391,10 @@ class AminoAcidSequence(SequenceList.SequenceList, object):
LOG.info("Amino acid sequence:\t%s", self.sequence)
class Protein:
class Protein(object):
"""
Protein class
"""
def __init__(self, sett):
"""
......@@ -394,7 +404,7 @@ class Protein:
"""
self.aa_sequence = AminoAcidSequence(sett.TOPO)
self.sec_struct = SsList(sett)
self.index = [] # Index starting from 1
self.index = [] # Index starting from 1
self.seqfile_path = ''
@staticmethod
......@@ -417,8 +427,9 @@ class Protein:
else:
return index1
elif len(index1) != len(index2):
raise IndexError("Please check humanidx list from input files. They "
"are not the same length !")
raise IndexError(
"Please check humanidx list from input files. They "
"are not the same length !")
@property
def topology(self):
......@@ -441,7 +452,7 @@ class Protein:
self.sec_struct.seq_sublist(self.aa_sequence.sequence)
if ssidx:
LOG.info("Using secondary structure index for amino acid "
"sequence")
"sequence")
self.index = self.sync_index(self.aa_sequence.humanidx,
self.sec_struct.index)
......@@ -462,15 +473,15 @@ class Protein:
self.sec_struct.read_ssdist(ssdist_filename)
else:
LOG.error("No secondary structure distance file found. Please "
"check configuration file")
"check configuration file")
if self.aa_sequence.sequence:
# Synchronise sec structure sequence with aa sequence
LOG.info("Align secondary structure sequence with protein "
"sequence")
"sequence")
self.sec_struct.seq_sublist(self.aa_sequence.sequence)
if ssidx:
LOG.info("Using secondary structure index for amino acid "
"sequence")
"sequence")
self.index = self.sync_index(self.aa_sequence.humanidx,
self.sec_struct.index)
......
......@@ -227,8 +227,8 @@ class ProteinMap(Map):
self.contact_flags = flaglist if flaglist else None
self._maplot = None
def _constructor_expanddim(self):
super(ProteinMap, self)._constructor_expanddim()
# def _constructor_expanddim(self):
# return self._constructor_expanddim()
@property
def sequence(self):
......@@ -311,7 +311,8 @@ class ProteinMap(Map):
contact_list = []
n = 1 if human_idx else 0
if self.dtype is bool:
for irow, row in enumerate(self):
# for irow, row in enumerate(self):
for irow, row in enumerate(self.index):
for icol, value in enumerate(self[row]):
if value:
contact_list.append((irow + n, icol + n))
......@@ -671,8 +672,8 @@ class ResAtmMap(ProteinMap):
# TODO: Autre methodes de dist
distance_method = 'euclidean'
def _constructor_expanddim(self):
super(ResAtmMap, self)._constructor_expanddim()
# def _constructor_expanddim(self):
# super(ResAtmMap, self)._constructor_expanddim()
def __init__(self, sequence=None, **kwargs):
# Sequence: 1L string or MultiIndex object
......@@ -864,8 +865,8 @@ class ResMap(ResAtmMap):
def __init__(self, sequence, **kwargs):
super(ResMap, self).__init__(sequence=sequence, **kwargs)
def _constructor_expanddim(self):
super(ResMap, self)._constructor_expanddim()
# def _constructor_expanddim(self):
# super(ResMap, self)._constructor_expanddim()
@property
def sequence(self):
......@@ -1209,7 +1210,7 @@ class MapFilter(object):
# Check for each cys in dis_bridge whether it already exists
# in unidisbridge_list
# First already-registered disulfide bridge sharing a cysteine with
# dis_bridge. The explicit None default matters: without it next() raises
# StopIteration when nothing matches, and the `if exdis:` test that
# follows could never see the "no match" case.
exdis = next((unidis for cys in dis_bridge for unidis in
              unidisbridge_list if cys in unidis), None)
if exdis:
if scoremap.iat[dis_bridge] > scoremap.iat[exdis]:
# Better cys--cys contact
......
No preview for this file type
......@@ -65,13 +65,49 @@ class MapFile(RegexFile):
"regex": re.compile(r"^(?P<res1_nb>\d+)\s+(?P<res1_name>\w)\s+"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>\-?\d+\.\d+)\s*$"),
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>--?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"
r"(?P<res2_nb>\d+)\s+(?P<res2_name>\w)\s+"
r"(?P<mi_score>\d)\s+"
r"(?P<plm_score>-?\d+\.\d+)\s*$"),
"score_field": "plm_score"
},
"evfold": {
"regex": re.compile(
r'^(?P<res1_nb>\d+),(?P<res2_nb>\d+),'
r'(?P<ec_score>\-?\d+\.\d+e?\-?\d*),'
r'(?P<ec_score>-?\d+\.\d+e?-?\d*),'
r'(?P<placeholder>\d),(?P<res1_cons>\d+),'
r'(?P<res2_cons>\d+),(?P<ss_filter>\d|\d{3}),'
r'(?P<high_cons_filter>\d|\d{3}),'
......@@ -82,37 +118,37 @@ class MapFile(RegexFile):
"pconsc": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) '
r'(?P<ec_score>\-?\d+\.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+\.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"pconsc1": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) '
r'(?P<ec_score>\-?\d+\.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+\.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"pconsc2": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) '
r'(?P<ec_score>\-?\d+\.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+\.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"metapsicov_stg1": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) \d (?P<res_dist>-?\d+.?\d*) '
r'(?P<ec_score>\-?\d+.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"metapsicov_stg2": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) \d (?P<res_dist>-?\d+.?\d*) '
r'(?P<ec_score>\-?\d+.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"psicov": {
"regex": re.compile(
r'^(?P<res1_nb>\d+) (?P<res2_nb>\d+) \d (?P<res_dist>-?\d+.?\d*) '
r'(?P<ec_score>\-?\d+.\d+e?\-?\d*)$'),
r'(?P<ec_score>-?\d+.\d+e?-?\d*)$'),
"score_field": "ec_score"
},
"gremlin": {
......@@ -120,9 +156,9 @@ class MapFile(RegexFile):
r'^(?P<res1_nb>\d+)\t(?P<res2_nb>\d+)\t'
r'(?P<res1_id>\d+_[AC-IK-NP-TVWYZ])\t'
r'(?P<res2_id>\d+_[AC-IK-NP-TVWYZ])\t'
r'(?P<raw_score>\-?\d+\.\d+e?\-?\d*)\t'
r'(?P<scale_score>\-?\d+\.\d+e?\-?\d*)\t'
r'(?P<prob>\-?\d+\.\d+e?\-?\d*)'),
r'(?P<raw_score>-?\d+\.\d+e?-?\d*)\t'
r'(?P<scale_score>-?\d+\.\d+e?-?\d*)\t'
r'(?P<prob>-?\d+\.\d+e?-?\d*)'),
"score_field": "scale_score"
},
"native": {
......@@ -168,13 +204,13 @@ class MapFile(RegexFile):
r'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
r'(?P<resn1>\w+)\s+'
r'(?P<resn2>\w+)\s+'
r'(?P<score>[\w\d\.\+\-]+)'),
r'(?P<score>[\w\d.+\-]+)'),
"score_field": "score"
},
"default_2": {
"regex": re.compile(
r'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
r'(?P<score>[\w\d\.\+\-]+)'),
r'(?P<score>[\w\d.+\-]+)'),
"score_field": "score"
},
"default_3": {
......
# coding=utf-8
"""
Setup Aria EC module
"""
import os
import subprocess
from setuptools import setup, find_packages, Command
......