Commit c6f3afcf authored by Fabrice Allain's avatar Fabrice Allain

fix: solve issue with missing alignment for evcoupling contacts

parent 1b9fa481
......@@ -157,7 +157,8 @@ class CLI(object):
parser = argp.ArgumentParser(
formatter_class=argp.ArgumentDefaultsHelpFormatter)
parser.add_argument("-o", "--output", dest="output_directory", type=str,
help="Output directory", required=True, action=ReadableDir)
help="Output directory", required=True,
action=ReadableDir)
parser.add_argument("--nolog", action="store_true",
default=False, help="Don't generate log files")
parser.add_argument("-d", "--debug", dest="verbose", default=False,
......
......@@ -15,9 +15,8 @@ import re
import sys
from six import iteritems, text_type
from copy import copy
from ..core.legacy import AminoAcid as AmnAcd
from .common import (reg_load, ppdict)
from .common import (reg_load, ppdict, Capturing)
# import skbio.Protein as skprot
# TODO: interface skbio ??
......@@ -583,10 +582,12 @@ class AminoAcidSequence(SequenceList.SequenceList, object):
# TODO: smarter reader checking type of file (fasta, etc ...)
# TODO: capturing has some troubles with unicode ...
# with Capturing() as output:
if os.path.splitext(filename)[1] == '.seq':
self.ReadSeq(text_type(filename))
else:
self.ReadFasta(text_type(filename))
with Capturing() as output:
if os.path.splitext(filename)[1] == '.seq':
self.ReadSeq(text_type(filename))
else:
self.ReadFasta(text_type(filename))
LOG.info("".join(output).capitalize())
self.sequence = "".join(
(AmnAcd.AminoAcid(str(_))[0]
for _ in self.aalist))
......
......@@ -19,7 +19,6 @@ from conkit.core.sequencefile import SequenceFile
from .common import sort_2dict
from .protmap import (ResMap, ResAtmMap)
LOG = logging.getLogger(__name__)
# TODO: check if Atom is still used ...
Atom = collections.namedtuple("Atom", ["name", "coords"])
......@@ -330,7 +329,8 @@ class MapFile(RegexFile):
if "check_type" in kwargs else True
super(MapFile, self).__init__(*args, **kwargs)
if not self.conioflag and self.checkflag:
LOG.info("Conkit doesn't support {ftype}".format(
LOG.info("The file format {ftype} is not supported by the conkit "
"plugin. Switching to homemade parsers.".format(
ftype=self.filetype))
LOG.debug("Using {module}".format(module=__name__))
self.regex, self.filetype, self.sort = self.check_maptype()
......@@ -388,8 +388,10 @@ class MapFile(RegexFile):
-------
"""
LOG.info("Checking if file %s correspond to %s format", self.filepath,
self.filetype)
LOG.info(
"Checking if file %s correspond to our definition of %s format",
self.filepath,
self.filetype)
# Check if given type is supported
# TODO: report this check into commands section
if os.stat(self.filepath).st_size == 0:
......@@ -405,10 +407,11 @@ class MapFile(RegexFile):
if self.filetype in self.types:
match = self.types[self.filetype].get("regex").match(line)
else:
LOG.error("Format %s not supported !", self.filetype)
LOG.error("Format %s not supported. Please refer"
" to the documentation for supported files",
self.filetype)
match = None
if match:
LOG.info("Format type correct")
return [
self.types[self.filetype].get("regex"),
self.filetype,
......@@ -431,7 +434,8 @@ class MapFile(RegexFile):
# Stop checking after second line
LOG.error("Can't read %s file.", self.filetype)
break
LOG.error("Wrong format type given ...")
LOG.error("Wrong format type given. Please refer to the "
"documentation to check if the given format is correct.")
return [None] * 3
def load(self, *args):
......@@ -584,7 +588,8 @@ class ContactMapFile(MapFile):
path=kwargs.get("path"),
desc=self.filetype) if self.sort else None
distmap = ResMap(protein.aa_sequence.sequence, mtype='distance',
seqidx=protein.index, idxnames=idxnames, path=kwargs.get("path"),
seqidx=protein.index, idxnames=idxnames,
path=kwargs.get("path"),
colnames=colnames, sym=kwargs['sym'],
desc=self.filetype) if self.filetype == "metapsicovhb" else None
......@@ -645,7 +650,7 @@ class ContactMapFile(MapFile):
if self.filetype == "metapsicovhb":
self.distlist.append(self.lines[contact].get("res_dist"))
if self.filetype in ("evfold", "plmdca", "plm", "plmev"):
if self.filetype in ("evcoupling", "plmdca", "plm", "plmev"):
self.clashlist.append(next(
(el for el in (
self.lines[contact].get("ss_filter"),
......@@ -673,8 +678,12 @@ class ContactMapFile(MapFile):
alignment = pairwise2.align.localxs(
seq, protein.aa_sequence.sequence, -1, -1,
one_alignment_only=True)[0]
LOG.info('Alignment of amino acid sequence with contact file\n'
'%s' % pairwise2.format_alignment(*alignment))
LOG.info(
'Alignment of sequence in contact file ({0})'
' with reference ({1})\n{2}'.format(
self.filetype,
os.path.basename(protein.aa_sequence.fileName),
pairwise2.format_alignment(*alignment)))
shift = re.match(r'^-*', alignment[1])
shift = len(shift.group(0)) if shift else 0
if shift:
......@@ -813,16 +822,19 @@ class PDBFile(MapFile):
float(self.lines[atomy]['z']))
dist = distance.euclidean(coordx, coordy)
if indx[0] in list(resmap.index.get_level_values("residuex"))\
and indy[0] in list(resmap.index.get_level_values("residuex")):
if indx[0] in list(resmap.index.get_level_values("residuex")) \
and indy[0] in list(
resmap.index.get_level_values("residuex")):
LOG.debug("Update distance value (%s, %s)", indx, indy)
newmap.at[indx, indy] = dist
if sym:
# If symmetric matrix
newmap.at[indy, indx] = dist
elif indx[0] not in list(resmap.index.get_level_values("residuex")):
elif indx[0] not in list(
resmap.index.get_level_values("residuex")):
error_list.add(indx[0])
elif indy[0] not in list(resmap.index.get_level_values("residuex")):
elif indy[0] not in list(
resmap.index.get_level_values("residuex")):
error_list.add(indy[0])
if error_list:
# Listing related humanidx in the initial df
......@@ -841,6 +853,7 @@ class PDBFile(MapFile):
class DistanceMapFile(MapFile):
"""Distance matrix file"""
def __init__(self, filepath, filetype):
super(MapFile).__init__(filepath, filetype)
raise NotImplementedError
......@@ -926,8 +939,8 @@ class MapFileListReader(object):
maptypes = [maptypes] if type(maptypes) != list else maptypes
if not maptypes or len(maps) != len(maptypes):
maptypes = [os.path.splitext(_)[1][1:] for _ in maps]
LOG.info("Reader focused on file(s) %s %s", maps,
maptypes)
# LOG.info("Analyzing input file(s) %s %s", maps,
# maptypes)
for i, filepath in enumerate(maps):
if os.path.exists(filepath):
# TODO: check_type functionstr
......
......@@ -51,7 +51,7 @@
<hbdb_potential enabled="${hbdb_potential_enabled}"/>
<scoring method="${scoring_method}"/>
</annealing_parameters>
<md_parameters dynamics="${}" random_seed="${md_parameters_random_seed}" tad_temp_high="${}" tad_timestep_factor="${}" cartesian_temp_high="${}" cartesian_first_iteration="${}" timestep="${}" temp_cool1_final="${}" temp_cool2_final="${}" steps_high="${md_parameters_steps_high}" steps_refine="${}" steps_cool1="${md_parameters_steps_cool1}" steps_cool2="${md_parameters_steps_cool2}"/>
<md_parameters dynamics="${md_parameters_dynamics}" random_seed="${md_parameters_random_seed}" tad_temp_high="${md_parameters_tad_temp_high}" tad_timestep_factor="${md_parameters_tad_timestep_factor}" cartesian_temp_high="${md_parameters_cartesian_temp_high}" cartesian_first_iteration="${md_parameters_cartesian_first_iteration}" timestep="${md_parameters_timestep}" temp_cool1_final="${md_parameters_temp_cool1_final}" temp_cool2_final="${md_parameters_temp_cool2_final}" steps_high="${md_parameters_steps_high}" steps_refine="${md_parameters_steps_refine}" steps_cool1="${md_parameters_steps_cool1}" steps_cool2="${md_parameters_steps_cool2}"/>
</cns>
<job_manager default_command="csh -f">
<host enabled="yes" command="${host_command}" executable="${host_executable}" n_cpu="${n_cpus}" use_absolute_path="yes"/>
......@@ -84,4 +84,4 @@
<noe_restraint_list pickle_output="${pickle_output}" text_output="yes" xml_output="no"/>
<spectra write_assigned="no" write_assigned_force="no" iteration="last" write_unambiguous_only="yes"/>
</report>
</project>
</project>
\ No newline at end of file
......@@ -1371,7 +1371,8 @@ class DistanceRestraint(AbstractPeak):
# TODO: call Id -> Number? or is it another thing?
counter = 0
# counter = 0
counter = 100000
def __init__(self, id=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment