Commit baed6c54 authored by Fabrice Allain's avatar Fabrice Allain
Browse files

Updated string formatting in mapreport

parent d5c35c58
......@@ -157,7 +157,7 @@ def format_dict(indict):
:return:
"""
for key in indict:
if isinstance(type(indict[key]), str):
if isinstance(indict[key], str):
indict[key] = format_str(indict[key])
return indict
......@@ -250,10 +250,10 @@ class CustomLogging(object):
:param desc:
:return:
"""
if isinstance(type(self.msg), list):
if isinstance(self.msg, list):
self.msg += desc
self.msg = " - ".join(self.msg)
elif isinstance(type(self.msg), str):
elif isinstance(self.msg, str):
self.msg = " - ".join((self.msg, desc.capitalize()))
def default_config(self):
......
No preview for this file type
......@@ -24,6 +24,7 @@ def check_file(prospective_file):
:param prospective_file:
"""
LOG.debug("Checking if %s is a readable file", prospective_file)
if not os.path.exists(prospective_file):
raise argp.ArgumentTypeError("readable_file:'{0}' is not a valid "
"path".format(prospective_file))
......@@ -41,12 +42,12 @@ class ReadableFile(argp.Action):
super(ReadableFile, self).__init__(*args, **kwargs)
def __call__(self, parser, namespace, values, option_string=None):
if isinstance(type(values), list):
if isinstance(values, list):
for prospective_file in values:
check_file(prospective_file)
setattr(namespace, self.dest,
[os.path.abspath(os.path.expanduser(val)) for val in values])
elif isinstance(type(values), str):
elif isinstance(values, str):
check_file(values)
setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values)))
......@@ -242,6 +243,7 @@ class AriaEcCommand(object):
settings.load_config(self.args.conf_file)
# Update settings associated to command section
LOG.info("Updating %s args settings", self.args.command)
LOG.debug(self.args.__dict__)
getattr(settings, self.args.command).args.update(format_dict(self.args.__dict__))
LOG.debug(getattr(settings, self.args.command).args)
if self.args.output_directory:
......
No preview for this file type
......@@ -18,7 +18,7 @@ clashlist_executable:
[contactdef]
; Contact definition section used to define contactmap from pdb file.
; Decrease this threshold if using other cutoff (ca_ca, ...)
; Decrease this threshold if using other cutoff (e.g. 5.0)
default_cutoff: 8.0
; Add contact cutoff following the syntax atm1_atm2
;ca_ca: 7.0
......
......@@ -24,6 +24,7 @@ from mako.template import Template
from aria.Molecule import Molecule
from aria.tools import string_to_segid
from aria.AriaXML import AriaXMLPickler
from aria.PDBReader import PDBReader
from aria.conversion import Converter, SequenceList, MoleculeSettings
LOG = logging.getLogger(__name__)
......@@ -127,17 +128,52 @@ class AriaXMLConverter(Converter, object):
Converter.__init__(self)
self._mol_set = MoleculeSettings()
self._pickler = AriaXMLPickler()
self._molecule = None
self._upflag = False
self.outprefix = ""
self.settings = settings
self.molecule = None
self.structure = None
def load_molecule(self, seqpath):
@property
def molecule(self):
    """
    aria.Molecule.Molecule object or None. If a structure has been loaded,
    use it to update the molecule

    The update is done at most once: the first access made while both
    self.structure and self._molecule are set calls self.upd_mol and raises
    self._upflag so later accesses skip it.

    Returns
    -------
    The object stored in self._molecule (None when no molecule has been
    loaded yet)
    """
    if self.structure and self._molecule and not self._upflag:
        # Synchronise the molecule with the loaded structure, once only
        self.upd_mol(self._molecule, self.structure)
        self._upflag = True
    # Always hand the molecule back, including on the access that triggered
    # the update (that access used to fall through and return None)
    return self._molecule
@staticmethod
def upd_mol(molecule, structure):
    """
    Update molecule object according to pdb structure

    Placeholder: the body is currently a no-op, so neither argument is
    read or modified.

    Parameters
    ----------
    molecule: aria.Molecule.Molecule
    structure: aria.legacy.PDB.PDB
    """
    # TODO: check if pdb structure is in the same format as molecule
    # TODO: loop over pdb structure and update/remove atoms in molecule
    # chains (cf. aria molecule and chain methods)
    pass
:param seqpath:
def read_seq(self, seqpath):
"""
Load aria Molecule object from seq file
Parameters
----------
seqpath: str
absolute path of .seq file
"""
self._mol_set['format'] = 'seq'
self._mol_set['input'] = seqpath
self._mol_set['input'] = os.path.abspath(str(seqpath))
self._mol_set['output'] = os.path.join(self.settings.infra["xml"],
self.outprefix + ".xml")
self._mol_set['type'] = 'PROTEIN'
......@@ -159,40 +195,87 @@ class AriaXMLConverter(Converter, object):
self._mol_set['first_residue_number'])
# with Capturing() as output:
# Initialize SequenceList object with seq file
sequence.parse(self._mol_set['input'], self._mol_set['format'],
self._mol_set['naming_convention'])
# LOG.info("\n" + "".join(output))
# Initialize aria Atom factory
factory = self.create_factory()
factory.reset()
factory.unfreeze()
# Create chains in SequenceList object with aria atom factory
chains = sequence.create_chains(factory)
self.molecule = Molecule(self._mol_set['name'])
# Instantiate aria molecule object
self._molecule = Molecule(self._mol_set['name'])
for segid in segids:
self.molecule.add_chain(chains[segid])
def read_pdb(self, pdbpath):
    """
    Read a pdb file and store the result in self.structure

    Parameters
    ----------
    pdbpath
        value passed unchanged to PDBReader.read (presumably the path of
        the pdb file -- TODO confirm)
    """
    # read pdb file with aria pdb reader and load it in self.structure
    self.structure = PDBReader().read(pdbpath)
def write_xmlseq(self):
    """
    Use aria xml pickler to generate xml molecule file

    Dumps self.molecule with self._pickler to the location stored in
    self._mol_set['output'].

    Returns
    -------
    The value of self._mol_set['output']

    Raises
    ------
    Exception
        any error raised while dumping is logged at critical level and
        then re-raised unchanged
    """
    try:
        self._pickler.dump(self.molecule, self._mol_set['output'])
    except Exception as msg:
        # Log the failure with the target file, then let the caller handle it
        LOG.critical("Error for writing xml seq file %s: %s",
                     self._mol_set['output'], msg)
        raise
    return self._mol_set['output']
@staticmethod
def deff(atm_dists, dpow=6):
    """
    Compute aria effective distance from input distances

    The effective distance is (sum(d ** -dpow)) ** (-1 / dpow).

    Parameters
    ----------
    atm_dists: list
        list of atm-atm distances
    dpow: int
        effective distance exponent (converted with int())

    Returns
    -------
    float
        effective distance

    Raises
    ------
    ZeroDivisionError
        if dpow converts to 0
    ValueError
        if atm_dists is empty or contains a zero distance (math.pow domain
        error)
    """
    power = int(dpow)
    # The outer exponent must mirror the inner one. Writing it as -1.0 / power
    # also forces float division, so it is never floored to -1 on
    # interpreters where "/" is integer division.
    return math.pow(sum(math.pow(dist, -power) for dist in atm_dists),
                    -1.0 / power)
@staticmethod
def write_dist_xml(restraint_dict, outfile):
def write_dist_xml(dist_restraints, outfile):
"""
Write aria distance restraint xml file
:param restraint_dict:
:param outfile:
Parameters
----------
dist_restraints: dict
distance restraints containing all restraints and related contributi
ons
outfile: str
output file path
"""
xml_file = open(outfile, "w")
xml_file.write('''\
......@@ -201,26 +284,26 @@ class AriaXMLConverter(Converter, object):
''')
# Pour chaque contrainte
# Pour chaque contribution
for restraint_id in restraint_dict:
for restraint_id in dist_restraints:
restraint = {
'rest_id': restraint_id,
'rest_weight': restraint_dict[restraint_id]["meta"]["weight"],
'rest_weight': dist_restraints[restraint_id]["meta"]["weight"],
'rest_dist': float(
restraint_dict[restraint_id]["meta"]["distance"]),
dist_restraints[restraint_id]["meta"]["distance"]),
'lower_bound': float(
restraint_dict[restraint_id]["meta"]["lower_bound"]),
dist_restraints[restraint_id]["meta"]["lower_bound"]),
'upper_bound': float(
restraint_dict[restraint_id]["meta"]["upper_bound"]),
'reliable': restraint_dict[restraint_id]["meta"]["reliable"],
'list_name': restraint_dict[restraint_id]["meta"]["list_name"]
dist_restraints[restraint_id]["meta"]["upper_bound"]),
'reliable': dist_restraints[restraint_id]["meta"]["reliable"],
'list_name': dist_restraints[restraint_id]["meta"]["list_name"]
}
xml_file.write('''\
<restraint id="{rest_id}" weight="{rest_weight:.1f}" distance="{rest_dist:.1f}" \
lower_bound="{lower_bound:.1f}" upper_bound="{upper_bound:.1f}" active="1" \
reliable="{reliable}" list_name="{list_name}">
'''.format(**restraint))
for contrib_id in restraint_dict[restraint_id]["contrib"]:
res_id = restraint_dict[restraint_id]["contrib"][contrib_id][
for contrib_id in dist_restraints[restraint_id]["contrib"]:
res_id = dist_restraints[restraint_id]["contrib"][contrib_id][
"spin_pair"].keys()
xml_file.write('''\
<contribution id="{id}" weight="{weight}">
......@@ -230,12 +313,12 @@ reliable="{reliable}" list_name="{list_name}">
</spin_pair>
</contribution>
'''.format(id=contrib_id, res1_segid="", res2_segid="",
weight=restraint_dict[restraint_id]["contrib"][contrib_id]["meta"][
weight=dist_restraints[restraint_id]["contrib"][contrib_id]["meta"][
"weight"],
res1_pos=res_id[0], res2_pos=res_id[1],
atm1_name=restraint_dict[restraint_id]["contrib"][contrib_id][
atm1_name=dist_restraints[restraint_id]["contrib"][contrib_id][
"spin_pair"][res_id[0]],
atm2_name=restraint_dict[restraint_id]["contrib"][contrib_id][
atm2_name=dist_restraints[restraint_id]["contrib"][contrib_id][
"spin_pair"][res_id[1]]))
xml_file.write('''\
</restraint>
......@@ -283,7 +366,7 @@ assign (resid {res1} and name o) (resid {res2} and name hn) 1.8 {dminus} {dplus
! Long range Hbond list (beta sheet)
''')
contacts = hbmap["scoremap"].sortedset(human_idx=True)
distmap = hbmap.get("distmap")
# distmap = hbmap.get("distmap")
if n_hb:
contacts = contacts[:n_hb]
LOG.debug(contacts)
......@@ -883,24 +966,6 @@ class AriaEcXMLConverter(AriaXMLConverter):
ssdist_file)
return {'hbond': hb_file, 'dihed': dihed_file, 'ssdist': ssdist_file}
def write_xmlseq(self):
"""
:return:
"""
try:
self._pickler.dump(self.molecule, self._mol_set['output'])
except Exception as msg:
LOG.critical("Error for writing xml seq file %s: %s",
self._mol_set['output'], msg)
raise
return self._mol_set['output']
def write_ariaproject(self, aria_template, seqfile, dist_files, tbl_files,
desclist=None):
"""
......
No preview for this file type
No preview for this file type
......@@ -3,7 +3,8 @@
Secondary structure list
"""
from __future__ import absolute_import, division, print_function
from __future__ import absolute_import, division, print_function, \
unicode_literals
import os
import sys
......@@ -405,7 +406,7 @@ class Protein(object):
self.aa_sequence = AminoAcidSequence(sett.TOPO)
self.sec_struct = SsList(sett)
self.index = [] # Index starting from 1
self.seqfile_path = ''
self.seqfile_path = u''
@staticmethod
def sync_index(index1, index2):
......@@ -494,7 +495,7 @@ class Protein(object):
# TODO: same as above, trouble with unicode calls inside capturing
# with Capturing() as output:
self.aa_sequence.WriteSeq(text_type(outfile))
self.seqfile_path = outfile
self.seqfile_path = '%s' % outfile
if __name__ == "__main__":
......
......@@ -9,6 +9,7 @@ from copy import deepcopy
import os
import re
import csv
import string
import logging
import textwrap
import datetime
......@@ -440,63 +441,63 @@ class ProteinMap(Map):
"""
with open(reportpath, 'w') as reportf:
msg = """\
## Report {map1name} vs. {map2name}
##
## Date: {date}
##
## Plots: {outdir}
## Reference map: {map1path}
## Contact map: {map2path}
##
## -----------------------------------------------------------------------------
##
## Sequence:
## {seq}
## Protein length: {protlen}
##
## -----------------------------------------------------------------------------
##
## Matthews correlation coefficient (MCC): {mcc}
## F1 score: {f1s}
## F2 score: {f2s}
## F0.5 score: {f05s}
##
## Precision: {precision}
## Recall (Sensibility): {recall}
## Accuracy: {accuracy}
##
## Hamming loss: {hamm}
## Hinge loss: {hin}
##
## -----------------------------------------------------------------------------
##
## Plot scores
##
## ROC Area Under Curve: {roc_auc}
## Average precision score: {aver_prec}
##
## -----------------------------------------------------------------------------
##
## Precision recall curve
##
## Precision values:
## {allprec}
## Recall values:
## {allrec}
## Score tresholds ({map2name}):
## {prthres}
##
## -----------------------------------------------------------------------------
##
## ROC curve
##
## True Positive Rate (Sensibility) values:
## {alltpr}
## False Positive Rate (1 - Specificity) values:
## {allfpr}
## Score tresholds ({map2name}):
## {rocthres}""".format(**scores)
msg = string.Formatter().vformat("""\
## Report {map1name} vs. {map2name}
##
## Date: {date}
##
## Plots: {outdir}
## Reference map: {map1path}
## Contact map: {map2path}
##
## -----------------------------------------------------------------------------
##
## Sequence:
## {seq}
## Protein length: {protlen}
##
## -----------------------------------------------------------------------------
##
## Matthews correlation coefficient (MCC): {mcc}
## F1 score: {f1s}
## F2 score: {f2s}
## F0.5 score: {f05s}
##
## Precision: {precision}
## Recall (Sensibility): {recall}
## Accuracy: {accuracy}
##
## Hamming loss: {hamm}
## Hinge loss: {hin}
##
## -----------------------------------------------------------------------------
##
## Plot scores
##
## ROC Area Under Curve: {roc_auc}
## Average precision score: {aver_prec}
##
## -----------------------------------------------------------------------------
##
## Precision recall curve
##
## Precision values:
## {allprec}
## Recall values:
## {allrec}
## Score tresholds ({map2name}):
## {prthres}
##
## -----------------------------------------------------------------------------
##
## ROC curve
##
## True Positive Rate (Sensibility) values:
## {alltpr}
## False Positive Rate (1 - Specificity) values:
## {allfpr}
## Score tresholds ({map2name}):
## {rocthres}""", (), defaultdict(str, **scores))
LOG.debug("\n" + msg)
reportf.write(msg)
......@@ -517,7 +518,7 @@ class ProteinMap(Map):
classification metrics
"""
metrics = defaultdict(lambda: None)
metrics = {}
if 1 in y_pred and 1 in y_true:
metrics.update({
......@@ -621,9 +622,11 @@ class ProteinMap(Map):
LOG.info("Generate map report file (%s)", reportpath)
nb_c = int(len(self.sequence) * float(n_factors[0]))
# TODO: evaluate map statistics for several treshold (use submap var)
submap = cmpmap.copy()
print(np.array_equal(cmpmap.values.astype(int).flat, submap.topmap(scoremap, nb_c).values.astype(int).flat))
# print(np.array_equal(cmpmap.values.astype(int).flat,
# submap.topmap(scoremap, nb_c).values.astype(int).flat))
y_true = list(self.values.astype(int).flat)
y_pred = list(cmpmap.values.astype(int).flat)
......
No preview for this file type
......@@ -55,8 +55,7 @@ class AriaEcSetup(object):
# -------------------------------------------------------------------- #
# ----------------------------- Input -------------------------------- #
# -------------------------------------------------------------------- #
self.outprefix = get_filename(self.settings.setup.args.get("seq",
None))
self.outprefix = get_filename(self.settings.setup.args.get("seq", ""))
self.converter.outprefix = self.outprefix
# ------------------------- Load sequence ---------------------------- #
self.protein.set_aa_sequence(self.settings.setup.args.get("seq", None))
......@@ -149,7 +148,7 @@ class AriaEcSetup(object):
self.outprefix + ".seq"))
# Load aria molecule object from seq file and convert it into xml format
LOG.info("Load molecule file and convert it into xml format")
self.converter.load_molecule(self.protein.seqfile_path)
self.converter.read_seq(self.protein.seqfile_path)
# --------------------------- TBL restraints ------------------------- #
# Setting contact number limit for hbmap
n_hb = int(len(self.protein.aa_sequence.sequence) *
......
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment