Commit 603f202a authored by Fabrice Allain

Bug revision: dealing with empty contactlist file in maplot

parent 9c17de17
# coding=utf-8
"""
Input/Output aria_ec
"""
......@@ -17,7 +18,10 @@ LOG = logging.getLogger(__name__)
def check_file(prospective_file):
"""
:param prospective_file:
"""
if not os.path.exists(prospective_file):
raise argp.ArgumentTypeError("readable_file:'{0}' is not a valid "
"path".format(prospective_file))
......@@ -27,6 +31,9 @@ def check_file(prospective_file):
class ReadableFile(argp.Action):
"""
Class used with argparse action to check if a file is readable
"""
def __init__(self, *args, **kwargs):
super(ReadableFile, self).__init__(*args, **kwargs)
......@@ -42,7 +49,7 @@ class ReadableFile(argp.Action):
# TODO: Make parent Command class with _create_argparser, self.args,
# update_logger and run
class AriaEcCommand:
class AriaEcCommand(object):
"""
Argparse interface for aria_ec
"""
......@@ -84,7 +91,8 @@ class AriaEcCommand:
formatter_class=argp.ArgumentDefaultsHelpFormatter)
parser.add_argument("-o", "--output", dest="output_directory",
type=str, help="Output directory", required=True)
parser.add_argument("-c", "--conf", action=ReadableFile, dest="conf_file",
parser.add_argument("-c", "--conf", action=ReadableFile,
dest="conf_file",
default=None, help="configuration file")
parser.add_argument("--nolog", action="store_true",
default=False, help="Don't generate log files")
......@@ -100,8 +108,9 @@ class AriaEcCommand:
"""
for index, command in enumerate(self.command_list):
# Create subparser defined in command list
subcommand = getattr(self, "_" + command + "_parser")(desc=self.desc_list[
index])
subcommand = getattr(self, "_" + command + "_parser")(
desc=self.desc_list[
index])
parser.add_parser(command, parents=[subcommand])
def _setup_parser(self, desc=None):
......@@ -202,6 +211,10 @@ class AriaEcCommand:
return parser
def create_settings(self):
"""
create settings relative to args.command
:return:
"""
LOG.debug("Create AriaEcSettings")
settings = AriaEcSettings(self.args.command)
LOG.info("Loading default config file")
......@@ -218,20 +231,30 @@ class AriaEcCommand:
return settings
def run(self):
    """
    Call the method of this object named after ``self.args.command``.

    The command name is logged first, then the attribute with that name is
    looked up on ``self`` and invoked without arguments.
    """
    command = self.args.command
    LOG.info("Run %s command", command)
    handler = getattr(self, command)
    handler()
def setup(self):
    """
    Setup call: build an AriaEcSetup from the settings returned by
    ``create_settings`` and run it.
    """
    settings = self.create_settings()
    AriaEcSetup(settings).run()
def bbconv(self):
    """
    bbcontacts converter call: build an AriaEcBbConverter from the settings
    returned by ``create_settings`` and run it.
    """
    settings = self.create_settings()
    AriaEcBbConverter(settings).run()
def contactmap(self):
    """
    Contact map call: instantiate AriaEcContactMap with the settings
    returned by ``create_settings`` and run it.
    """
    settings = self.create_settings()
    AriaEcContactMap(settings).run()
......
No preview for this file type
No preview for this file type
# coding=utf-8
"""
Settings section
"""
from __future__ import absolute_import, division, print_function
import logging
import os
from configparser import SafeConfigParser
import collections
import logging
import pickle
import collections
import pkg_resources as pkgr
# noinspection PyCompatibility
from ConfigParser import ConfigParser
from .base import format_dict
LOG = logging.getLogger(__name__)
class Setting:
class Setting(object):
"""
Main setting object with args and config section
"""
def __init__(self):
self.config = collections.defaultdict()
......@@ -27,6 +32,9 @@ class Setting:
class Settings(object):
"""
Group settings with each section corresponding to a Setting object
"""
def __init__(self, sections):
self._sections = set(sections)
......@@ -46,7 +54,8 @@ class Settings(object):
elif not pkg:
LOG.error("Configuration file not found (%s)", configpath)
return None
config = SafeConfigParser(allow_no_value=True)
# config = SafeConfigParser(allow_no_value=True)
config = ConfigParser(allow_no_value=True)
if pkg:
with pkgr.resource_stream(__name__, configpath) as conf:
config.readfp(conf)
......@@ -64,9 +73,12 @@ class Settings(object):
LOG.warning("Unknow config section %s", section)
def write_config(self, filename):
# Ecrit les config de toutes les sections dans un autre fichier
"""
Write config of all sections into another file
:param filename:
"""
LOG.info("Writing .ini file (%s)", filename)
config = SafeConfigParser(allow_no_value=True)
config = ConfigParser(allow_no_value=True)
iniout = open(filename, mode="w")
for section in self._sections:
config.add_section(section)
......@@ -81,20 +93,26 @@ class Settings(object):
class AriaEcSettings(Settings):
# ss_dist = os.path.join(os.path.dirname(os.path.realpath(__file__)),
# 'conf/ss_dist.txt')
# TODO: move these constant variable in objects which can read these file !!
# TODO: Baseclass inspired from this class and ariabase class. All
# objects in this package should extend the base object
"""
Settings object for ariaec
"""
ARIAPROJ_TEMPLATE = 'templates/aria_project_v2.3.0.xml'
SS_DIST = 'data/ss_dist.txt'
SCSC_MIN = 'data/scsc_min.p'
TOPO = 'data/topallhdg5.3.pro'
COMMANDS = ("main", "setup", "contactmap", "bbconv", "contactdef")
def __init__(self, name):
super(AriaEcSettings, self).__init__(("main", "setup", "contactmap",
"bbconv", "contactdef"))
"""
Initiate settings with name related to a command
:param name:
"""
super(AriaEcSettings, self).__init__(self.COMMANDS)
self._infra = {}
self._scsc_min = None
self._ssdist = None
......@@ -109,6 +127,10 @@ class AriaEcSettings(Settings):
@property
def infra(self):
"""
Infrastructure for a specific command
:return:
"""
if self.name == "setup" and not self._infra:
self._infra = {"xml": "", "tbl": "", "others": ""}
self._up_infra()
......@@ -130,6 +152,11 @@ class AriaEcSettings(Settings):
@property
def ssdist(self):
"""
Get distance file for secondary structures in the package or in config
file
:return:
"""
if not self._ssdist:
if self.main.config["ss_dist_file"] and \
os.path.exists(self.main.config["ss_dist_file"]):
......@@ -140,20 +167,28 @@ class AriaEcSettings(Settings):
@property
def template(self):
    """
    Path of the aria project template, resolved once and then cached in
    ``self._template``.

    The version is read from the ``ariaproject_template`` entry of the main
    config section. If no packaged template exists for that version, an
    error is logged and the default ``ARIAPROJ_TEMPLATE`` is used instead.

    :return: value of ``self._template``
    """
    if self._template:
        # Already resolved by a previous access
        return self._template

    version = self.main.config["ariaproject_template"]
    candidate = pkgr.resource_filename(
        __name__, "templates/aria_project_v%s.xml" % str(version))
    if os.path.exists(candidate):
        self._template = candidate
    else:
        LOG.error("Template version for aria project (%s) is not "
                  "supported", self.main.config.get("ariaproject_template"))
        self._template = pkgr.resource_filename(__name__,
                                                self.ARIAPROJ_TEMPLATE)
    return self._template
@property
def scsc_min(self):
# If scsc_min already computed
"""
Get contact index for side chains in package or config file
:return:
"""
if not self._scsc_min:
try:
# Read scsc_min_file given in aria_ec.ini
......@@ -174,6 +209,9 @@ class AriaEcSettings(Settings):
return self._scsc_min
def make_infra(self):
"""
Generate infrastructure
"""
LOG.info("Making output directories")
for direct in self.infra:
LOG.debug("Create %s directory", self.infra[direct])
......@@ -181,4 +219,9 @@ class AriaEcSettings(Settings):
os.makedirs(os.path.abspath(self.infra[direct]))
def load_config(self, configpath, **kwargs):
    """
    Delegate configuration loading to the parent class implementation.

    The parent's return value is not propagated.

    :param configpath: forwarded unchanged to the parent ``load_config``
    :param kwargs: extra keyword arguments forwarded unchanged
    """
    parent = super(AriaEcSettings, self)
    parent.load_config(configpath, **kwargs)
No preview for this file type
# coding=utf-8
"""
Input/Output aria_ec scripts
"""
......@@ -15,6 +16,10 @@ LOG = logging.getLogger(__name__)
class AriaEcContactMap(object):
"""
Contact maplot class
"""
def __init__(self, settings):
# TODO: check_type settings (AriaEcSettings)
self.settings = settings
......@@ -30,6 +35,9 @@ class AriaEcContactMap(object):
def run(self):
# Check input
"""
Main method
"""
LOG.debug("Settings:\n" + json.dumps(self.settings.contactmap.config,
indent=4))
LOG.debug("Args:\n" + json.dumps(self.settings.contactmap.args,
......@@ -128,7 +136,7 @@ class AriaEcContactMap(object):
prefix = "_".join((mapname, self.refname)).replace(".", "_")
if mapname == self.refname:
if not self.settings.contactmap.args.get("onlyreport", False):
if self.settings.contactmap.args.get("onlyreport", False) is not False:
refmap.write_contacts(mapname,
outdir=outdir,
scoremap=self.refmap.get("scoremap",
......
No preview for this file type
This diff is collapsed.
No preview for this file type
# coding=utf-8
"""
Reader objects
"""
......@@ -17,6 +18,10 @@ Atom = collections.namedtuple("Atom", ["name", "coords"])
class RegexFile(object):
"""
File which can be parsed with a regex
"""
def __init__(self, filepath, filetype='', regex='', sort=''):
self.regex = regex
self.sort = sort
......@@ -52,6 +57,9 @@ class MapFile(RegexFile):
# sort_field allow sorting lines with values into this field
# TODO: wrong regex for native_full ?
# TODO: smarter dict ...
"""
Map file class
"""
types = {
"plmdca": {
"regex": re.compile(r"^(?P<res1_nb>\d+)\s+(?P<res1_name>\w)\s+"
......@@ -180,6 +188,10 @@ class MapFile(RegexFile):
"regex": re.compile(
r'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'),
"score_field": None
},
"empty": {
"regex": re.compile(r'^\s*$'),
"score_field": None
}
}
check_type = True
......@@ -231,7 +243,7 @@ class MapFile(RegexFile):
if self.lines[line]['res1_name'] != aa_seq[int(self.lines[line]['res1_nb']) - 1] \
or self.lines[line]['res2_name'] != aa_seq[int(self.lines[line]['res2_nb']) - 1]:
LOG.error("Difference between given sequence and residu "
"names in contact file at line %d !", line)
"names in contact file at line %d !", line)
def update_map(self, resmap):
"""
......@@ -247,22 +259,28 @@ class MapFile(RegexFile):
:return:
"""
LOG.info("Checking format for file %s", self.filepath)
LOG.info("Checking if file %s correspond to %s format", self.filepath,
self.filetype)
# Check if given type is supported
# TODO: report this check into commands section
defaults = ("default_1", "default_2", "default_3", "default_4", "empty")
if os.stat(self.filepath).st_size == 0:
LOG.warning("File %s is empty !", self.filepath)
return [
self.types["empty"].get("regex"),
self.filetype,
self.types["empty"].get("score_field")
]
with open(self.filepath) as infile:
# Check first and second line of file
for index, line in enumerate(infile):
if self.filetype in self.types:
LOG.info("Given format (%s) should be supported",
self.filetype)
match = self.types[self.filetype].get("regex").match(line)
else:
match = None
LOG.error("Format %s not supported !", self.filetype)
# TODO: DRY rule !!
def1match = self.types["default_1"]["regex"].match(line)
def2match = self.types["default_2"]["regex"].match(line)
def3match = self.types["default_3"]["regex"].match(line)
def4match = self.types["default_4"]["regex"].match(line)
if match:
LOG.debug("Format type correct")
return [
......@@ -270,37 +288,20 @@ class MapFile(RegexFile):
self.filetype,
self.types[self.filetype].get("score_field")
]
elif def1match:
LOG.debug("Format type correct")
return [
self.types["default_1"].get("regex"),
self.filetype,
self.types["default_1"].get("score_field")
]
elif def2match:
LOG.debug("Format type correct")
return [
self.types["default_2"].get("regex"),
self.filetype,
self.types["default_2"].get("score_field")
]
elif def3match:
LOG.debug("Format type correct")
return [
self.types["default_3"].get("regex"),
self.filetype,
self.types["default_3"].get("score_field")
]
elif def4match:
LOG.debug("Format type correct")
return [
self.types["default_4"].get("regex"),
self.filetype,
self.types["default_4"].get("score_field")
]
else:
LOG.warning("Given type do not correspond, checking default"
" format for contactlist or empty file...")
for subformat in defaults:
if self.types.get(subformat)["regex"].match(line):
LOG.debug("Format type correct %s", subformat)
return [
self.types[subformat].get("regex"),
self.filetype,
self.types[subformat].get("score_field")
]
if index > 2:
# Stop checking after second line
LOG.error("Error reading %s file.", self.filetype)
# Remove contact file
break
LOG.error("Wrong format type given ...")
return [None] * 3
......@@ -368,12 +369,16 @@ class MapFile(RegexFile):
self.lines[contact].get("res2_nb")]
if len(self.contactlist) != len(self.clashlist):
LOG.error("When reading input file, clash list is not "
"the same length than contactlist")
"the same length than contactlist")
LOG.debug(self.clashlist)
class ContactMapFile(MapFile):
# "plmdca", "evfold", "bbcontacts", "pconsc", "gremlin", "metapsicov",
"""
Contact map file
"""
def __init__(self, filepath, filetype):
"""
......@@ -384,6 +389,11 @@ class ContactMapFile(MapFile):
super(self.__class__, self).__init__(filepath, filetype)
def update_map(self, resmap):
    """
    Placeholder: not implemented for this file class.

    :param resmap: not used
    :raises NotImplementedError: always
    """
    # TODO: swap dataframe factory here
    raise NotImplementedError()
......@@ -432,7 +442,7 @@ class ContactMapFile(MapFile):
if (int(residx1.split("-")[0]) != resid1) or \
(resid2 != int(residx2.split("-")[0])):
LOG.error("Wrong resid humanidx (%d, %d) in contact (%d) is "
"not the same in resmap (%d, %d)",
"not the same in resmap (%d, %d)",
resid1, resid2, contact_id,
int(residx1.split("-")[0]),
int(residx2.split("-")[0]))
......@@ -454,6 +464,9 @@ class ContactMapFile(MapFile):
class PDBFile(MapFile):
"""
PDB file
"""
pdbreg = re.compile(r'^(?P<record>ATOM |HETATM)(?P<serial>[\s\w]{5})'
r'\s(?P<name>[\s\w]{4})'
r'(?P<altLoc>[\s\w])'
......@@ -475,9 +488,20 @@ class PDBFile(MapFile):
def create_map(self, protein, contactdef, groupby_method="min", scsc=None,
flaglist=None, sym=True, path=""):
"""
:param protein:
:param contactdef:
:param groupby_method:
:param scsc:
:param flaglist:
:param sym:
:param path:
"""
resmap = ResAtmMap(protein.aa_sequence.sequence, mtype='distance',
flaglist=flaglist, path=path,
seqidx=protein.index, desc=self.filetype)
# noinspection PyTypeChecker
resmap[:] = self.update_map(resmap, sym=sym)
LOG.debug("pdb distance map:\n%s", resmap)
self.mapdict["alldistmap"] = resmap
......@@ -487,6 +511,12 @@ class PDBFile(MapFile):
self.mapdict["contactmap"] = self.mapdict["allcontactmap"].reduce()
def update_map(self, resmap, sym=True):
"""
:param resmap:
:param sym:
:return:
"""
# Map only on heavy atoms
# TODO: check if same sequence in pdb file
LOG.info("Updating distance map with pdb file")
......@@ -537,35 +567,63 @@ class PDBFile(MapFile):
if sym:
newmap.loc[:][idx] = None
LOG.error("Can't update pdb distance map for pos in pdb file "
"%s with %s", list(error_list), missidx)
"%s with %s", list(error_list), missidx)
return newmap
class DistanceMapFile(MapFile):
    """
    Distance matrix file (placeholder, not implemented yet)
    """

    def __init__(self, filepath, filetype):
        """
        :param filepath: forwarded to the parent constructor
        :param filetype: forwarded to the parent constructor
        :raises NotImplementedError: always, once the parent constructor
            has returned
        """
        # The one-argument form super(MapFile) creates an unbound super
        # object, so calling __init__ on it re-initialises that super object
        # with (filepath, filetype) and fails with TypeError. Use the
        # explicit two-argument form so the parent initialiser is called.
        super(DistanceMapFile, self).__init__(filepath, filetype)
        raise NotImplementedError

    def create_map(self, aa_seq, contactdef, **kwargs):
        """
        Placeholder, does nothing yet.

        :param aa_seq:
        :param contactdef:
        :param kwargs:
        :return: None
        """
        # Native dist

    def update_map(self, resmap):
        """
        Placeholder, does nothing yet.

        :param resmap:
        :return: None
        """
        # Build the map from the residue list + distance information of the
        # file
        # return DistanceMap
class ProtFileListReader:
class ProtFileListReader(object):
"""
List of file object
"""
def __init__(self, cont_def=5.0):
self.filelist = []
self.contactdef = cont_def
def clear(self):
"""
Initiatise from scratch object
:return:
"""
# TODO: Init supprime bien les fichiers du cache ?
self.__init__(self.contactdef)
def add_file(self, filepathlist, filetypelist=None):
"""
:param filepathlist:
:param filetypelist:
:return:
"""
filepathlist = [filepathlist] if type(
filepathlist) != list else filepathlist
filetypelist = [filetypelist] if type(
......@@ -594,6 +652,15 @@ class ProtFileListReader:
def read(self, filepathlist, filetypelist=None, protein=None, scsc=None,
**kwargs):
"""
:param filepathlist:
:param filetypelist:
:param protein:
:param scsc:
:param kwargs:
:return:
"""
self.clear()
self.add_file(filepathlist, filetypelist=filetypelist)
for fo in self.filelist:
......
No preview for this file type
# coding=utf-8
"""
Input/Output aria_ec scripts
"""
from __future__ import absolute_import, division, print_function
import logging
import json
import os
import json
import logging
from .protein import Protein
from .reader import ProtFileListReader
......@@ -14,15 +15,17 @@ from .protmap import MapFilter
from .econverter import AriaEcXMLConverter
# TODO: Take inspiration from pandas/__init__.py for handling dependencies
# from basictools import *
LOG = logging.getLogger(__name__)
class AriaEcSetup:
class AriaEcSetup(object):
"""
Aria Ec Setup protocol
"""
def __init__(self, settings):
"""
:param settings:
:return:
"""
......@@ -40,7 +43,7 @@ class AriaEcSetup:
def run(self):
"""
main method
:return:
"""
# Check input
......@@ -70,21 +73,24 @@ class AriaEcSetup:
# TODO: change read method in reader to __call__
# -------------------------- contact maps ---------------------------- #
self.reader.read(self.settings.setup.args.get("infiles"),
filetypelist=self.settings.setup.args.get("contact_types"),
filetypelist=self.settings.setup.args.get(
"contact_types"),
protein=self.protein,
groupby_method=self.settings.setup.config['groupby_method'],
groupby_method=self.settings.setup.config[
'groupby_method'],
scsc=self.settings.scsc_min)
for fo in self.reader.filelist:
for mapfile in self.reader.filelist:
# fo need a contactmap in order to wite XML dist restraints
# TODO: filter pour toutes les map de mapdict !! (fonction remove
# s'applique sur l'humanidx contenant les residus)
self.filter(fo.mapdict, fo.filetype, fo.contactlist, self.protein