diff --git a/ariaec/commands.py b/ariaec/commands.py index ad37ce5fdb528f7a3d5f265e0e79a177e3e674aa..13786b437c9b5f0a776eb42e8b2993e820cabf3b 100644 --- a/ariaec/commands.py +++ b/ariaec/commands.py @@ -1,3 +1,4 @@ +# coding=utf-8 """ Input/Output aria_ec """ @@ -17,7 +18,10 @@ LOG = logging.getLogger(__name__) def check_file(prospective_file): + """ + :param prospective_file: + """ if not os.path.exists(prospective_file): raise argp.ArgumentTypeError("readable_file:'{0}' is not a valid " "path".format(prospective_file)) @@ -27,6 +31,9 @@ def check_file(prospective_file): class ReadableFile(argp.Action): + """ + Class used with argparse action to check if a file is readable + """ def __init__(self, *args, **kwargs): super(ReadableFile, self).__init__(*args, **kwargs) @@ -42,7 +49,7 @@ class ReadableFile(argp.Action): # TODO: Make parent Command class with _create_argparser, self.args, # update_logger and run -class AriaEcCommand: +class AriaEcCommand(object): """ Argparse interface for aria_ec """ @@ -84,7 +91,8 @@ class AriaEcCommand: formatter_class=argp.ArgumentDefaultsHelpFormatter) parser.add_argument("-o", "--output", dest="output_directory", type=str, help="Output directory", required=True) - parser.add_argument("-c", "--conf", action=ReadableFile, dest="conf_file", + parser.add_argument("-c", "--conf", action=ReadableFile, + dest="conf_file", default=None, help="configuration file") parser.add_argument("--nolog", action="store_true", default=False, help="Don't generate log files") @@ -100,8 +108,9 @@ class AriaEcCommand: """ for index, command in enumerate(self.command_list): # Create subparser defined in command list - subcommand = getattr(self, "_" + command + "_parser")(desc=self.desc_list[ - index]) + subcommand = getattr(self, "_" + command + "_parser")( + desc=self.desc_list[ + index]) parser.add_parser(command, parents=[subcommand]) def _setup_parser(self, desc=None): @@ -202,6 +211,10 @@ class AriaEcCommand: return parser def create_settings(self): + """ + create settings relative to args.command + :return: + """ LOG.debug("Create AriaEcSettings") settings = AriaEcSettings(self.args.command) LOG.info("Loading default config file") @@ -218,20 +231,30 @@ class AriaEcCommand: return settings def run(self): - # call method relative to args.command + """ + call method relative to args.command + """ LOG.info("Run %s command", self.args.command) getattr(self, self.args.command)() def setup(self): + """ + Setup call + """ setup_inst = AriaEcSetup(self.create_settings()) setup_inst.run() def bbconv(self): + """ + bbcontacts converter call + """ bbconverter = AriaEcBbConverter(self.create_settings()) bbconverter.run() def contactmap(self): - # instantiate AriaEcContactmap with AriaSettings + """ + instantiate AriaEcContactmap with AriaSettings + """ econtactmap = AriaEcContactMap(self.create_settings()) econtactmap.run() diff --git a/ariaec/commands.pyc b/ariaec/commands.pyc index ef3fef48cb02bb406983fb6ace50addcd87a0635..72ed275f274dc4fa8fe10a8ea2813228ec3c804c 100644 Binary files a/ariaec/commands.pyc and b/ariaec/commands.pyc differ diff --git a/ariaec/econverter.pyc b/ariaec/econverter.pyc index c52f60e4c6d1b263e46fdadcf909cb845f93855c..67ea09583d2246c33cd715da1ec61b869bc83e65 100644 Binary files a/ariaec/econverter.pyc and b/ariaec/econverter.pyc differ diff --git a/ariaec/ecsettings.py b/ariaec/ecsettings.py index 3ecb943f78b0939aa3dbb52e8d25e21328d43af3..4b0756fbefc89d4ca7087ea2fa81f128c28c3191 100644 --- a/ariaec/ecsettings.py +++ b/ariaec/ecsettings.py @@ -1,21 +1,26 @@ +# coding=utf-8 """ Settings section """ from __future__ import absolute_import, division, print_function -import logging import os -from configparser import SafeConfigParser -import collections +import logging import pickle +import collections import pkg_resources as pkgr +# noinspection PyCompatibility +from ConfigParser import ConfigParser from .base import format_dict LOG = logging.getLogger(__name__) -class Setting: +class Setting(object): + """ + Main setting object with args and config section + """ def __init__(self): self.config = collections.defaultdict() @@ -27,6 +32,9 @@ class Setting: class Settings(object): + """ + Group settings with each section corresponding to a Setting object + """ def __init__(self, sections): self._sections = set(sections) @@ -46,7 +54,8 @@ class Settings(object): elif not pkg: LOG.error("Configuration file not found (%s)", configpath) return None - config = SafeConfigParser(allow_no_value=True) + # config = SafeConfigParser(allow_no_value=True) + config = ConfigParser(allow_no_value=True) if pkg: with pkgr.resource_stream(__name__, configpath) as conf: config.readfp(conf) @@ -64,9 +73,12 @@ class Settings(object): LOG.warning("Unknow config section %s", section) def write_config(self, filename): - # Ecrit les config de toutes les sections dans un autre fichier + """ + Write config of all sections into another file + :param filename: + """ LOG.info("Writing .ini file (%s)", filename) - config = SafeConfigParser(allow_no_value=True) + config = ConfigParser(allow_no_value=True) iniout = open(filename, mode="w") for section in self._sections: config.add_section(section) @@ -81,20 +93,26 @@ class Settings(object): class AriaEcSettings(Settings): - # ss_dist = os.path.join(os.path.dirname(os.path.realpath(__file__)), # 'conf/ss_dist.txt') # TODO: move these constant variable in objects which can read these file !! # TODO: Baseclass inspired from this class and ariabase class. All # objects in this package should extend the base object + """ + Settings object for ariaec + """ ARIAPROJ_TEMPLATE = 'templates/aria_project_v2.3.0.xml' SS_DIST = 'data/ss_dist.txt' SCSC_MIN = 'data/scsc_min.p' TOPO = 'data/topallhdg5.3.pro' + COMMANDS = ("main", "setup", "contactmap", "bbconv", "contactdef") def __init__(self, name): - super(AriaEcSettings, self).__init__(("main", "setup", "contactmap", - "bbconv", "contactdef")) + """ + Initiate settings with name related to a command + :param name: + """ + super(AriaEcSettings, self).__init__(self.COMMANDS) self._infra = {} self._scsc_min = None self._ssdist = None @@ -109,6 +127,10 @@ class AriaEcSettings(Settings): @property def infra(self): + """ + Infrastructure for a specific command + :return: + """ if self.name == "setup" and not self._infra: self._infra = {"xml": "", "tbl": "", "others": ""} self._up_infra() @@ -130,6 +152,11 @@ class AriaEcSettings(Settings): @property def ssdist(self): + """ + Get distance file for secondary structures in the package or in config + file + :return: + """ if not self._ssdist: if self.main.config["ss_dist_file"] and \ os.path.exists(self.main.config["ss_dist_file"]): @@ -140,20 +167,28 @@ class AriaEcSettings(Settings): @property def template(self): + """ + Get template files in config file or in the package + :return: + """ if not self._template: - templatepath = "templates/aria_project_v%s.xml" % str(self.main.config["ariaproject_template"]) + templatepath = "templates/aria_project_v%s.xml" % \ + str(self.main.config["ariaproject_template"]) if os.path.exists(pkgr.resource_filename(__name__, templatepath)): self._template = pkgr.resource_filename(__name__, templatepath) else: LOG.error("Template version for aria project (%s) is not " - "supported", self.main.config.get("ariaproject_template")) + "supported", self.main.config.get("ariaproject_template")) self._template = pkgr.resource_filename(__name__, self.ARIAPROJ_TEMPLATE) return self._template @property def scsc_min(self): - # If scsc_min already computed + """ + Get contact index for side chains in package or config file + :return: + """ if not self._scsc_min: try: # Read scsc_min_file given in aria_ec.ini @@ -174,6 +209,9 @@ class AriaEcSettings(Settings): return self._scsc_min def make_infra(self): + """ + Generate infrastructure + """ LOG.info("Making output directories") for direct in self.infra: LOG.debug("Create %s directory", self.infra[direct]) @@ -181,4 +219,9 @@ class AriaEcSettings(Settings): os.makedirs(os.path.abspath(self.infra[direct])) def load_config(self, configpath, **kwargs): + """ + + :param configpath: + :param kwargs: + """ super(AriaEcSettings, self).load_config(configpath, **kwargs) diff --git a/ariaec/ecsettings.pyc b/ariaec/ecsettings.pyc index 89155bbf715eea0cb2320e460060a723f1e91474..9b233e5f5eb146a89a6df7661662aa9e6a104fbc 100644 Binary files a/ariaec/ecsettings.pyc and b/ariaec/ecsettings.pyc differ diff --git a/ariaec/maplot.py b/ariaec/maplot.py index 127f8d6aa7dfa727c2f8dab6365c6945f59f83fd..38bad0a0f2625a0c5d928c233efcf18a51a14545 100644 --- a/ariaec/maplot.py +++ b/ariaec/maplot.py @@ -1,3 +1,4 @@ +# coding=utf-8 """ Input/Output aria_ec scripts """ @@ -15,6 +16,10 @@ LOG = logging.getLogger(__name__) class AriaEcContactMap(object): + """ + Contact maplot class + """ + def __init__(self, settings): # TODO: check_type settings (AriaEcSettings) self.settings = settings @@ -30,6 +35,9 @@ class AriaEcContactMap(object): def run(self): # Check input + """ + Main method + """ LOG.debug("Settings:\n" + json.dumps(self.settings.contactmap.config, indent=4)) LOG.debug("Args:\n" + json.dumps(self.settings.contactmap.args, @@ -128,7 +136,7 @@ class AriaEcContactMap(object): prefix = "_".join((mapname, self.refname)).replace(".", "_") if mapname == self.refname: - if not self.settings.contactmap.args.get("onlyreport", False): + if self.settings.contactmap.args.get("onlyreport", False) is not False: refmap.write_contacts(mapname, outdir=outdir, scoremap=self.refmap.get("scoremap", diff --git a/ariaec/maplot.pyc b/ariaec/maplot.pyc index a32ab2f8b4e7ea0bfd7f31d87eeb6a269ebae349..06f6847c6a01277b4a3418504cf61e7b3ce0ee37 100644 Binary files a/ariaec/maplot.pyc and b/ariaec/maplot.pyc differ diff --git a/ariaec/protmap.py b/ariaec/protmap.py index d99940d83629e98e23fb69ead7b0189950fb1c0d..fa8da27a092e6e3646432e2daf63f92360b5447d 100644 --- a/ariaec/protmap.py +++ b/ariaec/protmap.py @@ -1,3 +1,4 @@ +# coding=utf-8 """ ARIA Evolutionary Constraints Tools """ @@ -42,6 +43,11 @@ class Map(pd.DataFrame): mtype_choices = {'contact': bool, 'distance': float, "score": float} def update(self, *args, **kwargs): + """ + + :param args: + :param kwargs: + """ super(Map, self).update(*args, **kwargs) # def _constructor_expanddim(self): @@ -76,6 +82,11 @@ class Map(pd.DataFrame): def sortedset(self, human_idx=False): # Remove duplicate in sort_list + """ + + :param human_idx: + :return: + """ n = 1 if human_idx else 0 if hasattr(self, "sort_list"): if self.sym: @@ -92,6 +103,11 @@ class Map(pd.DataFrame): return None def check_type(self, mtype): + """ + + :param mtype: + :return: + """ value = self.mtype_choices.get(mtype) if value: return value @@ -101,6 +117,10 @@ class Map(pd.DataFrame): return None def reduce(self): + """ + + :return: + """ columns = ['-'.join(idx) for idx in self.columns] index = ['-'.join(idx) for idx in self.index] return getattr(self, '__init__')(self.sequence, @@ -112,6 +132,10 @@ class Map(pd.DataFrame): def remove(self, rm_list): # Reset values at positions in rm_list + """ + + :param rm_list: + """ value = False if self.dtype == bool else 0.0 for contact in rm_list: idx1, idx2 = self.index[contact[0]], self.index[contact[1]] @@ -127,6 +151,10 @@ class Map(pd.DataFrame): def to_series(self): # Return panda series related to lower triangle + """ + + :return: + """ df = self.copy() df = df.astype(float) # remove values from upper triangle @@ -135,9 +163,15 @@ class Map(pd.DataFrame): return df.unstack().dropna() def topmap(self, scoremap, nb_topcontact): + """ + + :param scoremap: + :param nb_topcontact: + :return: + """ if self.dtype != bool: LOG.info("Error when retrieving top contact map. The type of " - "the given map is not a contact type!") + "the given map is not a contact type!") return self self[:] = False if self.shape == scoremap.shape: @@ -165,14 +199,24 @@ class Map(pd.DataFrame): self.loc[idx, idx] = pairdict[idxval] def set_value(self, index, col, value, **kwargs): + """ + + :param index: + :param col: + :param value: + :param kwargs: + """ super(Map, self).set_value(index, col, value, **kwargs) if self.sym: super(Map, self).set_value(col, index, value, **kwargs) class ProteinMap(Map): - # TODO: Matrices PosAaAtmMap, AaAtmMap, AtmMap + """ + Protein contact map object + """ + def __init__(self, sequence, flaglist=None, seqidx=None, index=None, columns=None, **kwargs): idx, col = self.create_index(sequence, seqidx=seqidx, idxnames=index, @@ -188,9 +232,15 @@ class ProteinMap(Map): @property def sequence(self): + """ + aa sequence + """ raise NotImplementedError def plotflush(self): + """ + Flush contact map plot + """ plt.clf() plt.cla() plt.close() @@ -198,6 +248,10 @@ class ProteinMap(Map): @property def maplot(self): + """ + Contact map plot + :return: + """ # Contact map Plot if not self._maplot: # Flush matplot @@ -215,12 +269,19 @@ class ProteinMap(Map): def saveplot(self, outdir='', outprefix="protein", size_fig=10, plot_ext="pdf", plot_dpi=200): + """ + Save plot + :param outdir: + :param outprefix: + :param size_fig: + :param plot_ext: + :param plot_dpi: + """ plotpath = os.path.join(outdir, "%s.contactmap.%s" % ( outprefix, plot_ext)) LOG.info("Generate contact map plot (%s)", plotpath) f, ax = plt.subplots(figsize=(12, 9)) - tickrot(self.maplot.axes, self.maplot.figure, - rotype='horizontal') + tickrot(self.maplot.axes, self.maplot.figure) self.maplot.figure.set_size_inches(size_fig, size_fig) map_title = "%s contacts map" % outprefix self.maplot.set_title(map_title) @@ -230,7 +291,11 @@ class ProteinMap(Map): self.maplot.figure.savefig(plotpath, dpi=plot_dpi) def contactset(self, human_idx=False): - # Remove duplicate in contact_list + """ + Remove duplicate in contact_list + :param human_idx: + :return: + """ if self.contact_list(): return sorted(set((tuple(sorted((x, y))) for x, y in self.contact_list(human_idx)))) @@ -238,7 +303,11 @@ class ProteinMap(Map): return None def contact_list(self, human_idx=False): - # Return contact list + """ + Return contact list + :param human_idx: + :return: + """ contact_list = [] n = 1 if human_idx else 0 if self.dtype is bool: @@ -251,9 +320,18 @@ class ProteinMap(Map): return None def create_heatmap(self): + """ + Generate heatmap + :return: + """ raise NotImplementedError def contact_map(self, contactdef, scsc_min=None): + """ + Generate contact map + :param contactdef: + :param scsc_min: + """ raise NotImplementedError def create_index(self, sequence, seqidx=None, idxnames=None, colnames=None): @@ -268,6 +346,14 @@ class ProteinMap(Map): raise NotImplementedError def compareplot(self, protmap, save_fig=True, alpha=None, **kwargs): + """ + Compare 2 contact map and plot differences + :param protmap: + :param save_fig: + :param alpha: + :param kwargs: + :return: + """ self.plotflush() # Contact map plot if getattr(protmap, "shape") and self.shape != protmap.shape: @@ -315,6 +401,16 @@ class ProteinMap(Map): def report(self, cmpmap, scoremap=None, outprefix="", outdir="", plotdir="", plot_ext="pdf", plotag=True): + """ + Generate report file + :param cmpmap: + :param scoremap: + :param outprefix: + :param outdir: + :param plotdir: + :param plot_ext: + :param plotag: + """ filename = ".".join((outprefix, "mapreport")) if outprefix else \ "mapreport" reportpath = "%s/%s" % (outdir, filename) @@ -333,6 +429,15 @@ class ProteinMap(Map): allrec = None prthresholds = None aver_prec = None + acc = None + prec = None + recall = None + mcc = None + f1s = None + f2s = None + f05s = None + hamm = None + hin = None if scoremap is not None: # If prediction scores given, compute roc and precall curves @@ -347,6 +452,20 @@ class ProteinMap(Map): y_true, y_scores) aver_prec = skm.average_precision_score(y_true, y_scores) + if 1 in y_pred and 1 in y_true: + # If one map is empty, we do not compute analysis + acc = skm.accuracy_score(y_true, y_pred) + prec = skm.precision_score(y_true, y_pred) + recall = skm.recall_score(y_true, y_pred) + mcc = skm.matthews_corrcoef(y_true, y_pred) + f1s = skm.f1_score(y_true, y_pred) + f2s = skm.fbeta_score(y_true, y_pred, 2) + f05s = skm.fbeta_score(y_true, y_pred, 0.5) + hamm = skm.hamming_loss(y_true, y_pred) + hin = skm.hinge_loss(y_true, y_pred) + else: + LOG.error("Can't analyze empty map(s).") + msg = """\ ## Report {map1name} vs. {map2name} ## @@ -402,22 +521,16 @@ class ProteinMap(Map): ## False Positive Rate (1 - Specificity) values: ## {allfpr} ## Score tresholds ({map2name}): -## {rocthres}""".format(map1name=map1name, map2name=map2name, - map1path=self.path, map2path=cmpmap.path, - seq=self.sequence, protlen=len(self.sequence), - date=datetime.date.today().strftime("%A %d. %B %Y"), - outdir=outdir, accuracy=skm.accuracy_score(y_true, y_pred), - precision=skm.precision_score(y_true, y_pred), - recall=skm.recall_score(y_true, y_pred), - mcc=skm.matthews_corrcoef(y_true, y_pred), - f1s=skm.f1_score(y_true, y_pred), - f2s=skm.fbeta_score(y_true, y_pred, 2), - f05s=skm.fbeta_score(y_true, y_pred, 0.5), - hamm=skm.hamming_loss(y_true, y_pred), - hin=skm.hinge_loss(y_true, y_pred), aver_prec=aver_prec, - roc_auc=roc_auc, allprec=allprec, allrec=allrec, - prthres=prthresholds, alltpr=alltpr, allfpr=allfpr, - rocthres=rocthresholds) +## {rocthres}""".format( + map1name=map1name, map2name=map2name, map1path=self.path, + map2path=cmpmap.path, seq=self.sequence, + protlen=len(self.sequence), + date=datetime.date.today().strftime("%A %d. %B %Y"), + outdir=outdir, accuracy=acc, precision=prec, recall=recall, + mcc=mcc, f1s=f1s, f2s=f2s, f05s=f05s, hamm=hamm, hin=hin, + aver_prec=aver_prec, roc_auc=roc_auc, allprec=allprec, + allrec=allrec, prthres=prthresholds, alltpr=alltpr, + allfpr=allfpr, rocthres=rocthresholds) LOG.debug("\n" + msg) reportf.write(msg) @@ -468,6 +581,16 @@ class ProteinMap(Map): def compare_contactmap(self, cmpmap, contactlist, outprefix, outdir="", distmap=None, human_idx=True): + """ + Compare 2 contact map and plot differences + :param cmpmap: + :param contactlist: + :param outprefix: + :param outdir: + :param distmap: + :param human_idx: + :return: + """ # CSV file giving TP/FP contacts outpath = "%s/%s.contactcmp.csv" % (outdir, outprefix) LOG.info("Generate stat file (%s)", outpath) @@ -499,6 +622,14 @@ class ProteinMap(Map): def write_contacts(self, filename, outdir="", human_idx=True, scoremap=None): + """ + + :param filename: + :param outdir: + :param human_idx: + :param scoremap: + :return: + """ filepath = "%s/%s.contact.txt" % (outdir, filename) LOG.info("Generate contact file (%s)", filepath) with open(filepath, 'w') as outfile: @@ -551,12 +682,24 @@ class ResAtmMap(ProteinMap): @property def sequence(self): - # Amino Acid sequence string in humanidx + """ + Amino Acid sequence string in humanidx + :return: + """ return "".join(AminoAcid.AminoAcid(_.split("-")[1])[0] for _ in self.index.levels[0]) def create_index(self, sequence, seq_pos=True, seqidx=None, idxnames=None, colnames=None): + """ + + :param sequence: + :param seq_pos: + :param seqidx: + :param idxnames: + :param colnames: + :return: + """ LOG.info("Indexing res - res dataframe") # Atom table for residues (keys are in 3L code) seqidx = seqidx if seqidx and len(seqidx) == len(sequence) else None @@ -600,6 +743,10 @@ class ResAtmMap(ProteinMap): return index, columns def create_heatmap(self): + """ + + :return: + """ unidf = self.reduce() if unidf.isnull().values.sum() == 0: # If there's no nan values @@ -607,6 +754,11 @@ class ResAtmMap(ProteinMap): return None def reduce(self, groupby="min"): + """ + + :param groupby: + :return: + """ if self.index.nlevels == 1 or not groupby: return self newmap = ResMap(self.sequence, dtype=self.dtype, desc=self.desc, @@ -623,6 +775,7 @@ class ResAtmMap(ProteinMap): """ Contact map generator from all atoms distance map. There's a contact with 2 residues iff dist between 2 atoms are below the given treshold + :param def_cut: :param scsc_min: :param contactdef: for all atom pair @@ -660,7 +813,7 @@ class ResAtmMap(ProteinMap): treshold = contactdef[pair] else: LOG.warning("Treshold for %s ignored (lower than " - "the default cutoff)", str(pair)) + "the default cutoff)", str(pair)) LOG.info( "Filtering values in matrix related to %s (%s)", str(pair), str(treshold) if treshold else def_cutoff) @@ -696,7 +849,7 @@ class ResAtmMap(ProteinMap): contact_map.update(tmp) else: LOG.error("Missing values in contact definition section. Add " - "at least a default_cutoff value.") + "at least a default_cutoff value.") LOG.debug("Contact map\n%s", contact_map.head()) return contact_map @@ -714,13 +867,24 @@ class ResMap(ResAtmMap): @property def sequence(self): - # Amino Acid sequence string in humanidx + """ + Amino Acid sequence string in humanidx + :return: + """ return "".join(AminoAcid.AminoAcid(aa.split("-")[1])[0] for aa in self.index) def create_index(self, sequence, seqidx=None, idxnames=None, colnames=None, **kwargs): - # Index correspondant a la liste de residus + """ + Index related to residue list + :param sequence: + :param seqidx: + :param idxnames: + :param colnames: + :param kwargs: + :return: + """ seq = [AminoAcid.AminoAcid(aa)[1] for aa in sequence] seqidx = seqidx if seqidx and len(seqidx) == len(sequence) else None if seqidx: @@ -734,16 +898,31 @@ class ResMap(ResAtmMap): return index, col def create_heatmap(self): + """ + + :return: + """ if self.as_matrix().isnull().values.sum() == 0: # If there's no nan values return sns.heatmap(self.as_matrix()) return None def contact_map(self, contactdef, **kwargs): + """ + + :param contactdef: + :param kwargs: + :return: + """ contact_map = ResMap(self.sequence, mtype="contact", desc=self.desc, sym=self.sym) def treshconv(x): + """ + + :param x: + :return: + """ return x <= treshold # Applique treshold sur la matrice ssi c'est une matrice de distance @@ -772,6 +951,10 @@ class AaMap(Map): @staticmethod def create_index(): + """ + + :return: + """ res_list = ConversionTable.ConversionTable().table['AMINO_ACID'][ 'iupac'].keys() index = pd.Index(res_list, name="residuex") @@ -791,10 +974,17 @@ class AtmMap(Map): super(AtmMap, self).__init__(*args, **kwargs) def create_index(self): - pass + """ + + :return: + """ + raise NotImplementedError -class ProtMapCollection: +class ProtMapCollection(object): + """ + Group all protein maps + """ def __init__(self, settings): self._alldistmap = None @@ -865,6 +1055,10 @@ class ProtMapCollection: @property def distmap(self): + """ + + :return: + """ if self._alldistmap: return self._alldistmap.reduce(groupby=self.settings[ "groupby_method"]) @@ -876,7 +1070,7 @@ class ProtMapCollection: self._distmap = resmap -class MapFilter: +class MapFilter(object): """ Filter contactmap/distancemap nd : Network deconvolution @@ -905,6 +1099,11 @@ class MapFilter: def nd_filter(self, mapdict): # TODO: build ROC curve with number of top contacts as the parameter + """ + + :param mapdict: + :return: + """ LOG.info("...Network deconvolution filter (alpha=%.2f, beta=%.2f)", self.settings["nd_beta"], self.settings["nd_alpha"]) scoremap = mapdict["scoremap"] @@ -938,6 +1137,12 @@ class MapFilter: def cons_filter(self, mapdict, **kwargs): # Liste les contacts aves des residus fortement conserves + """ + + :param mapdict: + :param kwargs: + :return: + """ LOG.info("...Conservation filter") sec_struct = kwargs.get("sec_struct") clash_list = kwargs.get("clash_list") @@ -947,7 +1152,7 @@ class MapFilter: if sec_struct.filetype != "indextableplus": LOG.warning("Conservation filter only works with indextableplus " - "files !") + "files !") return {'clash': None, 'desc': None} # parcours la liste de paires dans la matrice struct secondaire @@ -970,6 +1175,12 @@ class MapFilter: # Si scoremap existe, selectionner les contacts cys-cys qui ont les # meilleurs scores, fournit une liste des contacts disulfures qui # possedent des scores plus faibles + """ + + :param mapdict: + :param kwargs: + :return: + """ LOG.info("...Disulfure bridge unicity filter") clash_list = kwargs.get("clash_list") unidisbridge_list = [] # Liste les ponts disulfures uniques @@ -1024,7 +1235,19 @@ class MapFilter: @staticmethod def ssclash_filter(mapdict, **kwargs): + """ + + :param mapdict: + :param kwargs: + :return: + """ + def hum_contact(xy): + """ + + :param xy: + :return: + """ return xy[0] + 1, xy[1] + 1 # TODO: better add clash list and sec_struct as object attribute @@ -1157,15 +1380,15 @@ class MapFilter: # Allow contact to the fifth residue in the # helix LOG.debug("Found (H-2, H) for contact %s clash " - "but contact with fifth residue is " - "actually allowed", + "but contact with fifth residue is " + "actually allowed", outcontact) ssclash = None elif ssclash in ("H-3,H", "H+3,H") \ and abs(resi - resj) < 12: LOG.debug("Found (H-3, H) for contact %s clash " - "but contact between 3rd and 10th " - "residues are actually allowed", + "but contact between 3rd and 10th " + "residues are actually allowed", outcontact) # Allow contact between 3rd residue and 10th ssclash = None @@ -1187,8 +1410,8 @@ class MapFilter: end = ss_start_end[strand][0] if abs(start - end + 1) <= 5: LOG.debug("Found (E-2, E+2) for contact " - "%s clash but strand " - "is < 5 residues", outcontact) + "%s clash but strand " + "is < 5 residues", outcontact) # Allow contact if strand < 5 residues (gap <8) ssclash = None elif ssclash in ("E-3,E+3", "E+3,E-3"): @@ -1200,8 +1423,8 @@ class MapFilter: elif ssclash in ("E-4,E", "E+4,E") \ and abs(resi - resj) < 8: LOG.debug("Found (E-4, E) for contact %s clash " - "but contacts below 4th residue are " - "actually allowed", outcontact) + "but contacts below 4th residue are " + "actually allowed", outcontact) ssclash = None if ssclash: LOG.debug( @@ -1296,6 +1519,16 @@ class MapFilter: def write_filtout(clash_dict, desc_dict, contactlist, outdir="", outprefix="protein", clashlist=None, human_idx=True): # TODO: utiliser self.clash_dict au lieu de meta_clash + """ + + :param clash_dict: + :param desc_dict: + :param contactlist: + :param outdir: + :param outprefix: + :param clashlist: + :param human_idx: + """ meta_clash = { "cons": { "flag": 888, "msg": "", "warn": "", diff --git a/ariaec/protmap.pyc b/ariaec/protmap.pyc index adcbcfc0577822efdbee59aae4830d90257bacd6..6919d03072f4c46b59bc6fc6602a553761e8b7e9 100644 Binary files a/ariaec/protmap.pyc and b/ariaec/protmap.pyc differ diff --git a/ariaec/reader.py b/ariaec/reader.py index 7b27c166b3c0620d0fe0283178cdff51b2fd4668..fec18b7b783e4405730651caed7d8bd272484365 100644 --- a/ariaec/reader.py +++ b/ariaec/reader.py @@ -1,3 +1,4 @@ +# coding=utf-8 """ Reader objects """ @@ -17,6 +18,10 @@ Atom = collections.namedtuple("Atom", ["name", "coords"]) class RegexFile(object): + """ + File which can be parsed with a regex + """ + def __init__(self, filepath, filetype='', regex='', sort=''): self.regex = regex self.sort = sort @@ -52,6 +57,9 @@ class MapFile(RegexFile): # sort_field allow sorting lines with values into this field # TODO: wrong regex for native_full ? # TODO: smarter dict ... + """ + Map file class + """ types = { "plmdca": { "regex": re.compile(r"^(?P<res1_nb>\d+)\s+(?P<res1_name>\w)\s+" @@ -180,6 +188,10 @@ class MapFile(RegexFile): "regex": re.compile( r'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'), "score_field": None + }, + "empty": { + "regex": re.compile(r'^\s*$'), + "score_field": None } } check_type = True @@ -231,7 +243,7 @@ class MapFile(RegexFile): if self.lines[line]['res1_name'] != aa_seq[int(self.lines[line]['res1_nb']) - 1] \ or self.lines[line]['res2_name'] != aa_seq[int(self.lines[line]['res2_nb']) - 1]: LOG.error("Difference between given sequence and residu " - "names in contact file at line %d !", line) + "names in contact file at line %d !", line) def update_map(self, resmap): """ @@ -247,22 +259,28 @@ class MapFile(RegexFile): :return: """ - LOG.info("Checking format for file %s", self.filepath) + LOG.info("Checking if file %s correspond to %s format", self.filepath, + self.filetype) # Check if given type is supported # TODO: report this check into commands section + defaults = ("default_1", "default_2", "default_3", "default_4", "empty") + if os.stat(self.filepath).st_size == 0: + LOG.warning("File %s is empty !", self.filepath) + return [ + self.types["empty"].get("regex"), + self.filetype, + self.types["empty"].get("score_field") + ] with open(self.filepath) as infile: # Check first and second line of file for index, line in enumerate(infile): if self.filetype in self.types: + LOG.info("Given format (%s) should be supported", + self.filetype) match = self.types[self.filetype].get("regex").match(line) else: match = None LOG.error("Format %s not supported !", self.filetype) - # TODO: DRY rule !! - def1match = self.types["default_1"]["regex"].match(line) - def2match = self.types["default_2"]["regex"].match(line) - def3match = self.types["default_3"]["regex"].match(line) - def4match = self.types["default_4"]["regex"].match(line) if match: LOG.debug("Format type correct") return [ @@ -270,37 +288,20 @@ class MapFile(RegexFile): self.filetype, self.types[self.filetype].get("score_field") ] - elif def1match: - LOG.debug("Format type correct") - return [ - self.types["default_1"].get("regex"), - self.filetype, - self.types["default_1"].get("score_field") - ] - elif def2match: - LOG.debug("Format type correct") - return [ - self.types["default_2"].get("regex"), - self.filetype, - self.types["default_2"].get("score_field") - ] - elif def3match: - LOG.debug("Format type correct") - return [ - self.types["default_3"].get("regex"), - self.filetype, - self.types["default_3"].get("score_field") - ] - elif def4match: - LOG.debug("Format type correct") - return [ - self.types["default_4"].get("regex"), - self.filetype, - self.types["default_4"].get("score_field") - ] + else: + LOG.warning("Given type do not correspond, checking default" + " format for contactlist or empty file...") + for subformat in defaults: + if self.types.get(subformat)["regex"].match(line): + LOG.debug("Format type correct %s", subformat) + return [ + self.types[subformat].get("regex"), + self.filetype, + self.types[subformat].get("score_field") + ] if index > 2: + # Stop checking after second line LOG.error("Error reading %s file.", self.filetype) - # Remove contact file break LOG.error("Wrong format type given ...") return [None] * 3 @@ -368,12 +369,16 @@ class MapFile(RegexFile): self.lines[contact].get("res2_nb")] if len(self.contactlist) != len(self.clashlist): LOG.error("When reading input file, clash list is not " - "the same length than contactlist") + "the same length than contactlist") LOG.debug(self.clashlist) class ContactMapFile(MapFile): # "plmdca", "evfold", "bbcontacts", "pconsc", "gremlin", "metapsicov", + """ + Contact map file + """ + def __init__(self, filepath, filetype): """ @@ -384,6 +389,11 @@ class ContactMapFile(MapFile): super(self.__class__, self).__init__(filepath, filetype) def update_map(self, resmap): + """ + + :param resmap: + :return: + """ # TODO: swap dataframe factory here raise NotImplementedError @@ -432,7 +442,7 @@ class ContactMapFile(MapFile): if (int(residx1.split("-")[0]) != resid1) or \ (resid2 != int(residx2.split("-")[0])): LOG.error("Wrong resid humanidx (%d, %d) in contact (%d) is " - "not the same in resmap (%d, %d)", + "not the same in resmap (%d, %d)", resid1, resid2, contact_id, int(residx1.split("-")[0]), int(residx2.split("-")[0])) @@ -454,6 +464,9 @@ class ContactMapFile(MapFile): class PDBFile(MapFile): + """ + PDB file + """ pdbreg = re.compile(r'^(?P<record>ATOM |HETATM)(?P<serial>[\s\w]{5})' r'\s(?P<name>[\s\w]{4})' r'(?P<altLoc>[\s\w])' @@ -475,9 +488,20 @@ class PDBFile(MapFile): def create_map(self, protein, contactdef, groupby_method="min", scsc=None, flaglist=None, sym=True, path=""): + """ + + :param protein: + :param contactdef: + :param groupby_method: + :param scsc: + :param flaglist: + :param sym: + :param path: + """ resmap = ResAtmMap(protein.aa_sequence.sequence, mtype='distance', flaglist=flaglist, path=path, seqidx=protein.index, desc=self.filetype) + # noinspection PyTypeChecker resmap[:] = self.update_map(resmap, sym=sym) LOG.debug("pdb distance map:\n%s", resmap) self.mapdict["alldistmap"] = resmap @@ -487,6 +511,12 @@ class PDBFile(MapFile): self.mapdict["contactmap"] = self.mapdict["allcontactmap"].reduce() def update_map(self, resmap, sym=True): + """ + + :param resmap: + :param sym: + :return: + """ # Map only on heavy atoms # TODO: check if same sequence in pdb file LOG.info("Updating distance map with pdb file") @@ -537,35 +567,63 @@ class PDBFile(MapFile): if sym: newmap.loc[:][idx] = None LOG.error("Can't update pdb distance map for pos in pdb file " - "%s with %s", list(error_list), missidx) + "%s with %s", list(error_list), missidx) return newmap class DistanceMapFile(MapFile): + """ + Distance matrix file + """ def __init__(self, filepath, filetype): super(MapFile).__init__(filepath, filetype) raise NotImplementedError def create_map(self, aa_seq, contactdef, **kwargs): + """ + + :param aa_seq: + :param contactdef: + :param kwargs: + :return: + """ pass # Native dist def update_map(self, resmap): + """ + + :param resmap: + :return: + """ pass # Construit map avec la liste de residus + infos de distance du fichier # return DistanceMap -class ProtFileListReader: +class ProtFileListReader(object): + """ + List of file object + """ def __init__(self, cont_def=5.0): self.filelist = [] self.contactdef = cont_def def clear(self): + """ + Initiatise from scratch object + :return: + """ # TODO: Init supprime bien les fichiers du cache ? self.__init__(self.contactdef) def add_file(self, filepathlist, filetypelist=None): + """ + + :param filepathlist: + :param filetypelist: + :return: + """ filepathlist = [filepathlist] if type( filepathlist) != list else filepathlist filetypelist = [filetypelist] if type( @@ -594,6 +652,15 @@ class ProtFileListReader: def read(self, filepathlist, filetypelist=None, protein=None, scsc=None, **kwargs): + """ + + :param filepathlist: + :param filetypelist: + :param protein: + :param scsc: + :param kwargs: + :return: + """ self.clear() self.add_file(filepathlist, filetypelist=filetypelist) for fo in self.filelist: diff --git a/ariaec/reader.pyc b/ariaec/reader.pyc index 3164221557912ab2ccdb44d3ba8fd89f3154d085..a7730d5d715b4bb4ee16c4a2946b21f7f9f9d329 100644 Binary files a/ariaec/reader.pyc and b/ariaec/reader.pyc differ diff --git a/ariaec/setup.py b/ariaec/setup.py index 2336dcabc4d69f85ea4a878c1b748ea9a9b3af22..18e4890aee0877637fc6c928b9535503ff9094ba 100644 --- a/ariaec/setup.py +++ b/ariaec/setup.py @@ -1,11 +1,12 @@ +# coding=utf-8 """ Input/Output aria_ec scripts """ from __future__ import absolute_import, division, print_function -import logging -import json import os +import json +import logging from .protein import Protein from .reader import ProtFileListReader @@ -14,15 +15,17 @@ from .protmap import MapFilter from .econverter import AriaEcXMLConverter # TODO: S'inspirer de pandas/__init__.py pour les dependances -# from basictools import * LOG = logging.getLogger(__name__) -class AriaEcSetup: +class AriaEcSetup(object): + """ + Aria Ec Setup protocol + """ + def __init__(self, settings): """ - :param settings: :return: """ @@ -40,7 +43,7 @@ class AriaEcSetup: def run(self): """ - + main method :return: """ # Check input @@ -70,21 +73,24 @@ class AriaEcSetup: # TODO: change read method in reader to __call__ # -------------------------- contact maps ---------------------------- # self.reader.read(self.settings.setup.args.get("infiles"), - filetypelist=self.settings.setup.args.get("contact_types"), + filetypelist=self.settings.setup.args.get( + "contact_types"), protein=self.protein, - groupby_method=self.settings.setup.config['groupby_method'], + groupby_method=self.settings.setup.config[ + 'groupby_method'], scsc=self.settings.scsc_min) - for fo in self.reader.filelist: + for mapfile in self.reader.filelist: # fo need a contactmap in order to wite XML dist restraints # TODO: filter pour toutes les map de mapdict !! (fonction remove # s'applique sur l'humanidx contenant les residus) - self.filter(fo.mapdict, fo.filetype, fo.contactlist, self.protein, - clashlist=fo.clashlist, outprefix=self.outprefix, + self.filter(mapfile.mapdict, mapfile.filetype, mapfile.contactlist, + self.protein, clashlist=mapfile.clashlist, + outprefix=self.outprefix, outdir=self.settings.infra.get("others", '')) - self.allresmap[fo.filetype] = fo.mapdict + self.allresmap[mapfile.filetype] = mapfile.mapdict - if fo.filetype != "pdb" and "pdb" in self.allresmap: - fo.contactmap.compareplot(self.allresmap["pdb"]) + if mapfile.filetype != "pdb" and "pdb" in self.allresmap: + mapfile.contactmap.compareplot(self.allresmap["pdb"]) # ---------------------------- target map ---------------------------- # if self.settings.setup.args.get("distfile") and \ @@ -138,9 +144,11 @@ class AriaEcSetup: # ------------------------------ Output ------------------------------ # # ----------------------------- SEQ file ----------------------------- # - self.protein.write_seq(os.path.join(self.settings.infra.get("others", ''), - self.outprefix + ".seq")) - # Load aria molecule object from generated seq file + self.protein.write_seq( + os.path.join(self.settings.infra.get("others", ''), + self.outprefix + ".seq")) + # Load aria molecule object from seq file and convert it into xml format + LOG.info("Load molecule file and convert it into xml format") self.converter.load_molecule(self.protein.seqfile_path) # --------------------------- TBL restraints ------------------------- # # Setting contact number limit for hbmap @@ -152,11 +160,12 @@ class AriaEcSetup: # --------------------------- XML restraints ------------------------- # # Setting contact number limit for map restraints (native, ec, ...) - # nb_c = int(len(self.protein.aa_sequence.sequence) * - # self.settings.setup.config.get("n_factor")) - dist_files, pair_lists = self.converter.write_maplist_restraints( - self.allresmap, self.targetmap) + # dist_files, pair_lists = self.converter.write_maplist_restraints( + # self.allresmap, self.targetmap) + + dist_files = self.converter.write_maplist_restraints( + self.allresmap, self.targetmap)[0] # --------------------------- XML SEQ file --------------------------- # xmlseq_file = self.converter.write_xmlseq() @@ -169,7 +178,7 @@ class AriaEcSetup: def write_optional_files(self): """ - + Write filtered contacts & distance maps (.csv) :return: """ # Indextableplus file (submatrix) @@ -184,16 +193,18 @@ class AriaEcSetup: "%s/%s.distmap.csv" % (self.settings.infra.get("others"), maptype)) if self.refmaps: - self._write_contacts(self.allresmap[maptype].get( - "filteredlist", None), self.protein.aa_sequence.sequence, - self.settings.infra.get("others", ''), "_".join(( - self.outprefix, maptype, "filtered")), + self._write_contacts( + self.allresmap[maptype].get("filteredlist", None), + self.protein.aa_sequence.sequence, + self.settings.infra.get("others", ''), + "_".join((self.outprefix, maptype, "filtered")), ref=self.refmaps["contactmap"], distmap=self.refmaps["distmap"]) if self.refmaps: self.refmaps["alldistmap"].to_csv( "%s/%s_%s.distmap.csv" % (self.settings.infra.get("others"), - self.outprefix, self.refmaps.filetype)) + self.outprefix, + self.refmaps.filetype)) def _write_contacts(self, contacts_list, seq, out, prefix, nc=None, append=False, ref=None, distmap=None): @@ -209,7 +220,7 @@ class AriaEcSetup: :return: """ mapy = [] - tp = 0 + tp_count = 0 filemode = 'a' if append else 'w' dist_desc = '\tTP/FP\tdist%s(ref)' % self.settings.setup.config[ "groupby_method"] if \ @@ -217,10 +228,12 @@ class AriaEcSetup: else '' with open("%s/%s.contacts.txt" % (out, prefix), filemode) as outfile: if not append: - outfile.write('''# resid1\tresid2\tres1\tres2%s\n''' % dist_desc) + outfile.write( + '''# resid1\tresid2\tres1\tres2%s\n''' % dist_desc) - if type(contacts_list) is dict: - contacts = contacts_list.keys() if not nc else contacts_list.keys()[:nc] + if hasattr(contacts_list, 'keys'): + contacts = contacts_list.keys() if not nc else \ + contacts_list.keys()[:nc] d_type = True else: # Check if contacts is 2-tuple @@ -228,6 +241,7 @@ class AriaEcSetup: raise TypeError('Contact list must be 2-tuple !') contacts = contacts_list if not nc else contacts_list[:nc] d_type = False + LOG.debug("Contact list %s", contacts) for contact in contacts: @@ -250,7 +264,7 @@ class AriaEcSetup: if ref is not None: if ref.ix[(resid1, resid2)]: asses = 'TP' - tp += 1 + tp_count += 1 else: asses = 'FP' outfile.write( @@ -265,12 +279,12 @@ class AriaEcSetup: seq[resid1], seq[resid2])) - ptp = (tp / float(len(contacts))) * 100.0 if ref is not None else \ + ptp = (tp_count / float(len(contacts))) * 100.0 if ref is not None else \ None outfile.write(''' # TP number : {tp} ({ptp:.2f} %) # Number of contacts : {nc} - '''.format(tp=tp, ptp=ptp, nc=len(contacts))) + '''.format(tp=tp_count, ptp=ptp, nc=len(contacts))) if __name__ == "__main__": diff --git a/ariaec/setup.pyc b/ariaec/setup.pyc index b001ed643a5309525fcc71bf8eed70b38cae559a..057ee8b97f13255915fad56e0326949f25f4ae71 100644 Binary files a/ariaec/setup.pyc and b/ariaec/setup.pyc differ diff --git a/setup.py b/setup.py index b04abf47acdeb5225b7860e49612f478748a14d3..3d1a52ccda283fcc85ad85bacc418059b2a15a4f 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +# coding=utf-8 import os import subprocess from setuptools import setup, find_packages, Command @@ -9,7 +10,7 @@ try: version_git = subprocess.check_output(["git", "describe", "--tags"]).rstrip() except subprocess.CalledProcessError: - with open(version_py, 'r') as fh: + with open(version_py) as fh: version_git = open(version_py).read().strip().split('=')[-1].replace('"', '') @@ -26,14 +27,27 @@ class CleanCommand(Command): """Custom clean command to tidy up the project root.""" user_options = [] - def initialize_options(self): + @staticmethod + def initialize_options(): + """ + + :return: + """ pass - def finalize_options(self): + @staticmethod + def finalize_options(): + """ + + :return: + """ pass @staticmethod def run(): + """ + call clean command + """ os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg ' './*.egg-info') @@ -93,6 +107,7 @@ setup( 'matplotlib'], install_requires=[ + # 'configparser', # TODO: add this requiirement only if python v2 'docutils>=0.3', 'pandas', 'seaborn',