Commit 35d8304e authored by fabrice's avatar fabrice
Browse files

Initial commit

parents
# Created by .ignore support plugin (hsz.mobi)
.venv
data/examples
.idea
\ No newline at end of file
graft ariaec/conf
graft ariaec/templates
graft ariaec/data
\ No newline at end of file
"""
Basic tools aria_ec
"""
from __future__ import absolute_import, division, print_function
import logging
import logging.config
import os
import json
import re
import ast
import sys
import numpy as np
import pkg_resources as pkgr
import matplotlib.artist as art
from cStringIO import StringIO
logger = logging.getLogger()
def titleprint(outfile, progname='', desc=''):
"""
Init log file
:param outfile:
:param progname: docstring
:param desc:
:return:
"""
out = '''
================================================================================
{progname}
{desc}
================================================================================
'''.format(progname=progname, desc=desc.capitalize())
outfile.write(out[1:])
def get_filename(path):
"""
Search filename in the given path
:param path:
:return:
"""
return "_".join(path.split("/")[-1].split(".")[:-1])
def reg_load(regex, filepath, sort=None):
"""
:param regex:
:param filepath:
:param sort:
:return:
"""
lines_dict = {}
with open(filepath) as f:
for index, line in enumerate(f):
match = regex.match(line)
if match:
lines_dict[index] = match.groupdict()
if sort:
lines_dict = sort_2dict(lines_dict, sort)
return lines_dict
def sort_2dict(unsort_dict, key, reverse=True):
"""
Sort 2d dict by key
:param reverse:
:param key:
:param unsort_dict:
:return: sorted dict
"""
sorted_index = sorted(unsort_dict, key=lambda x: float(unsort_dict[x][key]),
reverse=reverse)
sorted_dict = {}
for rank, ind in enumerate(sorted_index):
sorted_dict[rank + 1] = unsort_dict[ind]
return sorted_dict
def cart_dist(x, y):
"""
Evaluate cartesian distance beetween 2 points x, y
:param x: numpy array (len = n dimensions)
:param y: numpy array (len = n dimensions)
:return:
"""
return np.sqrt(sum(np.power(x - y, 2)))
def format_str(string):
"""
Convert str in bool, float, int or str
:param string:
:return:
"""
if re.search("^\s*true\s*$", string, re.I):
return True
elif re.search("^\s*false\s*$", string, re.I):
return False
elif re.search("^\s*\d+\s*$", string):
return int(string)
elif re.search("^[\s\d-]+\.\d+\s*$", string):
return float(string)
elif "," in string:
return string.split(',')
elif "+" in string:
return string.split('+')
elif re.search("[/\\w]+", string):
return string
else:
if string:
try:
ev_str = ast.literal_eval(string)
except ValueError:
logger.error("Don't understand given string %s. Please check "
"format." % string)
return None
except SyntaxError:
logger.error("Given string %s is not a valid expression" % string)
return None
return ev_str
else:
return None
def format_dict(indict):
for key in indict:
indict[key] = format_str(indict[key])
return indict
def ppdict(indict, indent=2):
return json.dumps(indict, indent=indent)
def tickmin(pandata, ntick=None, shift=0, offset=5):
"""
Minimise number of ticks labels for matplotlib or seaborn plot
:param offset:
:param shift:
:param pandata: pandas dataframe
:param ntick: number of ticks wanted per axes
:return:
"""
yticks = [_ + shift for _ in range(0, len(pandata.index))]
xticks = [_ + shift for _ in range(0, len(pandata.columns))]
if ntick:
keptticks = yticks[::(len(pandata.index) // int(ntick))]
yticks = [_ if _ in keptticks else '' for _ in yticks]
# If shift != 0, need to initialize first value of ticks
keptticks = xticks[::(len(xticks) // int(ntick))]
xticks = [_ if _ in keptticks else '' for _ in xticks]
else:
keptticks = [ytick for i, ytick in enumerate(yticks)
if (i + shift) % offset == 0]
# keptticks = yticks[::offset - shift]
yticks = [_ if _ in keptticks else '' for _ in yticks]
# If shift != 0, need to initialize first value of ticks
keptticks = [xtick for i, xtick in enumerate(xticks)
if (i + shift) % offset == 0]
# keptticks = xticks[::offset - shift]
xticks = [_ if _ in keptticks else '' for _ in xticks]
return xticks, yticks
def tickrot(axes, figure, rotype='horizontal', x=True, y=True):
"""
Matplot rotation of ticks labels
:param axes:
:param figure:
:param rotype:
:param x:
:param y:
:return:
"""
if y:
art.setp(axes.get_yticklabels(), rotation=rotype)
if x:
art.setp(axes.get_xticklabels(), rotation=rotype)
figure.canvas.draw()
class CustomLogging:
# default_file = os.path.join(os.path.abspath(os.path.dirname(__file__)),
# "conf/logging.json")
default_file = "conf/logging.json"
def __init__(self, level=logging.INFO, desc=None):
# TODO: detect path log filenames and makedirs if not exists
logging.basicConfig(level=level)
if desc:
self.msg = desc.strip()
else:
self.msg = ""
self.config = self.default_config()
def update_msg(self, desc):
if type(self.msg) == list:
self.msg += desc
self.msg = " - ".join(self.msg)
elif type(self.msg) == str:
self.msg = " - ".join((self.msg, desc.capitalize()))
def default_config(self):
# with open(self.default_file, 'rt') as f:
with pkgr.resource_stream(__name__, self.default_file) as f:
config = json.load(f)
logging.config.dictConfig(config)
return config
def set_outdir(self, outdir):
"""
Create log directory and change log files location
:param outdir: path output directory
:return:
"""
outdir = os.path.join(outdir, "log") if "log" not in outdir else outdir
if not os.path.exists(os.path.abspath(outdir)):
os.makedirs(outdir)
if outdir and "handlers" in self.config:
for hand in self.config["handlers"]:
if "filename" in self.config["handlers"][hand]:
self.config["handlers"][hand]["filename"] = \
os.path.join(outdir, os.path.basename(
self.config["handlers"][hand]["filename"]))
logging.config.dictConfig(self.config)
def welcome(self):
desc = '''
================================================================================
{:^80}
================================================================================
'''.format(self.msg)
print(desc)
class Capturing(list):
def __enter__(self):
# Stock default stdout and redirect current stdout to this class
self._stdout = sys.stdout
# All print calls are saved into self._stringio
sys.stdout = self._stringio = StringIO()
return self
def __exit__(self, *args):
self.extend("\n".join(self._stringio.getvalue().splitlines()))
sys.stdout = self._stdout
if __name__ == "__main__":
# Test Logger
CustomLogging().set_outdir("../examples/out")
logger = logging.getLogger("TEST")
print(dir(logger))
logger.info("Log test")
File added
"""
Input/Output aria_ec
"""
from __future__ import absolute_import, division, print_function
import os
import logging
import argparse as argp
from .ecsettings import AriaEcSettings
from .maplot import AriaEcContactMap
from .econverter import AriaEcBbConverter
from .setup import AriaEcSetup
logger = logging.getLogger(__name__)
def check_file(prospective_file):
if not os.path.exists(prospective_file):
raise argp.ArgumentTypeError("readable_file:'{0}' is not a valid "
"path".format(prospective_file))
if not os.access(prospective_file, os.R_OK):
raise argp.ArgumentTypeError("readable_file:'{0}' is not a readable "
"file".format(prospective_file))
class ReadableFile(argp.Action):
def __call__(self, parser, namespace, values, option_string=None):
if type(values) == list:
for prospective_file in values:
check_file(prospective_file)
elif type(values) == str:
check_file(values)
setattr(namespace, self.dest, values)
# TODO: Make parent Command class with _create_argparser, self.args,
# update_logger and run
class AriaEcCommand:
"""
Argparse interface for aria_ec
"""
command_list = ("setup", "bbconv", "contactmap")
desc_list = (u"Setup ARIA infrastructure with given contacts translated "
u"into ARIA restraints",
u"Convert contacts in bbcontact format",
u"Contactmap tool")
contact_types = ("plmdca", "evfold", "bbcontacts", "pconsc", "gremlin",
"metapsicov", "pdb", "native", "native_full",
"contactlist")
default_confile = "conf/aria_ec.ini"
def __init__(self, custom_logging=None):
# Def Argument parser
parser = self._create_argparser()
# parse args
self.args = parser.parse_args()
# Update logger with outdir
self._update_logger(custom_logging)
def _update_logger(self, log):
if log and hasattr(self.args, "output_directory"):
log.set_outdir(self.args.output_directory)
log.update_msg(self.args.command)
log.welcome()
def _create_argparser(self):
parser = argp.ArgumentParser(description=u"ARIA Evolutionary "
u"restraints toolbox",
formatter_class=argp.ArgumentDefaultsHelpFormatter)
parser.add_argument("-o", "--output", dest="output_directory",
type=str, help="Output directory")
parser.add_argument("-c", "--conf", action=ReadableFile, dest="conf_file",
default=None, help="configuration file")
# Create subcommands
self._create_subparsers(parser.add_subparsers(dest="command"))
return parser
def _create_subparsers(self, parser):
"""
Generate subcommands
:param parser: argparser object
:return:
"""
for index, command in enumerate(self.command_list):
# Create subparser defined in command list
subcommand = getattr(self, "_" + command + "_parser")(desc=self.desc_list[
index])
parser.add_parser(command, parents=[subcommand])
def _setup_parser(self, desc=None):
"""
setup opt & args
:param desc: command descriptor
:return: argparser object
"""
parser = argp.ArgumentParser(description=desc,
add_help=False)
# Options
# Args
group = parser.add_argument_group('required arguments')
group.add_argument("seq", action=ReadableFile,
help="sequence file [FASTA]")
group.add_argument("sspred", action=ReadableFile,
help="secondary structure prediction file")
group.add_argument("infiles", nargs="+", metavar="infile",
action=ReadableFile,
help="contact or pdb file(s) used to build aria "
"distance restraints")
group.add_argument("-d", "--distfile", dest="distfile",
help="Pdb or distance matrix iif distance_type "
"set to distfile in conf file, "
"use distances in the given file as "
"target distance to build distance "
"restraints")
group.add_argument("-t", "--type", required=True,
nargs="+", dest="contact_types",
choices=self.contact_types, help="Infile(s) contact "
"type(s)")
group.add_argument("-r", "--ref", dest="ref",
help="Native pdb. Allow TP/FP detection.")
group.add_argument("--hb", dest="hb",
help="H-bonds contact file (eg: metapsicov.hb)")
group.add_argument("--ssidx", dest="ssidx", action="store_true",
default=False, help="Use secondary structure index")
return parser
def _bbconv_parser(self, desc=None):
"""
bbconv opt & args
:param desc: command descriptor
:return: argparser object
"""
parser = argp.ArgumentParser(description=desc,
add_help=False)
# args
parser.add_argument("contactfile", help="contacts file (pconsc, plm)",
action=ReadableFile)
parser.add_argument("sspred", help="psipred file",
action=ReadableFile)
parser.add_argument("seq", help="sequence file [FASTA]",
action=ReadableFile)
parser.add_argument("msa", nargs='?',
help="MSA [FASTA] for diversityvalue"
"used with bbcontacts")
parser.add_argument("-t", "--type", required=True, dest="contact_type",
choices=self.contact_types, help="Infile contact "
"type")
return parser
def _contactmap_parser(self, desc=None):
"""
contactmap opt & args
:param desc: command descriptor
:return: argparser object
"""
parser = argp.ArgumentParser(description=desc,
add_help=False)
parser.add_argument("seq", action=ReadableFile,
help="sequence file [FASTA]")
parser.add_argument("sspred", action=ReadableFile,
help="secondary structure prediction file")
parser.add_argument("infiles", nargs="+", metavar="infile",
action=ReadableFile,
help="contact or pdb file(s) used to build aria "
"distance restraints")
parser.add_argument("-t", "--type", required=True,
nargs="+", dest="contact_types",
choices=self.contact_types, help="Infile(s) "
"contact "
"type(s)")
parser.add_argument("--nofilter", dest="nofilter", action="store_true",
default=None, help="Do not use contact list "
"filter and select top n "
"contacts")
parser.add_argument("--ssidx", dest="ssidx", action="store_true",
default=False,
help="Use secondary structure index")
return parser
def create_settings(self):
logger.debug("Create AriaEcSettings")
settings = AriaEcSettings()
logger.info("Loading default config file")
settings.load_config(self.default_confile, pkg=True)
if self.args.conf_file:
print(self.args.conf_file)
logger.info("Updating settings with conf file")
settings.load_config(self.args.conf_file)
# Update settings associated to command section
logger.info("Updating %s settings with args" % self.args.command)
getattr(settings, self.args.command).args.update(self.args.__dict__)
if self.args.output_directory:
logger.info("Updating output directory")
settings.infra = self.args.output_directory
return settings
def run(self):
# call method relative to args.command
logger.info("Run %s command" % self.args.command)
getattr(self, self.args.command)()
def setup(self):
setup_inst = AriaEcSetup(self.create_settings())
setup_inst.run()
# instantiate AriaEcSetup with AriaEcSettings
def bbconv(self):
bbconverter = AriaEcBbConverter(self.create_settings())
bbconverter.run()
def contactmap(self):
# instantiate AriaEcContactmap with AriaSettings
econtactmap = AriaEcContactMap(self.create_settings())
econtactmap.run()
if __name__ == "__main__":
# Test AriaEcCommand object
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("IO")
argparser = AriaEcCommand()
[main]
; ------------------------- Main parameters ---------------------------------- #
; Leave these fields empty in order to use default files
ss_dist_file:
scsc_min_file:
ariaproject_template:
[contactdef]
; Contact definition section used to define contactmap from pdb file
default_cutoff: 8.0
ca_ca:
cb_cb:
sc_sc:
; Add other contact cutoff below folowwing the syntax atm1_atm2
[setup]
; ------------------------------ TBL parameters ------------------------------ #
; longrange_hb : True, False [False]; use long range hbond
; restraints. If there is no hbond map given,
; use the naive method from METAPSICOV (Take
; the top nf_longrange_hb * seq length
; predicted contacts from the contactlist and
; set those who are in a beta sheet as hb
; nf_longrange_hb : N hbond generated = nf * seq length
; longrange_hbtype : main, all; Consider short range hbond only
; as main chain hydrogen bond or for all
; donor/acceptor
; hb_dminus/dplus : distance bounds in tbl restraints
longrange_hb: False
nf_longrange_hb: 0.1
longrange_hbtype: main
hb_dminus: 0.0
hb_dplus: 0.5
; --------------------------- Contacts parameters ---------------------------- #
; evfold_weight : use EVFold weight -> 10/i (i:contact rank)
; neighborhood_contact : add neighbors when writing restraint
; pair_list : all, min [min]; use all atom pairs or
; minimized list (CA-CA, CB-CB, SC-SC)
; ambiguous_distance_restraint : use Ambiguous Distance Restraints
; ambiguous_distance_type : min, deff [min]; compute Deff from target
; distance iif ADR
; distance_type : fixed, pdbstat, distfile [fixed]; fixed use
; ca, cb and sc upper_bound
evfold_weight: False
neighborhood_contact: False
pair_list: min
ambiguous_distance_restraint: False
ambiguous_distance_type: min
deffpow: 6
; TODO: move groupby_method to contactdef section
groupby_method: min
distance_type: fixed
; TODO: move parameters below to contactdef section
; Parameters below used only when distance_type is set to "fixed"
restraint_distance: 2.5
lower_bound: 1.0
def_upper_bound: 4.0
ca_upper_bound: 7.0
cb_upper_bound: 7.0
; ---------------------------- Filter parameters ----------------------------- #
; n_factor : Number of EC selected: n * n_factor (n: sequence length)
; contactfilter : all or combinaison of pos, cons, cys, ssclash, nd
; separated by "+" character [all]
n_factor: 1.0
contactfilter:
conservation_treshold: 95
position_treshold: 5
nd_beta: 0.99
nd_alpha: 1.0
nd_control: 0
; --------------------------- ARIA XML parameters ---------------------------- #
runid: 1
cpus: 100
host_command: sbatch --cores-per-socket=10 -o /baycells/home/fallain/slurm.errors
working_directory: data/examples/out/aria_wd
temp_root: data/examples/out/aria_tmp
parameter_definition: automatic
ss_dist_format: tbl
ss_dist_enabled: yes
ss_dist_add_to_network: no
ss_dist_calibrate: no
ss_dist_run_network_anchoring: no
ss_dist_filter_contributions: no
dist_format: xml
dist_enabled: yes
dist_add_to_network: no
dist_calibrate: no
dist_run_network_anchoring: no
dist_filter_contributions: yes
cns_executable: /Bis/home/bardiaux/bin/cns1.21.exe
cns_keep_output: no
unambiguous_restraints_k_cool1_initial: 10.0
unambiguous_restraints_k_cool1_final: 50.0
unambiguous_restraints_k_cool2: 50.0
hbond_restraints_k_cool1_initial: 10.0
hbond_restraints_k_cool1_final: 50.0
hbond_restraints_k_cool2: 50.0
dihedral_restraints_k_cool1: 25.0
dihedral_restraints_k_cool2: 200.0
logharmonic_potential_enabled: no
logharmonic_potential_use_auto_weight: no
logharmonic_potential_weight_unambig: 25.0
logharmonic_potential_weight_ambig: 10.0
logharmonic_potential_weight_hbond: 25.0
rama_potential_enabled: no