Commit 402cdec9 authored by fabrice's avatar fabrice
Browse files

pep8tized v3

parent dc8fc3f1
No preview for this file type
......@@ -102,9 +102,9 @@ def format_str(string):
:param string:
:return:
"""
if re.search(r"^\s*true\s*$", string, re.I):
if re.search(r"^\s*(true|yes)\s*$", string, re.I):
return True
elif re.search(r"^\s*false\s*$", string, re.I):
elif re.search(r"^\s*(false|no)\s*$", string, re.I):
return False
elif re.search(r"^\s*\d+\s*$", string):
return int(string)
......@@ -114,7 +114,7 @@ def format_str(string):
return string.split(',')
elif "+" in string:
return string.split('+')
elif re.search(r"[/\\w]+", string):
elif re.search(r"[/\w]+", string):
return string
else:
if string:
......@@ -133,12 +133,23 @@ def format_str(string):
def format_dict(indict):
"""
:param indict:
:return:
"""
for key in indict:
indict[key] = format_str(indict[key])
return indict
def ppdict(indict, indent=2):
"""
:param indict:
:param indent:
:return:
"""
return json.dumps(indict, indent=indent)
......@@ -197,6 +208,12 @@ class CustomLogging:
default_file = "conf/logging.json"
def __init__(self, level=logging.INFO, desc=None):
"""
:param level:
:param desc:
:return:
"""
# TODO: detect path log filenames and makedirs if not exists
logging.basicConfig(level=level)
if desc:
......@@ -206,6 +223,11 @@ class CustomLogging:
self.config = self.default_config()
def update_msg(self, desc):
"""
:param desc:
:return:
"""
if type(self.msg) == list:
self.msg += desc
self.msg = " - ".join(self.msg)
......@@ -213,6 +235,10 @@ class CustomLogging:
self.msg = " - ".join((self.msg, desc.capitalize()))
def default_config(self):
"""
:return:
"""
# with open(self.default_file, 'rt') as f:
with pkgr.resource_stream(__name__, self.default_file) as f:
config = json.load(f)
......@@ -237,6 +263,10 @@ class CustomLogging:
logging.config.dictConfig(self.config)
def welcome(self):
"""
:return:
"""
desc = '''
================================================================================
......@@ -249,6 +279,10 @@ class CustomLogging:
class Capturing(list):
def __enter__(self):
"""
:return:
"""
# Stock default stdout and redirect current stdout to this class
self._stdout = sys.stdout
# All print calls are saved into self._stringio
......@@ -256,6 +290,11 @@ class Capturing(list):
return self
def __exit__(self, *args):
"""
:param args:
:return:
"""
self.extend("\n".join(self._stringio.getvalue().splitlines()))
sys.stdout = self._stdout
......
No preview for this file type
......@@ -3,13 +3,13 @@
"""
from __future__ import absolute_import, division, print_function
import math
import re
import numpy as np
import pandas as pd
# import re
# import math
# import numpy as np
# import pandas as pd
from collections import namedtuple
from .base import cart_dist
# from .base import cart_dist
Contact = namedtuple("Contact", ["resid1", "resid2"])
......@@ -18,154 +18,154 @@ Atom = namedtuple("Atom", ["name", "coords"])
# BBcontacts scripts #
# ---------------------------------------------------------------------------- #
def diversityvalue(msa, l):
"""
Compute diversityvalue for bbcontacts.py : sqrt(n)/L where n is the
number of sequence in MSA and L the protein length
:param msa: filepath (fasta format)
:param l: protein length
:return: diversityvalue (float)
"""
# Compute n value
msa_reg = re.compile(r"^>[A-Za-z0-9]+_[A-Za-z0-9]+")
n = 0
with open(msa) as f:
for index, line in enumerate(f):
match = msa_reg.match(line)
if match and "deselect" not in line:
n += 1
return math.sqrt(n) / float(l)
def indexup_bbcontact(bbcontact_dict, startind, out, prefix):
"""
Index update if startind of coupling matrix is not eq 0
:param out:
:param prefix:
:param bbcontact_dict:
:param startind:
:return:
"""
up = int(startind) - 1
col = ('identifier', 'diversity', 'direction', 'viterbiscore',
'indexpred', 'state', 'res1', 'res2')
with open('%s/%s.filteredoutput_up.txt' % (out, prefix), 'w') as f:
f.write("#{:>14}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{:>5}\t{"
":>5}".format(*col))
for contact in bbcontact_dict:
resid1 = int(bbcontact_dict[contact]['res1_nb'])
resid2 = int(bbcontact_dict[contact]['res2_nb'])
bbcontact_dict[contact]['res1'] = resid1 + up
bbcontact_dict[contact]['res1_nb'] = resid1 + up
bbcontact_dict[contact]['res2'] = resid2 + up
bbcontact_dict[contact]['res2_nb'] = resid2 + up
line = [bbcontact_dict[contact][x] for x in col]
f.write("\n{:>15}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{"
":>5}\t{:>5}".format(*line))
def contact_insertion(contacts_dict, insert_list, insertvalue, seq):
"""
Insert
:param contacts_dict:
:param insert_list:
:param insertvalue:
:param seq:
:return:
"""
maxind = max(contacts_dict.keys())
for insert in insert_list:
if insert not in contacts_dict:
contacts_dict[insert] = {'resname': seq[insert - 1]}
tmp = {}
# Init all contacts related to insert
for contact in range(insert + 1, maxind + 1):
tmp[contact] = float(insertvalue)
contacts_dict[insert].update(tmp)
# def diversityvalue(msa, l):
# """
# Compute diversityvalue for bbcontacts.py : sqrt(n)/L where n is the
# number of sequence in MSA and L the protein length
# :param msa: filepath (fasta format)
# :param l: protein length
# :return: diversityvalue (float)
# """
#
# Compute n value
# msa_reg = re.compile(r"^>[A-Za-z0-9]+_[A-Za-z0-9]+")
# n = 0
#
# with open(msa) as f:
# for index, line in enumerate(f):
# match = msa_reg.match(line)
# if match and "deselect" not in line:
# n += 1
#
# return math.sqrt(n) / float(l)
# def indexup_bbcontact(bbcontact_dict, startind, out, prefix):
# """
# Index update if startind of coupling matrix is not eq 0
# :param out:
# :param prefix:
# :param bbcontact_dict:
# :param startind:
# :return:
# """
# up = int(startind) - 1
# col = ('identifier', 'diversity', 'direction', 'viterbiscore',
# 'indexpred', 'state', 'res1', 'res2')
# with open('%s/%s.filteredoutput_up.txt' % (out, prefix), 'w') as f:
# f.write("#{:>14}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{:>5}\t{"
# ":>5}".format(*col))
#
# for contact in bbcontact_dict:
# resid1 = int(bbcontact_dict[contact]['res1_nb'])
# resid2 = int(bbcontact_dict[contact]['res2_nb'])
# bbcontact_dict[contact]['res1'] = resid1 + up
# bbcontact_dict[contact]['res1_nb'] = resid1 + up
# bbcontact_dict[contact]['res2'] = resid2 + up
# bbcontact_dict[contact]['res2_nb'] = resid2 + up
# line = [bbcontact_dict[contact][x] for x in col]
# f.write("\n{:>15}\t{:>10}\t{:>15}\t{:>15}\t{:>10}\t{:>10}\t{"
# ":>5}\t{:>5}".format(*line))
# def contact_insertion(contacts_dict, insert_list, insertvalue, seq):
# """
# Insert
# :param contacts_dict:
# :param insert_list:
# :param insertvalue:
# :param seq:
# :return:
# """
# maxind = max(contacts_dict.keys())
# for insert in insert_list:
# if insert not in contacts_dict:
# contacts_dict[insert] = {'resname': seq[insert - 1]}
# tmp = {}
# Init all contacts related to insert
# for contact in range(insert + 1, maxind + 1):
# tmp[contact] = float(insertvalue)
# contacts_dict[insert].update(tmp)
# Contacts scripts #
# ---------------------------------------------------------------------------- #
def pdb_to_contact(pdbpath, threshold):
"""
List of contacts inside given pdb file
:param threshold:
:author: bardiaux
:param pdbpath:
:return:
"""
mapy = []
descmap = {}
with open(pdbpath, 'r') as pdb:
coords = {}
while 1:
l = pdb.readline()
if not l:
break
elif l.startswith('ATOM'):
# Foreach atom
fields = l
atom = fields[12:16].strip()
resid = int(fields[23:26].strip())
if atom[0] != "H":
coords.setdefault(resid, [])
coord = np.array([float(a) for a in [fields[30:38],
fields[38:46],
fields[46:54]]])
coords[resid].append(Atom(name=atom, coords=coord))
k = coords.keys()
k.sort() # sorted resid list
for ii in range(0, len(k)):
# Foreach res i
i = k[ii]
at1 = coords[i] # (x, y, z) coords list (N, CA, C, ...)
for jj in range(ii, len(k)):
# foreach res j (i+1, i+2, ...)
j = k[jj]
found = False
at2 = coords[j]
for a in at1:
# foreach atm of res i
for b in at2:
# foreach atm of res j
if not found and cart_dist(a.coords, b.coords) < \
float(
threshold):
# If no contact found between 2 res and cart dist < t
found = True
if (i, j) not in mapy:
# If contact not already found
mapy.append((i, j))
descmap[Contact(resid1=i, resid2=j)] = {"dist": "%.2f" % cart_dist(a.coords, b.coords),
"atoms": (a.name, b.name)}
return mapy, descmap
def contact2dataframe(contacts, refseq):
dim = len(refseq)
matrix = np.zeros(shape=(dim, dim))
matrix = pd.DataFrame(matrix)
# print matrix
# print contacts
for contact in contacts:
# Contactmatrix humanidx starts at 0 and input contacts at 1
matrix.iloc[int(contact[0]) - 1, int(contact[1]) - 1] = 1
matrix.iloc[int(contact[1]) - 1, int(contact[0]) - 1] = 1
return matrix
# def pdb_to_contact(pdbpath, threshold):
# """
# List of contacts inside given pdb file
# :param threshold:
# :author: bardiaux
# :param pdbpath:
# :return:
# """
# mapy = []
# descmap = {}
#
# with open(pdbpath, 'r') as pdb:
# coords = {}
#
# while 1:
#
# l = pdb.readline()
#
# if not l:
# break
#
# elif l.startswith('ATOM'):
# Foreach atom
#
# fields = l
# atom = fields[12:16].strip()
# resid = int(fields[23:26].strip())
#
# if atom[0] != "H":
#
# coords.setdefault(resid, [])
# coord = np.array([float(a) for a in [fields[30:38],
# fields[38:46],
# fields[46:54]]])
# coords[resid].append(Atom(name=atom, coords=coord))
#
# k = coords.keys()
# k.sort() # sorted resid list
#
# for ii in range(0, len(k)):
# Foreach res i
# i = k[ii]
# at1 = coords[i] # (x, y, z) coords list (N, CA, C, ...)
# for jj in range(ii, len(k)):
# foreach res j (i+1, i+2, ...)
# j = k[jj]
# found = False
# at2 = coords[j]
#
# for a in at1:
# foreach atm of res i
# for b in at2:
# foreach atm of res j
# if not found and cart_dist(a.coords, b.coords) < \
# float(
# threshold):
# If no contact found between 2 res and cart dist < t
# found = True
# if (i, j) not in mapy:
# If contact not already found
# mapy.append((i, j))
# descmap[Contact(resid1=i, resid2=j)] = {"dist": "%.2f" % cart_dist(a.coords, b.coords),
# "atoms": (a.name, b.name)}
#
# return mapy, descmap
# def contact2dataframe(contacts, refseq):
# dim = len(refseq)
# matrix = np.zeros(shape=(dim, dim))
# matrix = pd.DataFrame(matrix)
# print matrix
# print contacts
# for contact in contacts:
# Contactmatrix humanidx starts at 0 and input contacts at 1
# matrix.iloc[int(contact[0]) - 1, int(contact[1]) - 1] = 1
# matrix.iloc[int(contact[1]) - 1, int(contact[0]) - 1] = 1
#
# return matrix
No preview for this file type
......@@ -13,8 +13,6 @@ import aria.legacy.SequenceList as SequenceList
import aria.legacy.AminoAcid as AmnAcd
from .base import (reg_load, Capturing, ppdict)
# import skbio.Protein as skprot
# TODO: interface skbio ??
......@@ -38,6 +36,11 @@ class SsList:
ss_dist_reg = re.compile(r"\s+(\d+\.\d+) \( (\d+\.\d+)\)")
def __init__(self, sett):
"""
:param sett:
:return:
"""
self.settings = sett
self.ss_matrix = []
self.ssdict = {}
......@@ -46,6 +49,10 @@ class SsList:
@property
def index(self):
"""
:return:
"""
if self.ss_matrix:
# Assuming human idx (1 ...) correspond to ss_matrix first column !!!
return [int(_) for _ in zip(*self.ss_matrix)[0]]
......@@ -53,10 +60,21 @@ class SsList:
return []
def check_filetype(self, filename):
"""
:param filename:
:return:
"""
self.filetype = os.path.splitext(filename)[1][1:]
# TODO: check if given file is supported
def read(self, filename, sequence=''):
"""
:param filename:
:param sequence:
:return:
"""
self.check_filetype(filename)
logger.info("Reading secondary structure file %s [%s]" % (
filename,
......@@ -73,6 +91,11 @@ class SsList:
self.seq_sublist(sequence)
def read_psipred(self, filename):
"""
:param filename:
:return:
"""
self.ssdict = reg_load(self.psipred_reg, filename)
# TODO: supprimer psipred_list dans les futures implementations
......@@ -95,11 +118,22 @@ class SsList:
self.ssdict[line_id]['ss_conf']])
def write_ssfasta(self, filename, desc="pdbid"):
"""
:param filename:
:param desc:
:return:
"""
with open(filename, 'w') as psipred:
psipred.write("> %s\n" % desc)
psipred.write("".join([_[0] for _ in zip(*self.ss_matrix)[2]]))
def read_indextableplus(self, filename):
"""
:param filename:
:return:
"""
c = 0
error_list = []
ss_index_dict = {'H': 1, 'C': 1, 'E': 1}
......@@ -134,6 +168,12 @@ class SsList:
str(error_list)))
def _read_ssdist(self, infile, filename=''):
"""
:param infile:
:param filename:
:return:
"""
logger.info("Reading distance file {0}".format(filename))
c = 0
atom_list = []
......@@ -228,6 +268,13 @@ class AminoAcidSequence(SequenceList.SequenceList):
}
def __init__(self, topologyfile, *args, **kwargs):
"""
:param topologyfile:
:param args:
:param kwargs:
:return:
"""
SequenceList.SequenceList.__init__(self, *args, **kwargs)
self._topfile = topologyfile
self._topology = None
......@@ -236,6 +283,10 @@ class AminoAcidSequence(SequenceList.SequenceList):
@property
def humanidx(self):
"""
:return:
"""
return range(1, len(self.sequence) + 1)
@property
......@@ -254,13 +305,26 @@ class AminoAcidSequence(SequenceList.SequenceList):
return self._topology
def __getitem__(self, key):
"""
:param key:
:return:
"""
return self.aalist[key]
def __repr__(self):
"""
:return:
"""
return "Amino Acid sequence %s (%d)" % (self.sequence,
len(self.sequence))
def readtopo(self):
"""
:return:
"""
topo = {}
with pkgr.resource_stream(__name__, self._topfile) as tpf:
res_flag = False
......@@ -302,6 +366,11 @@ class AminoAcidSequence(SequenceList.SequenceList):
return topo
def read(self, filename):
"""
:param filename:
:return:
"""
# TODO: smarter reader checking type of file (fasta, etc ...)
with Capturing() as output:
self.ReadFasta(filename)
......@@ -315,6 +384,11 @@ class AminoAcidSequence(SequenceList.SequenceList):
class Protein:
def __init__(self, sett):
"""
:param sett:
:return:
"""
self.aa_sequence = AminoAcidSequence(sett.TOPO)
self.sec_struct = SsList(sett)
self.index = [] # Index starting from 1
......@@ -345,9 +419,19 @@ class Protein:
@property
def topology(self):
"""
:return:
"""
return self.aa_sequence.topology
def set_aa_sequence(self, filename, ssidx=False):
"""
:param filename:
:param ssidx:
:return:
"""
self.aa_sequence.read(filename)
self.index = range(1, len(self.aa_sequence.sequence) + 1)
if self.sec_struct.ss_matrix:
......@@ -359,6 +443,13 @@ class Protein:
self.sec_struct.index)