Commit 6c3162f7 authored by Fabrice Allain's avatar Fabrice Allain
Browse files

merging not fully functional

parent 912fc3b2
...@@ -27,19 +27,4 @@ $Revision: 1.1.1.1 $ ...@@ -27,19 +27,4 @@ $Revision: 1.1.1.1 $
$Date: 2010/03/23 15:27:24 $ $Date: 2010/03/23 15:27:24 $
""" """
__all__ = ["Analyser", "ariabase", "AriaPeak", "AriaXML", "Assignment", from core import *
"AssignmentFilter", "Atom", "Calibrator", "ccpn2top", \ No newline at end of file
"ccpn_conversion", "Chain", "ChemicalShiftFilter", "Cluster",
"ChemicalShiftList", "cns", "Contribution", "ContributionAssigner",
"conversion", "ConversionTable", "CovalentDistances", "CrossPeak",
"CrossPeakFilter", "DataContainer", "Datum", "Experiment",
"exportToCcpn", "Factory", "FloatFile", "importFromCcpn",
"Infrastructure", "Iteration", "JobManager", "mathutils", "Merger",
"Molecule", "MolMol", "Molprobity", "Network", "NOEModel",
"NOESYSpectrum", "NOESYSpectrumFilter" "OrderedDict", "PDBReader",
"PeakAssigner", "Project", "Protocol", "Relaxation", "Report",
"Residue", "RmsReport", "Settings", "ShiftAssignment",
"ShiftAssignmentFilter", "Singleton", "SpinPair",
"StructureEnsemble", "SuperImposer", "tools", "Topology",
"TypeChecking", "ViolationAnalyser", "WhatifProfile", "xmlparser",
"xmlutils"]
...@@ -4,26 +4,26 @@ Created on 4/7/17 ...@@ -4,26 +4,26 @@ Created on 4/7/17
@author: fallain @author: fallain
""" """
import re
import os
import logging import logging
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import os
import re
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
from collections import OrderedDict
from Bio.PDB import PDBParser, PDBIO from Bio.PDB import PDBParser, PDBIO
from sklearn.decomposition import PCA from ..core.AriaXML import AriaXMLPickler
from aria.AriaXML import AriaXMLPickler from ..core.DataContainer import DATA_SEQUENCE
from ..core.SuperImposer import SuperImposer
from collections import OrderedDict
from glob import glob
from matplotlib.colors import ListedColormap
from matplotlib.lines import Line2D from matplotlib.lines import Line2D
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
from aria.SuperImposer import SuperImposer from sklearn.decomposition import PCA
from .converter import AriaEcXMLConverter
from .common import NotDisordered, Capturing
from matplotlib.colors import ListedColormap
from aria.DataContainer import DATA_SEQUENCE
from aria.StructureEnsemble import StructureEnsemble, StructureEnsembleSettings
from ..core.StructureEnsemble import StructureEnsemble, StructureEnsembleSettings
from .common import NotDisordered, Capturing
from .converter import AriaEcXMLConverter
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
......
...@@ -177,8 +177,8 @@ class AriaEcCommands(object): ...@@ -177,8 +177,8 @@ class AriaEcCommands(object):
group = parser.add_argument_group('required arguments') group = parser.add_argument_group('required arguments')
group.add_argument("seq", action=ReadableFile, group.add_argument("seq", action=ReadableFile,
help="sequence file [FASTA]") help="sequence file [FASTA]")
group.add_argument("sspred", action=ReadableFile, # group.add_argument("sspred", action=ReadableFile,
help="secondary structure prediction file") # help="secondary structure prediction file")
group.add_argument("infiles", nargs="+", metavar="infile", group.add_argument("infiles", nargs="+", metavar="infile",
action=ReadableFile, action=ReadableFile,
help="contact or pdb file(s) used to build aria " help="contact or pdb file(s) used to build aria "
...@@ -189,6 +189,12 @@ class AriaEcCommands(object): ...@@ -189,6 +189,12 @@ class AriaEcCommands(object):
"use distances in the given file as " "use distances in the given file as "
"target distance to build distance " "target distance to build distance "
"restraints") "restraints")
group.add_argument("-s", "--ssfile", dest="sspred", action=ReadableFile,
help="secondary structure prediction file")
group.add_argument("-p", "--ariaproject", dest="ariaproject",
action=ReadableFile,
help="ARIA project file. This file will be used as"
"an initialization file if")
group.add_argument("-t", "--type", required=True, group.add_argument("-t", "--type", required=True,
nargs="+", dest="contact_types", nargs="+", dest="contact_types",
choices=self.contact_types, help="Infile(s) contact " choices=self.contact_types, help="Infile(s) contact "
...@@ -201,6 +207,10 @@ class AriaEcCommands(object): ...@@ -201,6 +207,10 @@ class AriaEcCommands(object):
default=False, help="Use secondary structure index") default=False, help="Use secondary structure index")
group.add_argument("--no-filter", dest="no_filter", action="store_true", group.add_argument("--no-filter", dest="no_filter", action="store_true",
default=False, help="Do not filter contact map.") default=False, help="Do not filter contact map.")
group.add_argument("--extract-all", dest="extractall", action="store_true",
default=False, help="Extract data or all data and"
"parameters if an ARIA project"
"is defined with -p option")
return parser return parser
def _bbconv_argparser(self, desc=None): def _bbconv_argparser(self, desc=None):
...@@ -272,12 +282,17 @@ class AriaEcCommands(object): ...@@ -272,12 +282,17 @@ class AriaEcCommands(object):
parser.add_argument("--onlyreport", dest="onlyreport", parser.add_argument("--onlyreport", dest="onlyreport",
action="store_true", action="store_true",
default=False, help="Generate only report file") default=False, help="Generate only report file")
parser.add_argument("--no-filter", dest="no_filter", action="store_true",
default=False, help="Do not filter contact map.")
parser.add_argument("--ssidx", dest="ssidx", action="store_true", parser.add_argument("--ssidx", dest="ssidx", action="store_true",
default=False, default=False,
help="Use secondary structure index") help="Use secondary structure index")
parser.add_argument("--prefix", dest="prefix", default=False, parser.add_argument("--prefix", dest="prefix", action="store_true",
action="store_true", default="",
help="Add specific prefix to generated file names") help="Generate prefix for file names")
parser.add_argument("--prefixname", dest="prefixname",
default="",
help="Prefix name for file names")
return parser return parser
@staticmethod @staticmethod
......
...@@ -62,7 +62,6 @@ class TqdmToLogger(io.StringIO): ...@@ -62,7 +62,6 @@ class TqdmToLogger(io.StringIO):
""" """
self.logger.log(self.level, self.buf) self.logger.log(self.level, self.buf)
# Code below adapted from an answer of klaus se on stackoverflow # Code below adapted from an answer of klaus se on stackoverflow
# (http://stackoverflow.com/a/16071616) # (http://stackoverflow.com/a/16071616)
def worker(f, task_queue, done_queue): def worker(f, task_queue, done_queue):
......
...@@ -31,10 +31,11 @@ clashlist_executable: ...@@ -31,10 +31,11 @@ clashlist_executable:
; Contact definition section used to define maplot from pdb file. ; Contact definition section used to define maplot from pdb file.
; Decrease this threshold if using other cutoff (e.g. 5.0) ; Decrease this threshold if using other cutoff (e.g. 5.0)
default_cutoff: 8.0 default_cutoff: 8.0
; Add contact cutoff following the syntax atm1_atm2 ; Add contact cutoff following the syntax all, atm1_atm2 or sc_sc for side chains
;ca_ca: 7.0 ;all:
;cb_cb: 7.0 ;ca_ca:
;sc_sc: 5.0 ;cb_cb:
;sc_sc:
[setup] [setup]
; ------------------------------ TBL parameters ------------------------------ # ; ------------------------------ TBL parameters ------------------------------ #
...@@ -68,10 +69,6 @@ hb_dplus: 0.5 ...@@ -68,10 +69,6 @@ hb_dplus: 0.5
; neighborhood_contact : True, False [False] ; neighborhood_contact : True, False [False]
; Generate restraints for neighbors foreach ; Generate restraints for neighbors foreach
; contact in the contact map ; contact in the contact map
; pair_list : all, heavy, min [min]
; use all, heavy atms or from a minimized
; list (CA, CB, SC) for contribution list for
; each distance restraint
; atoms_type : all, heavy, min [min] ; atoms_type : all, heavy, min [min]
; use all, heavy atms or from a minimized ; use all, heavy atms or from a minimized
; list (CA, CB, SC) for contribution list for ; list (CA, CB, SC) for contribution list for
......
This diff is collapsed.
...@@ -24,7 +24,8 @@ class AriaEcContactMap(object): ...@@ -24,7 +24,8 @@ class AriaEcContactMap(object):
self.protein = Protein(settings) self.protein = Protein(settings)
self.file_reader = MapFileListReader( self.file_reader = MapFileListReader(
cont_def=settings.contactdef.config) cont_def=settings.contactdef.config)
self.filter = MapFilter(settings.setup.config) self.filter = MapFilter(settings.setup.config,
nofilter=settings.maplot.args.get("no_filter"))
self.protname = '' self.protname = ''
self.allresmap = {} self.allresmap = {}
self.refmap = None self.refmap = None
...@@ -140,17 +141,21 @@ class AriaEcContactMap(object): ...@@ -140,17 +141,21 @@ class AriaEcContactMap(object):
# ------------------------------ Output ------------------------------ # # ------------------------------ Output ------------------------------ #
for mapname, mapt, mapath in self.allresmap.keys(): for mapname, mapt, mapath in self.allresmap.keys():
prefix = self.settings.maplot.args.get("prefixname") if self.settings.maplot.args.get("prefixname") else ""
prefix = "%s_%svs%s" % (self.protname, mapt, self.reftype) if self.settings.maplot.args.get("prefix") else ""
if mapname == self.refname: if mapname == self.refname:
if self.settings.maplot.args.get("onlyreport", False) is not False: if not self.settings.maplot.args.get("onlyreport", False):
refmap.write_contacts(mapname, refmap.write_contacts(mapname, prefix=prefix,
outdir=outdir, outdir=outdir,
scoremap=self.refmap.get("scoremap", scoremap=self.refmap.get("scoremap",
None)) None))
continue continue
prefix = "%s_%svs%s" % (self.protname, mapt, self.reftype) if \
self.settings.maplot.args.get("prefix") and not \
self.settings.maplot.args.get("prefixname") else \
self.settings.maplot.args.get("prefixname") if \
self.settings.maplot.args.get("prefixname") else ""
scoremap = self.allresmap[(mapname, mapt, mapath)].get( scoremap = self.allresmap[(mapname, mapt, mapath)].get(
'scoremap', None) 'scoremap', None)
# if self.allresmap[mapt].get("maplot") is not None and \ # if self.allresmap[mapt].get("maplot") is not None and \
......
...@@ -3,25 +3,23 @@ ...@@ -3,25 +3,23 @@
PDB distance distribution generation PDB distance distribution generation
""" """
import os
import sys
import logging import logging
import os
import pandas as pd import pandas as pd
import pbxplore as pbx import pbxplore as pbx
from glob import glob import sys
from Bio.PDB import PDBList, PDBParser, Selection, is_aa, NeighborSearch, \ from Bio.PDB import PDBList, PDBParser, Selection, is_aa, NeighborSearch, \
MMCIFParser MMCIFParser
from Bio.PDB.DSSP import dssp_dict_from_pdb_file from Bio.PDB.DSSP import dssp_dict_from_pdb_file
from future.utils import iteritems
from collections import defaultdict, OrderedDict from collections import defaultdict, OrderedDict
from future.utils import iteritems
from glob import glob
from ..core.legacy.AminoAcid import AminoAcid
# from .base import ppdict # from .base import ppdict
from .common import Capturing from .common import Capturing
from .reader import CulledPdbFile
from .protmap import ResAtmMap from .protmap import ResAtmMap
from .reader import CulledPdbFile
from aria.legacy.AminoAcid import AminoAcid
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
......
...@@ -5,14 +5,13 @@ Created on 9/5/16 ...@@ -5,14 +5,13 @@ Created on 9/5/16
Derived from qual.py script by Dr. Benjamin Bardiaux Derived from qual.py script by Dr. Benjamin Bardiaux
""" """
import logging
import os import os
import shutil import shutil
import logging
from aria.legacy.QualityChecks import QualityChecks from ..core.legacy import QualityChecks
from .common import CommandProtocol from .common import CommandProtocol
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
......
...@@ -3,22 +3,21 @@ ...@@ -3,22 +3,21 @@
PDB distance distribution analysis PDB distance distribution analysis
""" """
import os
import re
import pickle
import logging
import itertools import itertools
import logging
import numpy as np import numpy as np
import os
import pandas as pd import pandas as pd
import pickle
import re
import sklearn.mixture as mixture import sklearn.mixture as mixture
from ..core.ConversionTable import ConversionTable
from tqdm import tqdm
from .protmap import SsAaAtmMap
from .common import TqdmToLogger
from aria.legacy.AminoAcid import AminoAcid
from aria.ConversionTable import ConversionTable
from pathos.multiprocessing import ProcessingPool from pathos.multiprocessing import ProcessingPool
from tqdm import tqdm
from ..core.legacy.AminoAcid import AminoAcid
from .common import TqdmToLogger
from .protmap import SsAaAtmMap
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
...@@ -173,6 +172,7 @@ class PDBStat(object): ...@@ -173,6 +172,7 @@ class PDBStat(object):
df = df.append(tmp) df = df.append(tmp)
# TODO: CHANGE DEFAULT SELECTION CRITERIA TO AIC
if bic < lowest_bic: if bic < lowest_bic:
lowest_bic = bic lowest_bic = bic
best_gmm = gmm best_gmm = gmm
......
...@@ -6,15 +6,18 @@ ...@@ -6,15 +6,18 @@
from __future__ import absolute_import, division, print_function, \ from __future__ import absolute_import, division, print_function, \
unicode_literals unicode_literals
import os # from ..core import legacy.SequenceList as SequenceList
import sys from ..core.legacy import SequenceList as SequenceList
import re
import logging import logging
import os
import pkg_resources as pkgr import pkg_resources as pkgr
import aria.legacy.SequenceList as SequenceList import re
import aria.legacy.AminoAcid as AmnAcd import sys
from six import iteritems, text_type from six import iteritems, text_type
from ..core.legacy import AminoAcid as AmnAcd
from .common import (reg_load, ppdict) from .common import (reg_load, ppdict)
# import skbio.Protein as skprot # import skbio.Protein as skprot
# TODO: interface skbio ?? # TODO: interface skbio ??
...@@ -30,6 +33,12 @@ class SsList(object): ...@@ -30,6 +33,12 @@ class SsList(object):
r'\s+(?P<ss_pred>[HEC])' r'\s+(?P<ss_pred>[HEC])'
r'\s+(?P<ss_conf>\d?)') r'\s+(?P<ss_conf>\d?)')
psipred2_reg = re.compile(r'^(?P<ss_pred>[HEC]+)') psipred2_reg = re.compile(r'^(?P<ss_pred>[HEC]+)')
psipred3_reg = re.compile(r'^\s*(?P<up_index>\d+)'
r'\s+(?P<up_residue>[AC-IK-NP-TVWYZ])'
r'\s+(?P<ss_pred>[HEC])'
r'\s+(?P<dunno1>\d?\.?\d*)'
r'\s+(?P<dunno2>\d?\.?\d*)'
r'\s+(?P<dunno3>\d?\.?\d*)')
indxplus_reg = re.compile( indxplus_reg = re.compile(
r'^(?P<up_index>\d+)\s+(?P<up_residue>[AC-IK-NP-TVWYZ])\s+' r'^(?P<up_index>\d+)\s+(?P<up_residue>[AC-IK-NP-TVWYZ])\s+'
r'(?P<ss_pred>[CEH])\s+(?P<ss_conf>\d)\s+(?P<msa_index>[\d\-]+)\s+' r'(?P<ss_pred>[CEH])\s+(?P<ss_conf>\d)\s+(?P<msa_index>[\d\-]+)\s+'
...@@ -52,6 +61,12 @@ class SsList(object): ...@@ -52,6 +61,12 @@ class SsList(object):
self.ssdist = {} self.ssdist = {}
self.filetype = '' self.filetype = ''
def __bool__(self):
return True if self.ss_matrix else False
def __nonzero__(self):
return self.__bool__()
@property @property
def index(self): def index(self):
""":return:""" """:return:"""
...@@ -111,6 +126,8 @@ class SsList(object): ...@@ -111,6 +126,8 @@ class SsList(object):
# TODO: better read with getattr # TODO: better read with getattr
if self.filetype == "indextableplus": if self.filetype == "indextableplus":
self.read_indextableplus(filename) self.read_indextableplus(filename)
elif self.filetype == "ss2":
self.read_psipred(filename, ss2=True)
else: else:
self.read_psipred(filename) self.read_psipred(filename)
...@@ -118,7 +135,7 @@ class SsList(object): ...@@ -118,7 +135,7 @@ class SsList(object):
"Secondary structure dict:\n%s", self.ss_matrix, "Secondary structure dict:\n%s", self.ss_matrix,
self.ssdict) self.ssdict)
def read_psipred(self, filename): def read_psipred(self, filename, ss2=False):
""" """
...@@ -132,14 +149,17 @@ class SsList(object): ...@@ -132,14 +149,17 @@ class SsList(object):
""" """
if ss2:
self.ssdict = reg_load(self.psipred_reg, filename) self.ssdict = reg_load(self.psipred3_reg, filename)
else:
self.ssdict = reg_load(self.psipred_reg, filename)
# TODO: supprimer psipred_list dans les futures implementations # TODO: supprimer psipred_list dans les futures implementations
ss_index_dict = {'H': 1, 'C': 1, 'E': 1} ss_index_dict = {'H': 1, 'C': 1, 'E': 1}
for line_id in sorted(self.ssdict.keys()): for line_id in sorted(self.ssdict.keys()):
# Modif champ ss_pred # Modif champ ss_pred
# Si line_id # Si line_id
if line_id > 1 and self.ssdict[line_id]['ss_pred'] not in \ if line_id != min(self.ssdict.keys()) and \
self.ssdict[line_id]['ss_pred'] not in \
self.ssdict[line_id - 1]['ss_pred']: self.ssdict[line_id - 1]['ss_pred']:
# If next ss isn't the same, increment relative struct in # If next ss isn't the same, increment relative struct in
# ss_index_dict # ss_index_dict
...@@ -152,7 +172,7 @@ class SsList(object): ...@@ -152,7 +172,7 @@ class SsList(object):
self.ss_matrix.append([self.ssdict[line_id]['up_index'], self.ss_matrix.append([self.ssdict[line_id]['up_index'],
self.ssdict[line_id]['up_residue'], self.ssdict[line_id]['up_residue'],
self.ssdict[line_id]['ss_pred'], self.ssdict[line_id]['ss_pred'],
self.ssdict[line_id]['ss_conf']]) self.ssdict[line_id].get('ss_conf')])
def write_ssfasta(self, filename, desc="pdbid"): def write_ssfasta(self, filename, desc="pdbid"):
""" """
...@@ -478,7 +498,10 @@ class AminoAcidSequence(SequenceList.SequenceList, object): ...@@ -478,7 +498,10 @@ class AminoAcidSequence(SequenceList.SequenceList, object):
# TODO: smarter reader checking type of file (fasta, etc ...) # TODO: smarter reader checking type of file (fasta, etc ...)
# TODO: capturing has some troubles with unicode ... # TODO: capturing has some troubles with unicode ...
# with Capturing() as output: # with Capturing() as output:
self.ReadFasta(text_type(filename)) if os.path.splitext(filename)[1] == '.seq':
self.ReadSeq(text_type(filename))
else:
self.ReadFasta(text_type(filename))
# LOG.info(''.join(output)) # LOG.info(''.join(output))
self.sequence = "".join((AmnAcd.AminoAcid(str(_))[0] for _ in self.sequence = "".join((AmnAcd.AminoAcid(str(_))[0] for _ in
......
This diff is collapsed.
...@@ -8,9 +8,11 @@ import os ...@@ -8,9 +8,11 @@ import os
import re import re
import logging import logging
import os.path import os.path
import numpy as np
import collections import collections
import pkg_resources as pkgr import pkg_resources as pkgr
import scipy.spatial.distance as distance import scipy.spatial.distance as distance
from Bio import pairwise2
from .common import sort_2dict from .common import sort_2dict
from .protmap import (ResMap, ResAtmMap) from .protmap import (ResMap, ResAtmMap)
...@@ -182,8 +184,8 @@ class MapFile(RegexFile): ...@@ -182,8 +184,8 @@ class MapFile(RegexFile):
r'(?P<placeholder>\d),(?P<res1_cons>\d+),' r'(?P<placeholder>\d),(?P<res1_cons>\d+),'
r'(?P<res2_cons>\d+),(?P<ss_filter>\d|\d{3}),' r'(?P<res2_cons>\d+),(?P<ss_filter>\d|\d{3}),'
r'(?P<high_cons_filter>\d|\d{3}),' r'(?P<high_cons_filter>\d|\d{3}),'
r'(?P<cc_filter>\d|\d{3}),(?P<res1_1l_code>\w),' r'(?P<cc_filter>\d|\d{3}),(?P<res1_name>\w),'
r'(?P<res2_1l_code>\w)$'), r'(?P<res2_name>\w)$'),
"score_field": "ec_score" "score_field": "ec_score"
}, },
"pconsc": { "pconsc": {
...@@ -318,9 +320,11 @@ class MapFile(RegexFile): ...@@ -318,9 +320,11 @@ class MapFile(RegexFile):
"distmap": None, "distmap": None,
"maplot": None, "maplot": None,
"scoremap": None} "scoremap": None}
self.clashlist = None self.clashlist = []
self.contactlist = None self.distlist = []
self.flaglist = None self.contactlist = []
self.flaglist = {}
self.scorelist = []
# self.maplot = None # self.maplot = None
# self.distmap = None # self.distmap = None
...@@ -356,27 +360,6 @@ class MapFile(RegexFile): ...@@ -356,27 +360,6 @@ class MapFile(RegexFile):
raise NotImplementedError("Class %s doesn't implement create_map" % raise NotImplementedError("Class %s doesn't implement create_map" %
self.__class__.__name__) self.__class__.__name__)
def check_contacts(self, aa_seq):
"""
Check if plm_dict is consistent with input sequence
Parameters
----------
aa_seq :
Returns
-------
"""
LOG.info("Checking consistency of contacts with input sequence")
for line in self.lines:
if self.lines[line]['res1_name'] != aa_seq[int(self.lines[line]['res1_nb']) - 1] \
or self.lines[line]['res2_name'] != aa_seq[int(self.lines[line]['res2_nb']) - 1]:
LOG.error("Difference between given sequence and residu "
"names in contact file at line %d !", line)
def update_map(self, resmap): def update_map(self, resmap):
""" """