Commit 71905c34 authored by Fabrice ALLAIN

Removed annoying warnings with metadata attributes in protmap.py

parent 1873f8ac
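For context, a minimal sketch (illustrative names, not code from this repository) of the pandas
subclassing pattern the fix relies on: pandas warns when an unknown instance attribute is assigned,
because it cannot tell the assignment apart from column creation, whereas names registered in
_metadata are stored silently and copied across operations by __finalize__. Assumes a pandas
version with the _metadata subclassing API (>= 0.19).

import pandas as pd

class TaggedFrame(pd.DataFrame):
    # Register "tag" as instance metadata so pandas neither warns on
    # assignment nor mistakes it for a column name.
    _metadata = pd.DataFrame._metadata + ["tag"]

    @property
    def _constructor(self):
        # Keep slices/copies returning TaggedFrame instead of DataFrame.
        return TaggedFrame

df = TaggedFrame({"a": [1, 2]})
df.tag = "contacts"  # no UserWarning: "tag" is declared metadata
print(df.tag)        # -> contacts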
......@@ -10,4 +10,6 @@ docs.old*
docs/_static/
docs/_templates/
.svn/
examples-dev
examples-dev/out
venv*/
......@@ -14,11 +14,13 @@ from .settings import AriaEcSettings
from .maplot import AriaEcContactMap
from .converter import AriaEcBbConverter, AriaEcXMLConverter, \
AriaEcConfigConverter
from .setup import AriaEcSetup
from .pdbqual import AriaEcPdbqual
from .reader import MapFile
from .pdbdist import PDBDist
from .pdbstat import PDBStat
from .setup import AriaEcSetup
from .pdbqual import AriaEcPdbqual
from .analysis import EnsembleAnalysis
from conkit.io import CONTACT_FILE_PARSERS
LOG = logging.getLogger(__name__)
......@@ -67,7 +69,16 @@ class ReadableFile(argp.Action):
# TODO: Make parent Command class with _create_argparser, self.args,
# update_logger and run
class AriaEcCommands(object):
"""Argparse interface for aria_ec"""
"""
Command line interface for aria_ec
Attributes
----------
AriaEcCommands.command_list: list
available command line tools
AriaEcCommands.desc_list: list
one-line description for each command line tool
"""
command_list = ("setup", "bbconv", "maplot", "pdbqual", "analysis",
"tbl2xml", "pdbdist", "pdbstat", "iniconv")
......@@ -81,10 +92,9 @@ class AriaEcCommands(object):
u"Extract distance distribution from culled list of pdb files",
u"Analyze distance distribution with GMM",
u"Convert configuration files into a unique csv file")
contact_types = ("evfold", "plmev", "plm", "plmdca", "plmc", "bbcontacts",
"pconsc", "pconsc1", "pconsc2", "psicov", "metapsicovhb",
"metapsicov_stg1", "metapsicov_stg2", "gremlin", "pdb",
"native", "native_full", "contactlist")
contact_types = set(MapFile.types).union(
set(CONTACT_FILE_PARSERS))
default_confile = "conf/config.ini"
def __init__(self, custom_logging=None):
......
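The hard-coded contact_types tuple is thus replaced by the union of the regex parsers declared in
reader.py and every format conkit can read, so new conkit parsers are picked up automatically.
A sketch of how the merged set might feed a CLI option (the option name and parser variable are
illustrative; the relative import only works inside the package):

import argparse
from conkit.io import CONTACT_FILE_PARSERS
from .reader import MapFile  # as imported at the top of this diff

contact_types = set(MapFile.types).union(CONTACT_FILE_PARSERS)  # union iterates the dict's keys
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--type", choices=sorted(contact_types))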
......@@ -519,7 +519,6 @@ class CustomLogging(object):
================================================================================
'''.format(self.msg)
print(desc)
for hand in self.config.get("handlers"):
if "filename" in self.config["handlers"][hand]:
with open(self.config["handlers"][hand]["filename"],
......
......@@ -69,7 +69,7 @@ hb_dplus: 0.5
; neighborhood_contact : True, False [False]
; Generate restraints for the neighbors of each
; contact in the contact map
; atoms_type : all, heavy, min [min]
; atom_groups : all, heavy, min [min]
; use all atoms, heavy atoms only, or a minimal
; list (CA, CB, SC) to build the contribution
; list for each distance restraint
......@@ -103,7 +103,7 @@ hb_dplus: 0.5
native_reliable: False
evfold_weight: False
neighborhood_contact: False
atoms_type: min
atom_groups: min
contributions_type: same
distance_type: fixed
groupby_method: min
......
......@@ -933,7 +933,7 @@ class AriaEcXMLConverter(AriaXMLConverter):
# TODO: use precision level; currently we only use SsAaAtmMaps
precision_level = self.settings.setup.config.get(
"pdbdistance_level", "ss")
atom_types = self.settings.setup.config.get("atoms_type", "min")
atom_groups = self.settings.setup.config.get("atom_groups", "min")
# TODO: Do the same for heavy matrices; it is currently hard-coded to min
# matrices since we only have this kind of information in data/pdbdists
......@@ -951,7 +951,7 @@ class AriaEcXMLConverter(AriaXMLConverter):
path = self.settings.main.config.get(pdbstasts_group[1])
pkgpath = getattr(self.settings, pdbstasts_group[0])
# TODO: ONLY MIN PAIR ATOMS FOR TARGET PDB DIST
targetmap = SsAaAtmMap(atom_types=atom_types)
targetmap = SsAaAtmMap(atom_groups=atom_groups)
if not path or not os.path.exists(path):
with pkgr.resource_stream(__name__, pkgpath) as pkgfile:
......@@ -1297,7 +1297,7 @@ class AriaEcXMLConverter(AriaXMLConverter):
neigh_flag = self.settings.setup.config['neighborhood_contact']
adr_flag = self.settings.setup.config["ambiguous_distance_restraint"]
contribs_type = self.settings.setup.config["contributions_type"]
atms_type = self.settings.setup.config["atoms_type"]
atm_groups = self.settings.setup.config["atom_groups"]
default_target_dist = self.settings.setup.config["restraint_distance"]
# TODO: Lower and upper bound matrices are currently unused...
targetdistmap = targetdists.get('INTERTARGET')
......@@ -1345,7 +1345,7 @@ class AriaEcXMLConverter(AriaXMLConverter):
for subcontact in subcontacts:
contribs_lists += self.atm_product(
subcontact[0], resname_3l(subcontact[0]), subcontact[1],
resname_3l(subcontact[1]), list_type=atms_type,
resname_3l(subcontact[1]), list_type=atm_groups,
adr_flag=adr_flag, product_type=contribs_type)
# We have a unique atmpair list, trick to have a unique loop for
......
......@@ -38,9 +38,29 @@ LOG = logging.getLogger(__name__)
class Map(pd.DataFrame):
"""Distance/contact matrix"""
"""
Abstract distance/contact matrix class; accepts a single value type, chosen by the mtype argument
Examples
--------
Score/distance matrix
>>> d = Map(index=[0,1,2], columns=[0,1,2])
>>> d
0 1 2
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
>>> d = Map(index=[0,1,2], columns=[0,1,2], mtype='contact')
>>> d
0 1 2
0 False False False
1 False False False
2 False False False
"""
mtype_choices = {'contact': bool, 'distance': float, "score": float}
_metadata = pd.DataFrame._metadata + ['_sort_list']
def update(self, *args, **kwargs):
"""
......@@ -94,7 +114,7 @@ class Map(pd.DataFrame):
self.mtype = mtype
self.dtype = dtype
if mtype == "score":
self.sort_list = []
self._sort_list = []
self.sym = sym
self.desc = desc
self.path = path
......@@ -102,6 +122,10 @@ class Map(pd.DataFrame):
def __str__(self):
return super(Map, self).__str__()
@property
def sort_list(self):
return self._sort_list
def sortedset(self, human_idx=False):
"""
......@@ -117,11 +141,6 @@ class Map(pd.DataFrame):
"""
# Remove duplicates from sort_list
"""
:param human_idx:
:return:
"""
n = 1 if human_idx else 0
if hasattr(self, "sort_list"):
if self.sym:
......@@ -194,10 +213,6 @@ class Map(pd.DataFrame):
"""
# Reset values at positions in rm_list
"""
:param rm_list:
"""
value = False if self.dtype == bool else np.NaN
for contact in rm_list:
idx1, idx2 = self.index[contact[0]], self.index[contact[1]]
......@@ -274,9 +289,15 @@ class Map(pd.DataFrame):
idx = self.index.get_level_values(level) == idxval
self.loc[idx, idx] = pairdict[idxval]
# TODO: remove sym attribute --> information is duplicated !!!!
def set_value(self, index, col, value, **kwargs):
"""
Assign value at [index, col] in the related dataframe
>>> d = Map(index=[0,1,2], columns=[0,1,2])
>>> d.set_value(1, 2, 2)
>>> d.at[1, 2]
2.0
Parameters
----------
......@@ -294,9 +315,13 @@ class Map(pd.DataFrame):
"""
super(Map, self).set_value(index, col, value, **kwargs)
LOG.debug("Update {index}, {col} with {value} value {default}".format(index=index, col=col, value=value,
default=self.at[index, col]))
# super(Map, self).set_value(index, col, value, **kwargs)
super(Map, self).at[index, col] = value
if self.sym:
super(Map, self).set_value(col, index, value, **kwargs)
super(Map, self).at[col, index] = value
# super(Map, self).set_value(col, index, value, **kwargs)
# TODO: Matrices PosAaAtmMap, AaAtmMap, AtmMap
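The hunk above swaps the deprecated DataFrame.set_value call for the .at scalar accessor. A minimal
standalone sketch of the replacement API (assuming a pandas version where set_value is deprecated,
i.e. >= 0.21):

import pandas as pd

df = pd.DataFrame(index=[0, 1, 2], columns=[0, 1, 2])
df.at[1, 2] = 2.0          # scalar write, replaces df.set_value(1, 2, 2.0)
assert df.at[1, 2] == 2.0  # scalar read through the same accessor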
......@@ -310,14 +335,18 @@ class ProteinMap(Map):
all_reg = re.compile(r"^((?!cns|dyana).*)$")
# TODO: Other distance methods
distance_method = 'euclidean'
_metadata = Map._metadata + ['_evflags', '_maplot', '_sequence',
'_atom_groups']
def __init__(self, sequence, **kwargs):
def __init__(self, sequence, flaglist=None, atom_groups="min", **kwargs):
if kwargs.get("index") is None or kwargs.get("columns") is None:
kwargs["index"], kwargs["columns"] = self.create_index(
sequence, **kwargs)
super(ProteinMap, self).__init__(**kwargs)
self.contact_flags = kwargs.get("flaglist")
self._evflags = flaglist
self._maplot = None
self._sequence = sequence
self._atom_groups = atom_groups
# def _constructor_expanddim(self):
# return self._constructor_expanddim()
......@@ -332,6 +361,10 @@ class ProteinMap(Map):
"""
raise NotImplementedError
@property
def contact_flags(self):
return self._evflags
def create_heatmap(self):
"""
Generate heatmap
......@@ -972,15 +1005,15 @@ class ResAtmMap(ProteinMap):
# def _constructor_expanddim(self):
# super(ResAtmMap, self)._constructor_expanddim()
def __init__(self, sequence, **kwargs):
def __init__(self, sequence, flaglist=None, atom_groups="min", **kwargs):
# Sequence: one-letter string or MultiIndex object
# Dataframe uses three-letter codes
if not sequence:
sequence = ConversionTable.ConversionTable().table['AMINO_ACID'][
'iupac'].keys()
# Super call will initialize index and columns with self.create_index()
super(ResAtmMap, self).__init__(sequence=sequence, **kwargs)
self._sequence = sequence
super(ResAtmMap, self).__init__(sequence=sequence, flaglist=flaglist,
atom_groups=atom_groups, **kwargs)
@property
def sequence(self):
......@@ -1222,11 +1255,11 @@ class ResAtmMap(ProteinMap):
class ResMap(ResAtmMap):
"""Res - res distance/contact matrix"""
def __init__(self, sequence, **kwargs):
def __init__(self, sequence, flaglist=None, **kwargs):
if not sequence:
sequence = ConversionTable.ConversionTable().table['AMINO_ACID'][
'iupac'].keys()
super(ResMap, self).__init__(sequence, **kwargs)
super(ResMap, self).__init__(sequence, flaglist=flaglist, **kwargs)
# def _constructor_expanddim(self):
# super(ResMap, self)._constructor_expanddim()
......@@ -1388,12 +1421,13 @@ class AaMap(ProteinMap):
# def _constructor_expanddim(self):
# super(AaMap, self)._constructor_expanddim()
def __init__(self, *args, **kwargs):
def __init__(self, atom_groups="min", **kwargs):
# if ("index", "columns") not in kwargs:
# idx, col = self.create_index()
# kwargs["index"] = idx
# kwargs["columns"] = col
super(AaMap, self).__init__(sequence=self.sequence, **kwargs)
super(AaMap, self).__init__(sequence=self.sequence,
atom_groups=atom_groups, **kwargs)
# TODO: Actually useless since it raises a NotImplementedError
def reduce(self):
......@@ -1475,11 +1509,10 @@ class AaAtmMap(AaMap):
"""
raise NotImplementedError
def __init__(self, *args, **kwargs):
super(AaAtmMap, self).__init__(*args, **kwargs)
self.atom_types = kwargs.get("atom_types", "min")
def __init__(self, atom_groups="min", **kwargs):
super(AaAtmMap, self).__init__(atom_groups=atom_groups, **kwargs)
def create_index(self, sequence, atom_types="min", **kwargs):
def create_index(self, sequence, atom_groups="min", **kwargs):
"""
Update Aa index with atoms
Returns
......@@ -1496,10 +1529,10 @@ class AaAtmMap(AaMap):
for _ in filter(self.heavy_reg.match, atomtable[aa].keys())]
atm_list = [
atm for aa in seq for atm in ('CA', 'CB', 'SC')] \
if atom_types == "min" else [
if atom_groups == "min" else [
atm for aa in seq for atm in filter(self.heavy_reg.match,
atomtable[aa].keys())] \
if atom_types == "heavy" else [
if atom_groups == "heavy" else [
atm for aa in seq for atm in atomtable[aa].keys()]
idx = pd.MultiIndex.from_tuples(list(zip(*[res_list, atm_list])),
names=('AminoAcid', 'Atom'))
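The chained conditional expressions above are compact but hard to scan. An equivalent, more
readable helper (the name atoms_for is hypothetical; atomtable maps a residue to its atom table
and heavy_reg matches heavy-atom names, as in the surrounding code):

def atoms_for(aa, atom_groups, atomtable, heavy_reg):
    # Minimal set: CA/CB plus the SC pseudo side-chain atom.
    if atom_groups == "min":
        return ['CA', 'CB', 'SC']
    # Heavy atoms only, filtered by the class regex.
    if atom_groups == "heavy":
        return [atm for atm in atomtable[aa] if heavy_reg.match(atm)]
    # Default "all": every atom known for this residue.
    return list(atomtable[aa])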
......@@ -1527,7 +1560,7 @@ class AaAtmMap(AaMap):
return AaAtmMap(
path=self.path, data=df, desc=self.desc,
sym=self.sym, mtype=self.mtype, dtype=self.dtype,
atom_types=self.atom_types)
atom_groups=self.atom_groups)
class SsAaAtmMap(AaAtmMap):
......@@ -1559,16 +1592,15 @@ class SsAaAtmMap(AaAtmMap):
"""
pass
def __init__(self, *args, **kwargs):
super(SsAaAtmMap, self).__init__(*args, **kwargs)
self.atom_types = kwargs.get("atom_types", "min")
def __init__(self, atom_groups="min", **kwargs):
super(SsAaAtmMap, self).__init__(atom_groups=atom_groups, **kwargs)
def create_index(self, sequence, atom_types="min", **kwargs):
def create_index(self, sequence, atom_groups="min", **kwargs):
"""
Parameters
----------
atom_types
atom_groups
sequence
kwargs
......@@ -1585,8 +1617,8 @@ class SsAaAtmMap(AaAtmMap):
for ss in self.ss_types:
for aa in seq:
atms = ('CA', 'CB', 'SC') if atom_types == "min" else filter(
self.heavy_reg.match, atomtable[aa].keys()) if atom_types == "heavy" else atomtable[aa].keys()
atms = ('CA', 'CB', 'SC') if atom_groups == "min" else filter(
self.heavy_reg.match, atomtable[aa].keys()) if atom_groups == "heavy" else atomtable[aa].keys()
for atm in atms:
ss_list.append(ss)
res_list.append(aa)
......@@ -1617,7 +1649,7 @@ class SsAaAtmMap(AaAtmMap):
return self
newmap = AaAtmMap(path=self.path, desc=self.desc,
sym=self.sym, mtype=self.mtype, dtype=self.dtype,
atom_types=self.atom_types)
atom_groups=self.atom_groups)
if self.dtype == bool:
newmap[:] = self.copy().stack('SecStruct', dropna=False).groupby(
level=[1, 2]).any()
......
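The reduce step above collapses the SecStruct level of a boolean map with groupby + any. A toy
illustration of that pandas idiom (values invented):

import pandas as pd

idx = pd.MultiIndex.from_tuples([("H", "ALA"), ("E", "ALA")],
                                names=("SecStruct", "AminoAcid"))
df = pd.DataFrame({"contact": [False, True]}, index=idx)
# Grouping on the remaining level and reducing with any() drops SecStruct:
print(df.groupby(level="AminoAcid").any())  # single ALA row, contact == True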
......@@ -8,10 +8,10 @@ import os
import re
import logging
import os.path
import numpy as np
import collections
import pkg_resources as pkgr
import scipy.spatial.distance as distance
from conkit import io as conio
from Bio import pairwise2
from .common import sort_2dict
from .protmap import (ResMap, ResAtmMap)
......@@ -69,7 +69,7 @@ class RegexFile(object):
"""File path"""
return self._filepath
def load(self):
def load(self, *args):
"""
Fill the lines attribute with a dictionary; each key is a line number in the given file
:return: None
......@@ -177,7 +177,7 @@ class MapFile(RegexFile):
r"(?P<plm_score>-?\d+\.\d+)\s*$"),
"score_field": "plm_score"
},
"evfold": {
"evcoupling": {
"regex": re.compile(
r'^(?P<res1_nb>\d+),(?P<res2_nb>\d+),'
r'(?P<ec_score>-?\d+\.\d+e?-?\d*),'
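As a quick sanity check, the leading groups of the renamed evcoupling pattern (only the part
visible above; the input line is invented for illustration) match a typical EVcouplings CSV prefix:

import re

rx = re.compile(r'^(?P<res1_nb>\d+),(?P<res2_nb>\d+),'
                r'(?P<ec_score>-?\d+\.\d+e?-?\d*),')
m = rx.match("3,45,0.7213,extra_fields")
print(m.group("res1_nb"), m.group("res2_nb"), m.group("ec_score"))  # -> 3 45 0.7213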
......@@ -272,6 +272,8 @@ class MapFile(RegexFile):
r'(?P<hbscore>-?\d+\.?\d*)'),
"score_field": "hbscore"
},
}
unknown = {
"default_1": {
"regex": re.compile(
r'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
......@@ -303,17 +305,22 @@ class MapFile(RegexFile):
"score_field": None
}
}
check_type = True
def __init__(self, *args, **kwargs):
"""
:param args:
:param kwargs:
:return:
Parameters
----------
args
positional arguments forwarded to RegexFile
kwargs
keyword arguments; the conkit_reader flag is consumed here
"""
self.conioflag = kwargs.pop("conkit_reader", False)
super(MapFile, self).__init__(*args, **kwargs)
if self.check_type:
if not self.conioflag:
LOG.info("Conkit doesn't support {ftype}".format(