Commit 6b45e791 authored by Hervé  MENAGER's avatar Hervé MENAGER
Browse files

improve code formatting and linting procedures

parent 2a8b11c1
{
"python.pythonPath": "venv/bin/python",
"git.ignoreLimitWarning": true,
"python.linting.flake8Enabled": true,
"python.linting.pylintEnabled": false,
"python.linting.flake8Args": ["--max-line-length=88"]
}
\ No newline at end of file
[flake8]
exclude = .git,*migrations*
max-line-length = 88
......@@ -18,16 +18,16 @@ from django.urls import reverse
from django.utils.translation import ugettext_lazy as _
from .utils import FingerPrinter, smi2inchi, smi2inchikey
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
get_pfam_info, get_doi_info
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, \
get_taxonomy_info, get_go_info, get_pfam_info, get_doi_info
class AutoFillableModel(models.Model):
"""
AutoFillableModel makes it possible to automatically fill model fields from
external sources in the autofill() method
The save method allows to either include autofill or not. in autofill kwarg is
set to True, save() will first call autofill(), otherwise it won't
The save method can either include autofill or not: if the autofill kwarg
is set to True, save() will first call autofill(), otherwise it won't
"""
class Meta:
......@@ -49,7 +49,8 @@ class AutoFillableModel(models.Model):
def autofill_post_save(self):
"""
method called after the save is done, usefull for settings m2m relations
method called after the save is done, useful for setting m2m
relations
:return:
"""
pass
......@@ -74,7 +75,8 @@ class Bibliography(AutoFillableModel):
DO=re.compile("^10.\d{4,9}/.+$"),
)
source = models.CharField(
'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
'Bibliographic type', max_length=2, choices=SOURCES,
default=SOURCES[0][0])
id_source = models.CharField('Bibliographic ID', max_length=25)
title = models.TextField('Title')
journal_name = models.TextField('Journal name', null=True, blank=True)
......@@ -129,7 +131,9 @@ class Bibliography(AutoFillableModel):
if not id_source_validator.match(id_source):
raise ValidationError(
dict(
id_source=_("Must match pattern %s for this selected source" % id_source_validator.pattern)
id_source=_(
f"Must match pattern {id_source_validator.pattern}"
" for this selected source")
)
)
return True
......@@ -259,7 +263,8 @@ class Protein(AutoFillableModel):
def autofill_post_save(self):
info = self.__info
for go_id in info['molecular_functions']:
mol_function, created = MolecularFunction.objects.get_or_create(go_id=go_id)
mol_function, created = MolecularFunction.objects.get_or_create(
go_id=go_id)
self.molecular_functions.add(mol_function)
for domain_id in info['domains']:
......@@ -280,7 +285,8 @@ class Domain(AutoFillableModel):
pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
pfam_description = models.CharField('Pfam Description', max_length=100)
domain_family = models.CharField('Domain family', max_length=25, blank=True, default="")
domain_family = models.CharField('Domain family', max_length=25,
blank=True, default="")
# TODO: what is this field? check database
# contents
......@@ -302,7 +308,7 @@ class Domain(AutoFillableModel):
return self.pfam_id
def __str__(self):
return '{} ({}-{})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)
return f'{self.pfam_acc} ({self.pfam_id}-{self.pfam_description})'
class ProteinDomainComplex(models.Model):
......@@ -390,7 +396,8 @@ class Ppi(AutoFillableModel):
'Total number of pockets in the complex', default=1)
symmetry = models.ForeignKey(Symmetry, models.CASCADE)
diseases = models.ManyToManyField(Disease, blank=True)
family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
family = models.ForeignKey(PpiFamily, models.CASCADE, null=True,
blank=True)
name = models.TextField('PPI name', null=True, blank=True)
def __str__(self):
......@@ -410,13 +417,20 @@ class Ppi(AutoFillableModel):
"""
return bound ppi complexes belonging to this ppi
"""
return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())
return PpiComplex.objects.filter(
ppi=self,
complex__in=ProteinDomainBoundComplex.objects.all()
)
def compute_name_from_protein_names(self):
all_protein_names = set(
[ppi_complex.complex.protein.short_name for ppi_complex in self.ppicomplex_set.all()])
[
ppi_complex.complex.protein.short_name
for ppi_complex in self.ppicomplex_set.all()])
bound_protein_names = set(
[ppi_complex.complex.protein.short_name for ppi_complex in self.get_ppi_bound_complexes()])
[
ppi_complex.complex.protein.short_name
for ppi_complex in self.get_ppi_bound_complexes()])
partner_protein_names = all_protein_names - bound_protein_names
bound_str = ','.join(bound_protein_names)
partner_str = ','.join(partner_protein_names)
......@@ -854,7 +868,6 @@ class Compound(AutoFillableModel):
insilico_av = models.BooleanField('In silico tests performed', null=True, blank=True)
tests_av = models.IntegerField(verbose_name='Number of tests available', null=True, blank=True)
class Meta:
ordering = ['id']
indexes = [
......
......@@ -3,12 +3,10 @@ iPPI-DB unit tests
"""
import re
from tempfile import NamedTemporaryFile
from django.core.management import call_command
from django.test import TestCase
from django.urls import reverse
from django.contrib.auth import get_user_model
from openbabel import vectorUnsignedInt, OBFingerprint
from ippidb import ws, models
......@@ -17,8 +15,7 @@ from .models import (
Compound,
CompoundTanimoto,
create_tanimoto,
update_compound_cached_properties,
Symmetry,
update_compound_cached_properties
)
from .models import DrugBankCompound
from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
......@@ -337,8 +334,8 @@ class CompoundAnnotationsTestCase(TestCase):
class QueryCompoundViewsTestCase(TestCase):
@classmethod
def setUpTestData(cls):
c = create_dummy_compound(1, "CC")
c = create_dummy_compound(2, "CCC")
create_dummy_compound(1, "CC")
create_dummy_compound(2, "CCC")
call_command("lle_le")
call_command("pca")
......
......@@ -2,7 +2,6 @@
iPPI-DB contribution module tests
"""
import re
from tempfile import NamedTemporaryFile
from django.test import TestCase
......@@ -10,7 +9,6 @@ from django.urls import reverse
from django.contrib.auth import get_user_model
from ippidb import models
from ippidb import forms
class ContributionViewsTestCase(TestCase):
......
......@@ -2,35 +2,43 @@
iPPI-DB chemoinformatics utility functions
"""
#FIXME: to work, this currently needs awkward PYTHONPATH tweaks, like:
#$export PYTHONPATH=/home/hmenager/openbabellocal/lib/python3.6/site-packages/:$PYTHONPATH
# FIXME: to work, this currently needs awkward PYTHONPATH tweaks, like:
# $export PYTHONPATH=/home/hmenager/openbabellocal/lib/python3.6/
# site-packages/:$PYTHONPATH
import pybel
import openbabel as ob
def mol2smi(mol_string):
    """
    Convert a molecule given in "mol" format into its "smi" representation

    :param mol_string: molecule description, parsed by pybel as "mol"
    :type mol_string: str
    :return: the "smi" output written by pybel, stripped of surrounding
             whitespace
    :rtype: str
    """
    molecule = pybel.readstring("mol", mol_string)
    smiles = molecule.write(format="smi")
    return smiles.strip()
def smi2mol(smi_string):
    """
    Convert a molecule given in "smi" format into its "mol" representation,
    after running the "gen2D" operation on it

    :param smi_string: molecule description, parsed by pybel as "smi"
    :type smi_string: str
    :return: the "mol" output written by pybel
    """
    molecule = pybel.readstring("smi", smi_string)
    # generate 2D coordinates for MarvinJS layout
    # NB: the lines below should be replaced as soon as the new version of
    # openbabel is out (>2.4.1), by a call to molecule.make2D()
    # (see https://github.com/openbabel/openbabel/blob/
    # 06e233e2ca0c6d9f86f74bafe74ffcb75e9d76cd/scripts/python/pybel.py#L577)
    gen_2d = pybel._operations["gen2D"]
    gen_2d.Do(molecule.OBMol)
    return molecule.write(format="mol")
# converter created once at import time and reused by every smi2inchi() call
smi2inchi_conv = ob.OBConversion()
smi2inchi_conv.SetInAndOutFormats("smi", "inchi")


def smi2inchi(smi_string):
    """
    Convert a "smi" string into the "inchi" output of the module-level
    smi2inchi_conv converter

    :param smi_string: molecule description read by the converter as "smi"
    :type smi_string: str
    :return: the converter output, stripped of surrounding whitespace
    """
    ob_mol = ob.OBMol()
    # NOTE(review): the value returned by ReadString is not checked here
    smi2inchi_conv.ReadString(ob_mol, smi_string)
    inchi = smi2inchi_conv.WriteString(ob_mol)
    return inchi.strip()
smi2inchikey_conv = ob.OBConversion()
smi2inchikey_conv.SetInAndOutFormats("smi", "inchi")
smi2inchikey_conv.SetOptions("K",smi2inchikey_conv.OUTOPTIONS)
smi2inchikey_conv.SetOptions("K", smi2inchikey_conv.OUTOPTIONS)
def smi2inchikey(smi_string):
mol = ob.OBMol()
......@@ -39,7 +47,6 @@ def smi2inchikey(smi_string):
class FingerPrinter(object):
def __init__(self, name="FP4"):
self.fingerprinter = ob.OBFingerprint.FindFingerprint(name)
self._smiles_parser = ob.OBConversion()
......@@ -58,15 +65,30 @@ class FingerPrinter(object):
return fp
def fp_dict(self, smiles_dict):
    """
    generate a dict of {compound id: fingerprint} from a dict of
    {compound id: SMILES}

    Each value of smiles_dict is passed to self.fp(), and the result is
    stored under the same key.

    :param smiles_dict: mapping of compound id to SMILES entry
    :type smiles_dict: dict
    :return: mapping of compound id to the value returned by self.fp()
    :rtype: dict
    """
    return {
        compound_id: self.fp(smiles_entry)
        for compound_id, smiles_entry in smiles_dict.items()
    }
def tanimoto_fps(self, smiles_query, fp_dict):
    """
    perform a tanimoto similarity search using a smiles query string
    on a dict of {compound id: fingerprint}

    :param smiles_query: query, converted once with self.fp()
    :param fp_dict: mapping of compound id to fingerprint
    :type fp_dict: dict
    :return: mapping of compound id to the value returned by
             self.fingerprinter.Tanimoto(query fingerprint, entry fingerprint)
    :rtype: dict
    """
    query_fp = self.fp(smiles_query)
    scores = {}
    for key, candidate_fp in fp_dict.items():
        scores[key] = self.fingerprinter.Tanimoto(query_fp, candidate_fp)
    return scores
def tanimoto_smiles(self, query_smiles, smiles_dict):
    """
    perform a tanimoto similarity search using a smiles query on a
    dict of {compound id: SMILES}

    The SMILES entries are first turned into fingerprints with
    self.fp_dict(), then the search is delegated to self.tanimoto_fps().

    :param query_smiles: query, forwarded unchanged to self.tanimoto_fps()
    :param smiles_dict: mapping of compound id to SMILES entry
    :type smiles_dict: dict
    :return: whatever self.tanimoto_fps() returns for these fingerprints
    """
    fingerprints = self.fp_dict(smiles_dict)
    return self.tanimoto_fps(query_smiles, fingerprints)
......@@ -12,13 +12,12 @@ from bs4 import BeautifulSoup
class EntryNotFoundError(Exception):
    """
    Exception raised when an entry is not found

    The message includes the status code only when one is provided.

    :param entry_id: identifier of the entry that was not found, also kept
                     on the instance as ``entry_id``
    :param status_code: optional status/error code to report in the message
    """

    def __init__(self, entry_id, status_code=None):
        self.entry_id = entry_id
        # The original test was inverted: it dropped the code when one was
        # given, and formatted None with %i (TypeError) when none was given.
        if status_code is None:
            msg = "%s not found." % entry_id
        else:
            # %s instead of %i so that a non-integer code cannot make the
            # construction of the exception itself fail
            msg = "%s not found (error_code %s)." % (entry_id, status_code)
        super().__init__(msg)
......@@ -48,34 +47,37 @@ def get_pubmed_info(pmid):
:rtype: dict
"""
eu = EUtils()
r = eu.EFetch('pubmed', pmid, retmode='dict', rettype='abstract')
r = eu.EFetch("pubmed", pmid, retmode="dict", rettype="abstract")
if isinstance(r, int):
raise PubMedEntryNotFound(pmid, r)
if r['PubmedArticleSet'] is None:
if r["PubmedArticleSet"] is None:
raise PubMedEntryNotFound(pmid)
article = r['PubmedArticleSet'][
'PubmedArticle']['MedlineCitation']['Article']
title = article['ArticleTitle']
authors_list = [a['LastName'] + ' ' + a['Initials']
for a in article['AuthorList']['Author']]
authors = ', '.join(authors_list)
journal_name = article['Journal']['Title']
biblio_date = article['Journal']['JournalIssue']['PubDate']
if 'Year' in biblio_date:
biblio_year = biblio_date['Year']
article = r["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"]["Article"]
title = article["ArticleTitle"]
authors_list = [
a["LastName"] + " " + a["Initials"] for a in article["AuthorList"]["Author"]
]
authors = ", ".join(authors_list)
journal_name = article["Journal"]["Title"]
biblio_date = article["Journal"]["JournalIssue"]["PubDate"]
if "Year" in biblio_date:
biblio_year = biblio_date["Year"]
else:
biblio_year = biblio_date['MedlineDate'][0:3]
return {'title': title,
'journal_name': journal_name,
'biblio_year': biblio_year,
'authors_list': authors}
biblio_year = biblio_date["MedlineDate"][0:3]
return {
"title": title,
"journal_name": journal_name,
"biblio_year": biblio_year,
"authors_list": authors,
}
def get_epo_info(patent_number):
"""
Retrieve information about a patent using the EPO website
WARNING: this is not to be used anymore, the 3.1 version of the EPO service is now offline
WARNING: this is not to be used anymore, the 3.1 version of the EPO service is now
offline
:param patent_number: patent number
:type patent_number: str
......@@ -83,24 +85,38 @@ def get_epo_info(patent_number):
:rtype: dict
"""
resp = requests.get(
'http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/{}/biblio.json'.format(patent_number))
f"http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/"
f"{patent_number}/biblio.json"
)
data = resp.json()
exchange_doc = data['ops:world-patent-data'][
'exchange-documents']['exchange-document']
exchange_doc = data["ops:world-patent-data"]["exchange-documents"][
"exchange-document"
]
if isinstance(exchange_doc, list):
exchange_doc = exchange_doc[0]
title = [el['$']
for el in exchange_doc['bibliographic-data']['invention-title'] if el['@lang'] == 'en'][0]
authors = [i['inventor-name']['name']['$']
for i in exchange_doc['bibliographic-data']['parties']['inventors']['inventor'] if
i['@data-format'] == 'original'][0]
biblio_year = [el['date']['$'][:4]
for el in exchange_doc['bibliographic-data']['publication-reference']['document-id'] if
el['@document-id-type'] == 'epodoc'][0]
return {'title': title,
'journal_name': None,
'biblio_year': biblio_year,
'authors_list': authors}
title = [
el["$"]
for el in exchange_doc["bibliographic-data"]["invention-title"]
if el["@lang"] == "en"
][0]
authors = [
i["inventor-name"]["name"]["$"]
for i in exchange_doc["bibliographic-data"]["parties"]["inventors"]["inventor"]
if i["@data-format"] == "original"
][0]
biblio_year = [
el["date"]["$"][:4]
for el in exchange_doc["bibliographic-data"]["publication-reference"][
"document-id"
]
if el["@document-id-type"] == "epodoc"
][0]
return {
"title": title,
"journal_name": None,
"biblio_year": biblio_year,
"authors_list": authors,
}
def get_google_patent_info_ris(patent_number):
......@@ -114,7 +130,7 @@ def get_google_patent_info_ris(patent_number):
:return: patent metadata (title, journal name, publication year, authors list).
:rtype: dict
"""
url = 'https://encrypted.google.com/patents/{}.ris'.format(patent_number)
url = "https://encrypted.google.com/patents/{}.ris".format(patent_number)
resp = requests.get(url)
title = None
authors = []
......@@ -127,10 +143,12 @@ def get_google_patent_info_ris(patent_number):
title = line[1]
elif line[0] == "Y1":
biblio_year = line[1].split("/")[0]
return {'title': title,
'journal_name': None,
'biblio_year': biblio_year,
'authors_list': authors}
return {
"title": title,
"journal_name": None,
"biblio_year": biblio_year,
"authors_list": authors,
}
def get_google_patent_info(patent_number):
......@@ -142,22 +160,25 @@ def get_google_patent_info(patent_number):
:return: patent metadata (title, journal name, publication year, authors list).
:rtype: dict
"""
url = 'https://patents.google.com/patent/{}'.format(patent_number)
url = "https://patents.google.com/patent/{}".format(patent_number)
resp = requests.get(url)
if resp.status_code != 200:
raise PatentNotFound(patent_number, resp.status_code)
soup = BeautifulSoup(resp.text, 'html.parser')
title = soup.find_all('meta', attrs={'name': 'DC.title'})[0]['content'].strip()
soup = BeautifulSoup(resp.text, "html.parser")
title = soup.find_all("meta", attrs={"name": "DC.title"})[0]["content"].strip()
authors_list = []
for author_meta in soup.find_all('meta', attrs={'name': 'DC.contributor'}):
authors_list.append(author_meta['content'].strip())
authors = ', '.join(authors_list)
biblio_year = soup.find_all('meta', attrs={'name': 'DC.date'})[0]['content'].strip()[0:4]
return {'title': title,
'journal_name': None,
'biblio_year': biblio_year,
'authors_list': authors}
for author_meta in soup.find_all("meta", attrs={"name": "DC.contributor"}):
authors_list.append(author_meta["content"].strip())
authors = ", ".join(authors_list)
biblio_year = soup.find_all("meta", attrs={"name": "DC.date"})[0][
"content"
].strip()[0:4]
return {
"title": title,
"journal_name": None,
"biblio_year": biblio_year,
"authors_list": authors,
}
def get_doi_info(doi):
......@@ -169,41 +190,55 @@ def get_doi_info(doi):
:return: publication metadata (title, journal name, publication year, authors list).
:rtype: dict
"""
resp = requests.get('http://dx.doi.org/%s' % doi, headers={'Accept':'application/vnd.citationstyles.csl+json'})
resp = requests.get(
"http://dx.doi.org/%s" % doi,
headers={"Accept": "application/vnd.citationstyles.csl+json"},
)
resp.raise_for_status()
json_data = resp.json()
title = json_data['title']
journal_name = json_data.get("container-title", json_data.get("original-title", None))
biblio_year=0
title = json_data["title"]
journal_name = json_data.get(
"container-title", json_data.get("original-title", None)
)
biblio_year = 0
try:
if 'journal-issue' in json_data and 'published-print' in json_data['journal-issue']:
biblio_year = json_data['journal-issue']['published-print']['date-parts'][0][0]
elif 'published-print' in json_data:
biblio_year = json_data['published-print']['date-parts'][0][0]
elif 'issued' in json_data:
biblio_year = json_data['issued']['date-parts'][0][0]
if (
"journal-issue" in json_data
and "published-print" in json_data["journal-issue"]
):
biblio_year = json_data["journal-issue"]["published-print"]["date-parts"][
0
][0]
elif "published-print" in json_data:
biblio_year = json_data["published-print"]["date-parts"][0][0]
elif "issued" in json_data:
biblio_year = json_data["issued"]["date-parts"][0][0]
else:
biblio_year = json_data['published-online']['date-parts'][0][0]
biblio_year = json_data["published-online"]["date-parts"][0][0]
except KeyError as e:
print('http://dx.doi.org/%s' % doi)
print("http://dx.doi.org/%s" % doi)
print(json_data)
raise e
authors_list = []
for author_data in json_data['author']:
for author_data in json_data["author"]:
try:
if 'family' in author_data:
authors_list.append('%s %s' % (author_data["family"], author_data.get("given", "")))
if "family" in author_data:
authors_list.append(
"%s %s" % (author_data["family"], author_data.get("given", ""))
)
else:
authors_list.append(author_data['name'])
authors_list.append(author_data["name"])
except KeyError as e:
print('http://dx.doi.org/%s' % doi)
print("http://dx.doi.org/%s" % doi)
print(json_data)
raise e
authors = ', '.join(authors_list)
return {'title': title,
'journal_name': journal_name,
'biblio_year': biblio_year,
'authors_list': authors}
authors = ", ".join(authors_list)
return {
"title": title,
"journal_name": journal_name,
"biblio_year": biblio_year,
"authors_list": authors,
}
def get_uniprot_info(uniprot_id):
......@@ -212,66 +247,76 @@ def get_uniprot_info(uniprot_id):
:param uniprot_id: Uniprot ID
:type uniprot_id: str
:return: protein metadata (recommended name, organism, gene, entry name, short name, molecular functions).
:return: protein metadata (recommended name, organism, gene, entry name,
short name, molecular functions).
:rtype: dict
"""
uniprot_client = UniProt()
ns = {'u': 'http://uniprot.org/uniprot'}
ns = {"u": "http://uniprot.org/uniprot"}
try:
resp = uniprot_client.retrieve(uniprot_id)
except TypeError:
raise EntryNotFoundError(uniprot_id)
if resp.root == '':
if resp.root == "":
raise EntryNotFoundError(uniprot_id)
try:
recommended_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
"u:entry/u:protein/u:recommendedName/u:fullName", ns
)[0].text
except:
recommended_name = None
try:
recommended_short_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:shortName', ns)[0].text
"u:entry/u:protein/u:recommendedName/u:shortName", ns
)[0].text
except:
recommended_short_name = None
organism = resp.root.findall(
'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id']
'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns
)[0].attrib["id"]
gene_names = []
for el in resp.root.findall('u:entry/u:gene/u:name', ns):
gene_name = {'name': el.text, 'type': el.attrib['type']}
for el in resp.root.findall("u:entry/u:gene/u:name", ns):
gene_name = {"name": el.text, "type": el.attrib["type"]}
gene_names.append(gene_name)
try:
gene_id = resp.root.findall(
'u:entry/u:dbReference[@type="GeneID"]', ns)[0].attrib['id']
gene_id = resp.root.findall('u:entry/u:dbReference[@type="GeneID"]', ns)[
0
].attrib["id"]
except IndexError:
gene_id = None
entry_name = resp.root.findall('u:entry/u:name', ns)[0].text
entry_name = resp.root.findall("u:entry/u:name", ns)[0].text
go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns)
accessions = [el.text for el in resp.root.findall('u:entry/u:accession', ns)]
accessions = [el.text for el in resp.root.findall("u:entry/u:accession", ns)]
molecular_functions = []
cellular_localisations = []
biological_processes = []
for go_el in go_els:
term_property_value = go_el.findall(
'u:property[@type="term"]', ns)[0].attrib['value']
if term_property_value[0:2] == 'F:':
molecular_functions.append('GO_' + go_el.attrib['id'][3:])
if term_property_value[0:2] == 'C:':