diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ac0bda5ff3d4a248ed3af1303d093938fdc49e7
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.pythonPath": "venv/bin/python",
+    "git.ignoreLimitWarning": true,
+    "python.linting.flake8Enabled": true,
+    "python.linting.pylintEnabled": false,
+    "python.linting.flake8Args": ["--max-line-length=88"]
+}
\ No newline at end of file
diff --git a/ippisite/.flake8 b/ippisite/.flake8
new file mode 100644
index 0000000000000000000000000000000000000000..dbff3b4c38c6e454b894283a282b68e5ab220c09
--- /dev/null
+++ b/ippisite/.flake8
@@ -0,0 +1,3 @@
+[flake8]
+exclude = .git,*migrations*
+max-line-length = 88
diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py
index b167850e99938ccd10e0395cb5f5109d31003a24..499347e97ab477a2daead49ef21a0b257aaefafe 100644
--- a/ippisite/ippidb/models.py
+++ b/ippisite/ippidb/models.py
@@ -18,16 +18,16 @@ from django.urls import reverse
 from django.utils.translation import ugettext_lazy as _
 
 from .utils import FingerPrinter, smi2inchi, smi2inchikey
-from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
-    get_pfam_info, get_doi_info
+from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, \
+    get_taxonomy_info, get_go_info, get_pfam_info, get_doi_info
 
 
 class AutoFillableModel(models.Model):
     """
     AutoFillableModel makes it possible to automatically fill model fields from
     external sources in the autofill() method
-    The save method allows to either include autofill or not. in autofill kwarg is
-    set to True, save() will first call autofill(), otherwise it won't
+    The save method can either include autofill or not: if the autofill kwarg
+    is set to True, save() will first call autofill(), otherwise it won't
     """
 
     class Meta:
@@ -49,7 +49,8 @@ class AutoFillableModel(models.Model):
 
     def autofill_post_save(self):
         """
-        method called after the save is done, usefull for settings m2m relations
+        method called after the save is done, useful for setting m2m
+        relations
         :return:
         """
         pass
@@ -74,7 +75,8 @@ class Bibliography(AutoFillableModel):
         DO=re.compile("^10.\d{4,9}/.+$"),
     )
     source = models.CharField(
-        'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
+        'Bibliographic type', max_length=2, choices=SOURCES,
+        default=SOURCES[0][0])
     id_source = models.CharField('Bibliographic ID', max_length=25)
     title = models.TextField('Title')
     journal_name = models.TextField('Journal name', null=True, blank=True)
@@ -129,7 +131,9 @@ class Bibliography(AutoFillableModel):
         if not id_source_validator.match(id_source):
             raise ValidationError(
                 dict(
-                    id_source=_("Must match pattern %s for this selected source" % id_source_validator.pattern)
+                    id_source=_(
+                        f"Must match pattern {id_source_validator.pattern}"
+                        " for this selected source")
                 )
             )
         return True
@@ -259,7 +263,8 @@ class Protein(AutoFillableModel):
     def autofill_post_save(self):
         info = self.__info
         for go_id in info['molecular_functions']:
-            mol_function, created = MolecularFunction.objects.get_or_create(go_id=go_id)
+            mol_function, created = MolecularFunction.objects.get_or_create(
+                go_id=go_id)
             self.molecular_functions.add(mol_function)
 
         for domain_id in info['domains']:
@@ -280,7 +285,8 @@ class Domain(AutoFillableModel):
     pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
     pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
     pfam_description = models.CharField('Pfam Description', max_length=100)
-    domain_family = models.CharField('Domain family', max_length=25, blank=True, default="")
+    domain_family = models.CharField('Domain family', max_length=25,
+                                     blank=True, default="")
     # TODO: what is this field? check database
     # contents
@@ -302,7 +308,7 @@ class Domain(AutoFillableModel):
         return self.pfam_id
 
     def __str__(self):
-        return '{} ({}-{})'.format(self.pfam_acc, self.pfam_id, self.pfam_description)
+        return f'{self.pfam_acc} ({self.pfam_id}-{self.pfam_description})'
 
 
 class ProteinDomainComplex(models.Model):
@@ -390,7 +396,8 @@ class Ppi(AutoFillableModel):
         'Total number of pockets in the complex', default=1)
     symmetry = models.ForeignKey(Symmetry, models.CASCADE)
     diseases = models.ManyToManyField(Disease, blank=True)
-    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True, blank=True)
+    family = models.ForeignKey(PpiFamily, models.CASCADE, null=True,
+                               blank=True)
     name = models.TextField('PPI name', null=True, blank=True)
 
     def __str__(self):
@@ -410,13 +417,20 @@ class Ppi(AutoFillableModel):
         """
        return bound ppi complexes belonging to this ppi
         """
-        return PpiComplex.objects.filter(ppi=self, complex__in=ProteinDomainBoundComplex.objects.all())
+        return PpiComplex.objects.filter(
+            ppi=self,
+            complex__in=ProteinDomainBoundComplex.objects.all()
+        )
 
     def compute_name_from_protein_names(self):
         all_protein_names = set(
-            [ppi_complex.complex.protein.short_name for ppi_complex in self.ppicomplex_set.all()])
+            [
+                ppi_complex.complex.protein.short_name
+                for ppi_complex in self.ppicomplex_set.all()])
         bound_protein_names = set(
-            [ppi_complex.complex.protein.short_name for ppi_complex in self.get_ppi_bound_complexes()])
+            [
+                ppi_complex.complex.protein.short_name
+                for ppi_complex in self.get_ppi_bound_complexes()])
         partner_protein_names = all_protein_names - bound_protein_names
         bound_str = ','.join(bound_protein_names)
         partner_str = ','.join(partner_protein_names)
@@ -854,7 +868,6 @@ class Compound(AutoFillableModel):
     insilico_av = models.BooleanField('In silico tests performed', null=True, blank=True)
     tests_av = models.IntegerField(verbose_name='Number of tests available', null=True, blank=True)
 
-
     class Meta:
         ordering = ['id']
         indexes = [
diff --git a/ippisite/ippidb/tests.py b/ippisite/ippidb/tests.py
index ea26a69d260f2bf50664f5eb1efe1a5ad42ad574..7c5112b423ccd607d12de2e6a7af5388c3f5cadf 100644
--- a/ippisite/ippidb/tests.py
+++ b/ippisite/ippidb/tests.py
@@ -3,12 +3,10 @@
 iPPI-DB unit tests
 """
 import re
-from tempfile import NamedTemporaryFile
 
 from django.core.management import call_command
 from django.test import TestCase
 from django.urls import reverse
-from django.contrib.auth import get_user_model
 from openbabel import vectorUnsignedInt, OBFingerprint
 
 from ippidb import ws, models
@@ -17,8 +15,7 @@ from .models import (
     Compound,
     CompoundTanimoto,
     create_tanimoto,
-    update_compound_cached_properties,
-    Symmetry,
+    update_compound_cached_properties
 )
 from .models import DrugBankCompound
 from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
@@ -337,8 +334,8 @@ class CompoundAnnotationsTestCase(TestCase):
 
 class QueryCompoundViewsTestCase(TestCase):
     @classmethod
     def setUpTestData(cls):
-        c = create_dummy_compound(1, "CC")
-        c = create_dummy_compound(2, "CCC")
+        create_dummy_compound(1, "CC")
+        create_dummy_compound(2, "CCC")
         call_command("lle_le")
         call_command("pca")
diff --git a/ippisite/ippidb/tests_contribute.py b/ippisite/ippidb/tests_contribute.py
index cab5e273c6fb3cfcc2750e6f03ca61fbcccecac8..047ccfea4768b06b0c3ad6396dbcb184b489663a 100644
--- a/ippisite/ippidb/tests_contribute.py
+++ b/ippisite/ippidb/tests_contribute.py
@@ -2,7 +2,6 @@
 iPPI-DB contribution module tests
 """
-import re
 from tempfile import NamedTemporaryFile
 
 from django.test import TestCase
@@ -10,7 +9,6 @@ from django.urls import reverse
 from django.contrib.auth import get_user_model
 
 from ippidb import models
-from ippidb import forms
 
 
 class ContributionViewsTestCase(TestCase):
diff --git a/ippisite/ippidb/utils.py b/ippisite/ippidb/utils.py
index 41bf2a723a1d510ca42c1fefbc1b5595b8f570fa..d22aa6c5e7c676b1abe26962affdf15aa8925053 100644
--- a/ippisite/ippidb/utils.py
+++ b/ippisite/ippidb/utils.py
@@ -2,35 +2,43 @@
 iPPI-DB chemoinformatics utility functions
 """
-#FIXME: to work, this currently needs awkward PYTHONPATH tweaks, like:
-#$export PYTHONPATH=/home/hmenager/openbabellocal/lib/python3.6/site-packages/:$PYTHONPATH
+# FIXME: to work, this currently needs awkward PYTHONPATH tweaks, like:
+# $export PYTHONPATH=/home/hmenager/openbabellocal/lib/python3.6/
+# site-packages/:$PYTHONPATH
 import pybel
 import openbabel as ob
 
+
 def mol2smi(mol_string):
-    m = pybel.readstring('mol', mol_string)
-    return m.write(format='smi').strip()
+    m = pybel.readstring("mol", mol_string)
+    return m.write(format="smi").strip()
 
+
 def smi2mol(smi_string):
-    m = pybel.readstring('smi', smi_string)
+    m = pybel.readstring("smi", smi_string)
     # generate 2D coordinates for MarvinJS layout
-    #NB: the line below should be replaced as soon as the new version of openbabel
+    # NB: the line below should be replaced as soon as the new version of openbabel
     # is out (>2.4.1), by a call to m.make2D()
-    # (see https://github.com/openbabel/openbabel/blob/06e233e2ca0c6d9f86f74bafe74ffcb75e9d76cd/scripts/python/pybel.py#L577)
-    pybel._operations['gen2D'].Do(m.OBMol)
-    return m.write(format='mol')
+    # (see https://github.com/openbabel/openbabel/blob/
+    # 06e233e2ca0c6d9f86f74bafe74ffcb75e9d76cd/scripts/python/pybel.py#L577)
+    pybel._operations["gen2D"].Do(m.OBMol)
+    return m.write(format="mol")
 
+
 smi2inchi_conv = ob.OBConversion()
 smi2inchi_conv.SetInAndOutFormats("smi", "inchi")
 
+
 def smi2inchi(smi_string):
     mol = ob.OBMol()
     smi2inchi_conv.ReadString(mol, smi_string)
     return smi2inchi_conv.WriteString(mol).strip()
 
+
 smi2inchikey_conv = ob.OBConversion()
 smi2inchikey_conv.SetInAndOutFormats("smi", "inchi")
-smi2inchikey_conv.SetOptions("K",smi2inchikey_conv.OUTOPTIONS)
+smi2inchikey_conv.SetOptions("K", smi2inchikey_conv.OUTOPTIONS)
 
+
 def smi2inchikey(smi_string):
     mol = ob.OBMol()
@@ -39,7 +47,6 @@ def smi2inchikey(smi_string):
 
 
 class FingerPrinter(object):
-
     def __init__(self, name="FP4"):
         self.fingerprinter = ob.OBFingerprint.FindFingerprint(name)
         self._smiles_parser = ob.OBConversion()
@@ -58,15 +65,30 @@ class FingerPrinter(object):
         return fp
 
     def fp_dict(self, smiles_dict):
-        "generate a dict of {compound id: fingerprint} from a dict of {compound id: fingerprint}"
-        return {compound_id: self.fp(smiles_entry) for compound_id, smiles_entry in smiles_dict.items()}
+        """
+        generate a dict of {compound id: fingerprint} from a dict of
+        {compound id: SMILES}
+        """
+        return {
+            compound_id: self.fp(smiles_entry)
+            for compound_id, smiles_entry in smiles_dict.items()
+        }
 
     def tanimoto_fps(self, smiles_query, fp_dict):
-        "perform a tanimoto similarity search using a smiles query string on a dict of {compound id: fingerprint}"
+        """
+        perform a tanimoto similarity search using a smiles query string
+        on a dict of {compound id: fingerprint}
+        """
         fp_query = self.fp(smiles_query)
-        return {compound_id: self.fingerprinter.Tanimoto(fp_query, fp_entry) for compound_id, fp_entry in fp_dict.items()}
+        return {
+            compound_id: self.fingerprinter.Tanimoto(fp_query, fp_entry)
+            for compound_id, fp_entry in fp_dict.items()
+        }
 
     def tanimoto_smiles(self, query_smiles, smiles_dict):
-        "perform a tanimoto similarity search using a smiles query on a dict of {compound id: SMILES}"
+        """
+        perform a tanimoto similarity search using a smiles query on a
+        dict of {compound id: SMILES}
+        """
         fp_dict = self.fp_dict(smiles_dict)
         return self.tanimoto_fps(query_smiles, fp_dict)
diff --git a/ippisite/ippidb/ws.py b/ippisite/ippidb/ws.py
index 0095060c4a16714cad89ef60ffad82d59d5db528..ccd86c1a82359ffc96b1b11cec1b8fe61588d1f8 100644
--- a/ippisite/ippidb/ws.py
+++ b/ippisite/ippidb/ws.py
@@ -12,13 +12,12 @@ from bs4 import BeautifulSoup
 
 
 class EntryNotFoundError(Exception):
-
     def __init__(self, entry_id, status_code=None):
         self.entry_id = entry_id
         if status_code:
-            msg = '%s not found.' % entry_id
+            msg = "%s not found (error_code %i)." % (entry_id, status_code)
         else:
-            msg = '%s not found (error_code %i).' % (entry_id, status_code)
+            msg = "%s not found." % entry_id
         super().__init__(msg)
 
 
@@ -48,34 +47,37 @@ def get_pubmed_info(pmid):
     :rtype: dict
     """
     eu = EUtils()
-    r = eu.EFetch('pubmed', pmid, retmode='dict', rettype='abstract')
+    r = eu.EFetch("pubmed", pmid, retmode="dict", rettype="abstract")
     if isinstance(r, int):
         raise PubMedEntryNotFound(pmid, r)
-    if r['PubmedArticleSet'] is None:
+    if r["PubmedArticleSet"] is None:
         raise PubMedEntryNotFound(pmid)
-    article = r['PubmedArticleSet'][
-        'PubmedArticle']['MedlineCitation']['Article']
-    title = article['ArticleTitle']
-    authors_list = [a['LastName'] + ' ' + a['Initials']
-                    for a in article['AuthorList']['Author']]
-    authors = ', '.join(authors_list)
-    journal_name = article['Journal']['Title']
-    biblio_date = article['Journal']['JournalIssue']['PubDate']
-    if 'Year' in biblio_date:
-        biblio_year = biblio_date['Year']
+    article = r["PubmedArticleSet"]["PubmedArticle"]["MedlineCitation"]["Article"]
+    title = article["ArticleTitle"]
+    authors_list = [
+        a["LastName"] + " " + a["Initials"] for a in article["AuthorList"]["Author"]
+    ]
+    authors = ", ".join(authors_list)
+    journal_name = article["Journal"]["Title"]
+    biblio_date = article["Journal"]["JournalIssue"]["PubDate"]
+    if "Year" in biblio_date:
+        biblio_year = biblio_date["Year"]
     else:
-        biblio_year = biblio_date['MedlineDate'][0:3]
-    return {'title': title,
-            'journal_name': journal_name,
-            'biblio_year': biblio_year,
-            'authors_list': authors}
+        biblio_year = biblio_date["MedlineDate"][0:4]
+    return {
+        "title": title,
+        "journal_name": journal_name,
+        "biblio_year": biblio_year,
+        "authors_list": authors,
+    }
 
 
 def get_epo_info(patent_number):
     """
     Retrieve information about a patent using the EPO website
-    WARNING: this is not to be used anymore, the 3.1 version of the EPO service is now offline
+    WARNING: this is not to be used anymore, the 3.1 version of the EPO service is now
+    offline
 
     :param patent_number: patent number
     :type patent_number: str
@@ -83,24 +85,38 @@
     :return: patent metadata (title, journal name, publication year, authors list).
     :rtype: dict
     """
     resp = requests.get(
-        'http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/{}/biblio.json'.format(patent_number))
+        f"http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/"
+        f"{patent_number}/biblio.json"
+    )
     data = resp.json()
-    exchange_doc = data['ops:world-patent-data'][
-        'exchange-documents']['exchange-document']
+    exchange_doc = data["ops:world-patent-data"]["exchange-documents"][
+        "exchange-document"
+    ]
     if isinstance(exchange_doc, list):
         exchange_doc = exchange_doc[0]
-    title = [el['$']
-             for el in exchange_doc['bibliographic-data']['invention-title'] if el['@lang'] == 'en'][0]
-    authors = [i['inventor-name']['name']['$']
-               for i in exchange_doc['bibliographic-data']['parties']['inventors']['inventor'] if
-               i['@data-format'] == 'original'][0]
-    biblio_year = [el['date']['$'][:4]
-                   for el in exchange_doc['bibliographic-data']['publication-reference']['document-id'] if
-                   el['@document-id-type'] == 'epodoc'][0]
-    return {'title': title,
-            'journal_name': None,
-            'biblio_year': biblio_year,
-            'authors_list': authors}
+    title = [
+        el["$"]
+        for el in exchange_doc["bibliographic-data"]["invention-title"]
+        if el["@lang"] == "en"
+    ][0]
+    authors = [
+        i["inventor-name"]["name"]["$"]
+        for i in exchange_doc["bibliographic-data"]["parties"]["inventors"]["inventor"]
+        if i["@data-format"] == "original"
+    ][0]
+    biblio_year = [
+        el["date"]["$"][:4]
+        for el in exchange_doc["bibliographic-data"]["publication-reference"][
+            "document-id"
+        ]
+        if el["@document-id-type"] == "epodoc"
+    ][0]
+    return {
+        "title": title,
+        "journal_name": None,
+        "biblio_year": biblio_year,
+        "authors_list": authors,
+    }
 
 
 def get_google_patent_info_ris(patent_number):
@@ -114,7 +130,7 @@
     :return: patent metadata (title, journal name, publication year, authors list).
     :rtype: dict
     """
-    url = 'https://encrypted.google.com/patents/{}.ris'.format(patent_number)
+    url = "https://encrypted.google.com/patents/{}.ris".format(patent_number)
     resp = requests.get(url)
     title = None
     authors = []
@@ -127,10 +143,12 @@
             title = line[1]
         elif line[0] == "Y1":
             biblio_year = line[1].split("/")[0]
-    return {'title': title,
-            'journal_name': None,
-            'biblio_year': biblio_year,
-            'authors_list': authors}
+    return {
+        "title": title,
+        "journal_name": None,
+        "biblio_year": biblio_year,
+        "authors_list": authors,
+    }
 
 
 def get_google_patent_info(patent_number):
@@ -142,22 +160,25 @@
     :return: patent metadata (title, journal name, publication year, authors list).
     :rtype: dict
     """
-    url = 'https://patents.google.com/patent/{}'.format(patent_number)
+    url = "https://patents.google.com/patent/{}".format(patent_number)
     resp = requests.get(url)
     if resp.status_code != 200:
         raise PatentNotFound(patent_number, resp.status_code)
-    soup = BeautifulSoup(resp.text, 'html.parser')
-    title = soup.find_all('meta', attrs={'name': 'DC.title'})[0]['content'].strip()
+    soup = BeautifulSoup(resp.text, "html.parser")
+    title = soup.find_all("meta", attrs={"name": "DC.title"})[0]["content"].strip()
     authors_list = []
-    for author_meta in soup.find_all('meta', attrs={'name': 'DC.contributor'}):
-        authors_list.append(author_meta['content'].strip())
-    authors = ', '.join(authors_list)
-    biblio_year = soup.find_all('meta', attrs={'name': 'DC.date'})[0]['content'].strip()[0:4]
-    return {'title': title,
-            'journal_name': None,
-            'biblio_year': biblio_year,
-            'authors_list': authors}
-
+    for author_meta in soup.find_all("meta", attrs={"name": "DC.contributor"}):
+        authors_list.append(author_meta["content"].strip())
+    authors = ", ".join(authors_list)
+    biblio_year = soup.find_all("meta", attrs={"name": "DC.date"})[0][
+        "content"
+    ].strip()[0:4]
+    return {
+        "title": title,
+        "journal_name": None,
+        "biblio_year": biblio_year,
+        "authors_list": authors,
+    }
 
 
 def get_doi_info(doi):
@@ -169,41 +190,55 @@
     :return: publication metadata (title, journal name, publication year, authors list).
     :rtype: dict
     """
-    resp = requests.get('http://dx.doi.org/%s' % doi, headers={'Accept':'application/vnd.citationstyles.csl+json'})
+    resp = requests.get(
+        "http://dx.doi.org/%s" % doi,
+        headers={"Accept": "application/vnd.citationstyles.csl+json"},
+    )
     resp.raise_for_status()
     json_data = resp.json()
-    title = json_data['title']
-    journal_name = json_data.get("container-title", json_data.get("original-title", None))
-    biblio_year=0
+    title = json_data["title"]
+    journal_name = json_data.get(
+        "container-title", json_data.get("original-title", None)
+    )
+    biblio_year = 0
     try:
-        if 'journal-issue' in json_data and 'published-print' in json_data['journal-issue']:
-            biblio_year = json_data['journal-issue']['published-print']['date-parts'][0][0]
-        elif 'published-print' in json_data:
-            biblio_year = json_data['published-print']['date-parts'][0][0]
-        elif 'issued' in json_data:
-            biblio_year = json_data['issued']['date-parts'][0][0]
+        if (
+            "journal-issue" in json_data
+            and "published-print" in json_data["journal-issue"]
+        ):
+            biblio_year = json_data["journal-issue"]["published-print"]["date-parts"][
+                0
+            ][0]
+        elif "published-print" in json_data:
+            biblio_year = json_data["published-print"]["date-parts"][0][0]
+        elif "issued" in json_data:
+            biblio_year = json_data["issued"]["date-parts"][0][0]
         else:
-            biblio_year = json_data['published-online']['date-parts'][0][0]
+            biblio_year = json_data["published-online"]["date-parts"][0][0]
     except KeyError as e:
-        print('http://dx.doi.org/%s' % doi)
+        print("http://dx.doi.org/%s" % doi)
         print(json_data)
         raise e
     authors_list = []
-    for author_data in json_data['author']:
+    for author_data in json_data["author"]:
         try:
-            if 'family' in author_data:
-                authors_list.append('%s %s' % (author_data["family"], author_data.get("given", "")))
+            if "family" in author_data:
+                authors_list.append(
+                    "%s %s" % (author_data["family"], author_data.get("given", ""))
+                )
             else:
-                authors_list.append(author_data['name'])
+                authors_list.append(author_data["name"])
         except KeyError as e:
-            print('http://dx.doi.org/%s' % doi)
+            print("http://dx.doi.org/%s" % doi)
             print(json_data)
             raise e
-    authors = ', '.join(authors_list)
-    return {'title': title,
-            'journal_name': journal_name,
-            'biblio_year': biblio_year,
-            'authors_list': authors}
+    authors = ", ".join(authors_list)
+    return {
+        "title": title,
+        "journal_name": journal_name,
+        "biblio_year": biblio_year,
+        "authors_list": authors,
+    }
 
 
 def get_uniprot_info(uniprot_id):
@@ -212,66 +247,76 @@
     :param uniprot_id: Uniprot ID
     :type uniprot_id: str
-    :return: protein metadata (recommended name, organism, gene, entry name, short name, molecular functions).
+    :return: protein metadata (recommended name, organism, gene, entry name,
+        short name, molecular functions).
     :rtype: dict
     """
     uniprot_client = UniProt()
-    ns = {'u': 'http://uniprot.org/uniprot'}
+    ns = {"u": "http://uniprot.org/uniprot"}
     try:
         resp = uniprot_client.retrieve(uniprot_id)
     except TypeError:
         raise EntryNotFoundError(uniprot_id)
-    if resp.root == '':
+    if resp.root == "":
         raise EntryNotFoundError(uniprot_id)
     try:
         recommended_name = resp.root.findall(
-            'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
+            "u:entry/u:protein/u:recommendedName/u:fullName", ns
+        )[0].text
     except:
         recommended_name = None
     try:
         recommended_short_name = resp.root.findall(
-            'u:entry/u:protein/u:recommendedName/u:shortName', ns)[0].text
+            "u:entry/u:protein/u:recommendedName/u:shortName", ns
+        )[0].text
     except:
         recommended_short_name = None
     organism = resp.root.findall(
-        'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id']
+        'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns
+    )[0].attrib["id"]
     gene_names = []
-    for el in resp.root.findall('u:entry/u:gene/u:name', ns):
-        gene_name = {'name': el.text, 'type': el.attrib['type']}
+    for el in resp.root.findall("u:entry/u:gene/u:name", ns):
+        gene_name = {"name": el.text, "type": el.attrib["type"]}
         gene_names.append(gene_name)
     try:
-        gene_id = resp.root.findall(
-            'u:entry/u:dbReference[@type="GeneID"]', ns)[0].attrib['id']
+        gene_id = resp.root.findall('u:entry/u:dbReference[@type="GeneID"]', ns)[
+            0
+        ].attrib["id"]
     except IndexError:
         gene_id = None
-    entry_name = resp.root.findall('u:entry/u:name', ns)[0].text
+    entry_name = resp.root.findall("u:entry/u:name", ns)[0].text
     go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns)
-    accessions = [el.text for el in resp.root.findall('u:entry/u:accession', ns)]
+    accessions = [el.text for el in resp.root.findall("u:entry/u:accession", ns)]
     molecular_functions = []
     cellular_localisations = []
     biological_processes = []
     for go_el in go_els:
-        term_property_value = go_el.findall(
-            'u:property[@type="term"]', ns)[0].attrib['value']
-        if term_property_value[0:2] == 'F:':
-            molecular_functions.append('GO_' + go_el.attrib['id'][3:])
-        if term_property_value[0:2] == 'C:':
-            cellular_localisations.append('GO_' + go_el.attrib['id'][3:])
-        if term_property_value[0:2] == 'P:':
-            biological_processes.append('GO_' + go_el.attrib['id'][3:])
+        term_property_value = go_el.findall('u:property[@type="term"]', ns)[0].attrib[
+            "value"
+        ]
+        if term_property_value[0:2] == "F:":
+            molecular_functions.append("GO_" + go_el.attrib["id"][3:])
+        if term_property_value[0:2] == "C:":
+            cellular_localisations.append("GO_" + go_el.attrib["id"][3:])
+        if term_property_value[0:2] == "P:":
+            biological_processes.append("GO_" + go_el.attrib["id"][3:])
     citations = []
-    for el in resp.root.findall('u:entry/u:reference', ns):
+    for el in resp.root.findall("u:entry/u:reference", ns):
         try:
-            doi = el.findall('u:citation/u:dbReference[@type="DOI"]', ns)[0].attrib['id']
-            pmid = el.findall('u:citation/u:dbReference[@type="PubMed"]', ns)[0].attrib['id']
-            citations.append({'doi': doi, 'pmid': pmid})
+            doi = el.findall('u:citation/u:dbReference[@type="DOI"]', ns)[0].attrib[
+                "id"
+            ]
+            pmid = el.findall('u:citation/u:dbReference[@type="PubMed"]', ns)[0].attrib[
+                "id"
+            ]
+            citations.append({"doi": doi, "pmid": pmid})
         except IndexError:
             continue
     alternative_names = []
-    for el in resp.root.findall('u:entry/u:protein/u:alternativeName', ns):
-        alternative_name = {'full': el.findall('u:fullName', ns)[0].text}
-        if el.findall('u:shortName', ns):
-            alternative_name['short'] = el.findall('u:shortName', ns)[0].text
+    for el in resp.root.findall("u:entry/u:protein/u:alternativeName", ns):
+        alternative_name = {"full": el.findall("u:fullName", ns)[0].text}
+        if el.findall("u:shortName", ns):
+            alternative_name["short"] = el.findall("u:shortName", ns)[0].text
         alternative_names.append(alternative_name)
 
     db_references = resp.root.findall('u:entry/u:dbReference[@type="Pfam"]', ns)
@@ -280,21 +325,22 @@
         name = db_reference.attrib["id"]
         domains.append(name)
 
-    return {'recommended_name': recommended_name,
-            'recommended_short_name': recommended_short_name,
-            'organism': int(organism),
-            'gene_id': int(gene_id) if gene_id else None,
-            'accessions': accessions,
-            'gene_names': gene_names,
-            'entry_name': entry_name,
-            'short_name': entry_name.split('_')[0],
-            'molecular_functions': molecular_functions,
-            'domains': domains,
-            'cellular_localisations': cellular_localisations,
-            'biological_processes': biological_processes,
-            'citations': citations,
-            'alternative_names': alternative_names
-            }
+    return {
+        "recommended_name": recommended_name,
+        "recommended_short_name": recommended_short_name,
+        "organism": int(organism),
+        "gene_id": int(gene_id) if gene_id else None,
+        "accessions": accessions,
+        "gene_names": gene_names,
+        "entry_name": entry_name,
+        "short_name": entry_name.split("_")[0],
+        "molecular_functions": molecular_functions,
+        "domains": domains,
+        "cellular_localisations": cellular_localisations,
+        "biological_processes": biological_processes,
+        "citations": citations,
+        "alternative_names": alternative_names,
+    }
 
 
 def get_go_info(go_id):
@@ -307,11 +353,12 @@
     :rtype: dict
     """
     resp = requests.get(
-        'https://www.ebi.ac.uk/ols/api/ontologies/go/terms/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F{}'.format(
-            go_id))
+        f"https://www.ebi.ac.uk/ols/api/ontologies/go/terms/"
+        f"http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F{go_id}"
+    )
     data = resp.json()
-    label = data['label']
-    return {'label': label}
+    label = data["label"]
+    return {"label": label}
 
 
 def get_taxonomy_info(taxonomy_id):
@@ -324,9 +371,9 @@
     :rtype: dict
     """
     eu = EUtils()
-    r = eu.EFetch('taxonomy', taxonomy_id, retmode='dict')
-    scientific_name = r['TaxaSet']['Taxon']['ScientificName']
-    return {'scientific_name': scientific_name}
+    r = eu.EFetch("taxonomy", taxonomy_id, retmode="dict")
+    scientific_name = r["TaxaSet"]["Taxon"]["ScientificName"]
+    return {"scientific_name": scientific_name}
 
 
 def get_pfam_info(pfam_acc):
@@ -338,15 +385,13 @@
     :return: Protein family metadata (id, description).
     :rtype: dict
     """
-    resp = requests.get(
-        'https://pfam.xfam.org/family/{}?output=xml'.format(pfam_acc))
+    resp = requests.get("https://pfam.xfam.org/family/{}?output=xml".format(pfam_acc))
     root = ET.fromstring(resp.text)
-    ns = {'pfam': 'https://pfam.xfam.org/'}
-    entry = root.findall('pfam:entry', ns)[0]
-    pfam_id = entry.attrib['id']
-    description = entry.findall('pfam:description', ns)[0].text.strip()
-    return {'id': pfam_id,
-            'description': description}
+    ns = {"pfam": "https://pfam.xfam.org/"}
+    entry = root.findall("pfam:entry", ns)[0]
+    pfam_id = entry.attrib["id"]
+    description = entry.findall("pfam:description", ns)[0].text.strip()
+    return {"id": pfam_id, "description": description}
 
 
 def get_pdb_uniprot_mapping(pdb_id):
@@ -360,10 +405,11 @@
     """
     pdb_id = pdb_id.lower()
     resp = requests.get(
-        'https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}'.format(pdb_id.lower()))
+        "https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}".format(pdb_id.lower())
+    )
     if resp.status_code != 200:
         raise EntryNotFoundError(pdb_id, resp.status_code)
-    uniprot_ids = list(resp.json()[pdb_id]['UniProt'].keys())
+    uniprot_ids = list(resp.json()[pdb_id]["UniProt"].keys())
     return uniprot_ids
 
 
@@ -378,7 +424,8 @@ def pdb_entry_exists(pdb_id):
     """
     """
    """
     resp = requests.get(
-        'https://www.ebi.ac.uk/pdbe/api/pdb/entry/summary/{}'.format(pdb_id.lower()))
+        "https://www.ebi.ac.uk/pdbe/api/pdb/entry/summary/{}".format(pdb_id.lower())
+    )
     # EBI sends back either a 404 or an empty json if the PDB does not exist
     if not (resp.ok):
         return False
@@ -394,7 +441,8 @@ def convert_iupac_to_smiles_and_inchi(iupac):
     if iupac is None:
         raise EntryNotFoundError(iupac, 422)
     resp = requests.get(
-        'https://opsin.ch.cam.ac.uk/opsin/{}'.format(urllib_parse.quote(iupac, safe='')))
+        "https://opsin.ch.cam.ac.uk/opsin/{}".format(urllib_parse.quote(iupac, safe=""))
+    )
     if resp.status_code != 200:
         raise EntryNotFoundError(iupac, resp.status_code)
     ret = resp.json()
diff --git a/ippisite/requirements-dev.txt b/ippisite/requirements-dev.txt
index d2531e117fa2ec0f344fd73b065a59b05906c116..29824591a52c9b85cff19c09d2bdda3e1e8a042c 100644
--- a/ippisite/requirements-dev.txt
+++ b/ippisite/requirements-dev.txt
@@ -1,4 +1,7 @@
 -r requirements-core.txt
+# linting and formatting
+flake8
+black
 # documentation generation
 sphinx
 sphinx_rtd_theme
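Usage note (not part of the patch): a minimal sketch of how the `FingerPrinter` helper reformatted in `ippisite/ippidb/utils.py` above is meant to be called, assuming the Open Babel Python bindings (`openbabel`/`pybel`) are installed; the compound IDs and SMILES strings below are made-up illustration values, not data from the project.

```python
# Minimal usage sketch for ippidb.utils.FingerPrinter (illustrative values only).
from ippidb.utils import FingerPrinter

fingerprinter = FingerPrinter("FP4")  # "FP4" is the default fingerprint name

# tanimoto_smiles() takes a query SMILES and a {compound id: SMILES} dict
smiles_by_id = {1: "CC", 2: "CCC", 3: "CCO"}

# Returns a {compound id: Tanimoto score} dict for the query against each entry
scores = fingerprinter.tanimoto_smiles("CCO", smiles_by_id)
print(scores)
```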