diff --git a/ippisite/ippidb/models.py b/ippisite/ippidb/models.py
index c254f2967866aa0006d4fedb0c5709edae5a6e2f..6714580e6e21422f3d9c4430640067fb9d76432a 100644
--- a/ippisite/ippidb/models.py
+++ b/ippisite/ippidb/models.py
@@ -188,6 +188,7 @@ class Protein(AutoFillableModel):
     entry_name = models.CharField('Entry name', max_length=30)
     organism = models.ForeignKey('Taxonomy', models.CASCADE)
     molecular_functions = models.ManyToManyField(MolecularFunction)
+    domains = models.ManyToManyField('Domain')
 
     @transaction.atomic
     def autofill(self):
@@ -208,6 +209,7 @@ class Protein(AutoFillableModel):
             taxonomy.save(autofill=True)
         self.organism = taxonomy
         super(Protein, self).save()
+
         for go_id in info['molecular_functions']:
             try:
                 mol_function = MolecularFunction.objects.get(go_id=go_id)
@@ -217,6 +219,15 @@ class Protein(AutoFillableModel):
                 mol_function.save(autofill=True)
             self.molecular_functions.add(mol_function)
 
+        for domain_id in info['domains']:
+            try:
+                domain = Domain.objects.get(pfam_acc=domain_id)
+            except Domain.DoesNotExist:  # Pfam accession not stored yet: create it
+                domain = Domain()
+                domain.pfam_acc = domain_id
+                domain.save(autofill=True)
+            self.domains.add(domain)
+
     def is_autofill_done(self):
         return len(self.gene_name) > 0
 
@@ -231,7 +242,7 @@ class Domain(AutoFillableModel):
     pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
     pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
     pfam_description = models.CharField('Pfam Description', max_length=100)
-    domain_family = models.CharField('Domain family', max_length=25)
+    domain_family = models.CharField('Domain family', max_length=25, blank=True, default="")
     # TODO: what is this field? check database
     # contents
 
diff --git a/ippisite/ippidb/tests.py b/ippisite/ippidb/tests.py
index 5fc938e61a76b2964a8eeab772b14e58a057d1e0..5b0ff4b7017fb1f64046ba28d0f24b7b75c29449 100644
--- a/ippisite/ippidb/tests.py
+++ b/ippisite/ippidb/tests.py
@@ -4,15 +4,18 @@ iPPI-DB unit tests
 
 import re
 
+from django.core.management import call_command
 from django.test import TestCase
 from django.urls import reverse
-from django.core.management import call_command
 from openbabel import vectorUnsignedInt, OBFingerprint
 
+from ippidb import ws
+from ippidb.ws import get_uniprot_info
 from .models import Compound, CompoundTanimoto, create_tanimoto
-from .models import DrugBankCompound, DrugbankCompoundTanimoto
+from .models import DrugBankCompound
 from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
 
+
 class MolSmiTestCase(TestCase):
     """
     Test MOL to SMILES and SMILES to MOL format conversion functions
@@ -322,3 +325,71 @@ class QuestionCompoundViews(TestCase):
         url = reverse('compound_list')
         response = self.client.get(url)
         self.assertEqual(response.status_code, 200)
+
+
+class TestGetUniprotInfo(TestCase):
+    """
+    Test retrieving information for a uniprot entry
+    """
+
+    def test_get_uniprot_info(self):
+        resp = get_uniprot_info('Q15286')
+        self.assertEqual(resp['recommended_name'], 'Ras-related protein Rab-35')
+        self.assertEqual(resp['organism'], 9606)
+        self.assertEqual(resp['gene_id'], 11021)
+        exp_gene_names = [{'name': 'RAB35', 'type': 'primary'}, {'name': 'RAB1C', 'type': 'synonym'}, {'name': 'RAY', 'type': 'synonym'}]
+        self.assertEqual(sorted(resp['gene_names'], key=lambda k: k['name']), sorted(exp_gene_names, key=lambda k: k['name']))
+        self.assertEqual(resp['entry_name'], 'RAB35_HUMAN')
+        self.assertEqual(resp['short_name'], 'RAB35')
+        exp_molecular_functions = ['GO_0003924', 'GO_0005525', 'GO_0005546', 'GO_0019003']
+        self.assertEqual(sorted(resp['molecular_functions']), sorted(exp_molecular_functions))
+        exp_cellular_localisations = ['GO_0000139', 'GO_0005829', 'GO_0005886', 'GO_0005905', 'GO_0010008',
+                                      'GO_0030665', 'GO_0031253', 'GO_0042470', 'GO_0045171', 'GO_0045334',
+                                      'GO_0055038', 'GO_0070062', 'GO_0098993']
+        self.assertEqual(sorted(resp['cellular_localisations']), sorted(exp_cellular_localisations))
+        exp_biological_processes = ['GO_0000281', 'GO_0006886', 'GO_0008104', 'GO_0016197',
+                                    'GO_0019882', 'GO_0031175', 'GO_0032456', 'GO_0032482',
+                                    'GO_0036010', 'GO_0048227', 'GO_1990090']
+        self.assertEqual(sorted(resp['biological_processes']), sorted(exp_biological_processes))
+        exp_accessions = ['Q15286', 'B2R6E0', 'B4E390']
+        self.assertEqual(sorted(resp['accessions']), sorted(exp_accessions))
+        exp_citations = [
+            {'doi': '10.1006/bbrc.1994.2889', 'pmid': '7811277'},
+            {'doi': '10.1038/ng1285', 'pmid': '14702039'},
+            {'doi': '10.1038/nature04569', 'pmid': '16541075'},
+            {'doi': '10.1101/gr.2596504', 'pmid': '15489334'},
+            {'doi': '10.1016/j.cub.2006.07.020', 'pmid': '16950109'},
+            {'doi': '10.1021/pr060363j', 'pmid': '17081065'},
+            {'doi': '10.1074/jbc.M109.050930', 'pmid': '20154091'},
+            {'doi': '10.1186/1752-0509-5-17', 'pmid': '21269460'},
+            {'doi': '10.1038/nature10335', 'pmid': '21822290'},
+            {'doi': '10.1038/emboj.2012.16', 'pmid': '22307087'},
+            {'doi': '10.1111/j.1600-0854.2011.01294.x', 'pmid': '21951725'},
+            {'doi': '10.1021/pr300630k', 'pmid': '23186163'},
+            {'doi': '10.1002/pmic.201400617', 'pmid': '25944712'},
+            {'doi': '10.1073/pnas.1110415108', 'pmid': '22065758'}
+        ]
+        self.assertEqual(sorted(exp_citations, key=lambda k: k['pmid']), sorted(resp['citations'], key=lambda k: k['pmid']))
+        exp_alternative_names = [{'full': 'GTP-binding protein RAY'}, {'full': 'Ras-related protein Rab-1C'}]
+        self.assertEqual(sorted(exp_alternative_names, key=lambda k: k['full']), sorted(resp['alternative_names'], key=lambda k: k['full']))
+
+    def test_get_uniprot_info_domains(self):
+        resp = get_uniprot_info('O00255')
+        exp_domains = ['PF05053']
+        self.assertEqual(sorted(resp['domains']), sorted(exp_domains))
+
+
+class TestGetPDBUniProtMapping(TestCase):
+    """
+    Test retrieving protein for a PDB entry
+    """
+
+    def test_find_info(self):
+        target = sorted(['Q03164', 'O00255'])
+        resp = ws.get_pdb_uniprot_mapping('3u85')
+        resp = sorted(resp)
+        self.assertEqual(resp, target)
+        self.assertEqual(len(resp), len(set(resp)))
+
+    def test_entry_not_found(self):
+        self.assertRaises(ws.EntryNotFoundError, ws.get_pdb_uniprot_mapping, 'Xu85')
diff --git a/ippisite/ippidb/ws.py b/ippisite/ippidb/ws.py
index ae59dcdc87a4e86c0ac7fe91dd223614b2dbb413..67ccbeab34e0b9d8f43214a1596333ad22ec0de1 100644
--- a/ippisite/ippidb/ws.py
+++ b/ippisite/ippidb/ws.py
@@ -1,6 +1,7 @@
 """
 iPPI-DB web-service client utility functions
 """
+
 import json
 import xml.etree.ElementTree as ET
 
@@ -10,7 +11,14 @@ from bioservices.uniprot import UniProt
 from bs4 import BeautifulSoup
 
 
-class BibliographicalEntryNotFound(Exception):
+class EntryNotFoundError(Exception):
+
+    def __init__(self, entry_id):
+        self.entry_id = entry_id
+        super().__init__('%s not found' % entry_id)
+
+
+class BibliographicalEntryNotFound(EntryNotFoundError):
     pass
 
 
@@ -182,30 +190,83 @@ def get_uniprot_info(uniprot_id):
     """
     uniprot_client = UniProt()
     ns = {'u': 'http://uniprot.org/uniprot'}
-    resp = uniprot_client.retrieve(uniprot_id)
-    f = open('/tmp/'+uniprot_id+'.xml', 'w')
-    f.write(str(resp))
-    f.close()
-    recommended_name = resp.root.findall(
-        'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
+    try:
+        resp = uniprot_client.retrieve(uniprot_id)
+    except TypeError as err:  # NOTE(review): presumably what the client raises for an unknown id
+        raise EntryNotFoundError(uniprot_id) from err
+    if resp.root == '':
+        raise EntryNotFoundError(uniprot_id)
+    try:
+        recommended_name = resp.root.findall(
+            'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
+    except IndexError:  # findall() matched nothing: no recommended full name
+        recommended_name = None
+    try:
+        recommended_short_name = resp.root.findall(
+            'u:entry/u:protein/u:recommendedName/u:shortName', ns)[0].text
+    except IndexError:  # findall() matched nothing: no recommended short name
+        recommended_short_name = None
     organism = resp.root.findall(
         'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id']
-    gene = resp.root.findall(
-        'u:entry/u:gene/u:name[@type="primary"]', ns)[0].text
+    gene_names = []
+    for el in resp.root.findall('u:entry/u:gene/u:name', ns):
+        gene_name = {'name': el.text, 'type': el.attrib['type']}
+        gene_names.append(gene_name)
+    try:
+        gene_id = resp.root.findall(
+            'u:entry/u:dbReference[@type="GeneID"]', ns)[0].attrib['id']
+    except IndexError:
+        gene_id = None
     entry_name = resp.root.findall('u:entry/u:name', ns)[0].text
     go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns)
+    accessions = [el.text for el in resp.root.findall('u:entry/u:accession', ns)]
     molecular_functions = []
+    cellular_localisations = []
+    biological_processes = []
     for go_el in go_els:
         term_property_value = go_el.findall(
             'u:property[@type="term"]', ns)[0].attrib['value']
         if term_property_value[0:2] == 'F:':
             molecular_functions.append('GO_' + go_el.attrib['id'][3:])
+        if term_property_value[0:2] == 'C:':
+            cellular_localisations.append('GO_' + go_el.attrib['id'][3:])
+        if term_property_value[0:2] == 'P:':
+            biological_processes.append('GO_' + go_el.attrib['id'][3:])
+    citations = []
+    for el in resp.root.findall('u:entry/u:reference', ns):
+        try:
+            doi = el.findall('u:citation/u:dbReference[@type="DOI"]', ns)[0].attrib['id']
+            pmid = el.findall('u:citation/u:dbReference[@type="PubMed"]', ns)[0].attrib['id']
+            citations.append({'doi': doi, 'pmid': pmid})
+        except IndexError:  # skip references lacking either a DOI or a PubMed id
+            continue
+    alternative_names = []
+    for el in resp.root.findall('u:entry/u:protein/u:alternativeName', ns):
+        alternative_name = {'full': el.findall('u:fullName', ns)[0].text}
+        if el.findall('u:shortName', ns):
+            alternative_name['short'] = el.findall('u:shortName', ns)[0].text
+        alternative_names.append(alternative_name)
+
+    db_references = resp.root.findall('u:entry/u:dbReference[@type="Pfam"]', ns)
+    domains = []
+    for db_reference in db_references:
+        name = db_reference.attrib["id"]
+        domains.append(name)
+
     return {'recommended_name': recommended_name,
+            'recommended_short_name': recommended_short_name,
             'organism': int(organism),
-            'gene': gene,
+            'gene_id': int(gene_id) if gene_id else None,
+            'accessions': accessions,
+            'gene_names': gene_names,
             'entry_name': entry_name,
             'short_name': entry_name.split('_')[0],
-            'molecular_functions': molecular_functions
+            'molecular_functions': molecular_functions,
+            'domains': domains,
+            'cellular_localisations': cellular_localisations,
+            'biological_processes': biological_processes,
+            'citations': citations,
+            'alternative_names': alternative_names
             }
 
 
@@ -272,6 +333,8 @@ def get_pdb_uniprot_mapping(pdb_id):
     pdb_id = pdb_id.lower()
     resp = requests.get(
         'https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}'.format(pdb_id.lower()))
+    if resp.status_code != 200:
+        raise EntryNotFoundError(pdb_id)
     uniprot_ids = list(resp.json()[pdb_id]['UniProt'].keys())
     return uniprot_ids
 