Commit fb193e98 authored by Bryan BRANCOTTE
Browse files

Importing domains when auto filling a protein, and adding a link between protein and domain

Importing test TestGetUniprotInfo from flemmingsomedb
Adding test to ensure that domain is retrieved
WIP #35
parent 1ffb130d
......@@ -174,6 +174,7 @@ class Protein(AutoFillableModel):
entry_name = models.CharField('Entry name', max_length=30)
organism = models.ForeignKey('Taxonomy', models.CASCADE)
molecular_functions = models.ManyToManyField(MolecularFunction)
domains = models.ManyToManyField('Domain')
def autofill(self):
"""
......@@ -193,6 +194,7 @@ class Protein(AutoFillableModel):
taxonomy.save(autofill=True)
self.organism = taxonomy
super(Protein, self).save()
for go_id in info['molecular_functions']:
try:
mol_function = MolecularFunction.objects.get(go_id=go_id)
......@@ -202,6 +204,15 @@ class Protein(AutoFillableModel):
mol_function.save(autofill=True)
self.molecular_functions.add(mol_function)
# Link the protein to every Pfam domain listed in info['domains'],
# creating Domain rows that are not in the database yet.
for domain_id in info['domains']:
    try:
        domain = Domain.objects.get(pfam_acc=domain_id)
    except Domain.DoesNotExist:
        # Domain.objects.get() raises Domain.DoesNotExist, not
        # MolecularFunction.DoesNotExist: catching the latter let the
        # lookup failure propagate, so a missing domain was never created.
        # NOTE(review): indentation was lost in this view; saving only newly
        # created domains is assumed here -- confirm against the real file.
        domain = Domain()
        domain.pfam_acc = domain_id
        domain.save(autofill=True)
    self.domains.add(domain)
def __str__(self):
return '{} ({})'.format(self.uniprot_id, self.recommended_name_long)
......@@ -213,7 +224,7 @@ class Domain(AutoFillableModel):
pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
pfam_description = models.CharField('Pfam Description', max_length=100)
domain_family = models.CharField('Domain family', max_length=25)
domain_family = models.CharField('Domain family', max_length=25, blank=True, default="")
# TODO: what is this field? check database
# contents
......
......@@ -4,15 +4,18 @@ iPPI-DB unit tests
import re
from django.core.management import call_command
from django.test import TestCase
from django.urls import reverse
from django.core.management import call_command
from openbabel import vectorUnsignedInt, OBFingerprint
from ippidb import ws
from ippidb.ws import get_uniprot_info
from .models import Compound, CompoundTanimoto, create_tanimoto
from .models import DrugBankCompound, DrugbankCompoundTanimoto
from .models import DrugBankCompound
from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
class MolSmiTestCase(TestCase):
"""
Test MOL to SMILES and SMILES to MOL format conversion functions
......@@ -322,3 +325,71 @@ class QuestionCompoundViews(TestCase):
url = reverse('compound_list')
response = self.client.get(url)
self.assertEqual(response.status_code, 200)
class TestGetUniprotInfo(TestCase):
    """
    Check the fields returned by get_uniprot_info for a UniProt entry
    """

    def test_get_uniprot_info(self):
        # every checked field of entry Q15286 is compared with hard-coded values
        info = get_uniprot_info('Q15286')

        def ordered_by(records, field):
            # sort a list of dicts on one key so comparisons ignore order
            return sorted(records, key=lambda record: record[field])

        self.assertEqual(info['recommended_name'], 'Ras-related protein Rab-35')
        self.assertEqual(info['organism'], 9606)
        self.assertEqual(info['gene_id'], 11021)

        expected_gene_names = [
            {'name': 'RAB35', 'type': 'primary'},
            {'name': 'RAB1C', 'type': 'synonym'},
            {'name': 'RAY', 'type': 'synonym'},
        ]
        self.assertEqual(
            ordered_by(info['gene_names'], 'name'),
            ordered_by(expected_gene_names, 'name'),
        )

        self.assertEqual(info['entry_name'], 'RAB35_HUMAN')
        self.assertEqual(info['short_name'], 'RAB35')

        # flat lists of identifiers, compared regardless of order
        expected_id_lists = (
            ('molecular_functions', [
                'GO_0003924', 'GO_0005525', 'GO_0005546', 'GO_0019003',
            ]),
            ('cellular_localisations', [
                'GO_0000139', 'GO_0005829', 'GO_0005886', 'GO_0005905',
                'GO_0010008', 'GO_0030665', 'GO_0031253', 'GO_0042470',
                'GO_0045171', 'GO_0045334', 'GO_0055038', 'GO_0070062',
                'GO_0098993',
            ]),
            ('biological_processes', [
                'GO_0000281', 'GO_0006886', 'GO_0008104', 'GO_0016197',
                'GO_0019882', 'GO_0031175', 'GO_0032456', 'GO_0032482',
                'GO_0036010', 'GO_0048227', 'GO_1990090',
            ]),
            ('accessions', ['Q15286', 'B2R6E0', 'B4E390']),
        )
        for field, expected_ids in expected_id_lists:
            self.assertEqual(sorted(info[field]), sorted(expected_ids))

        expected_citations = [
            {'doi': '10.1006/bbrc.1994.2889', 'pmid': '7811277'},
            {'doi': '10.1038/ng1285', 'pmid': '14702039'},
            {'doi': '10.1038/nature04569', 'pmid': '16541075'},
            {'doi': '10.1101/gr.2596504', 'pmid': '15489334'},
            {'doi': '10.1016/j.cub.2006.07.020', 'pmid': '16950109'},
            {'doi': '10.1021/pr060363j', 'pmid': '17081065'},
            {'doi': '10.1074/jbc.M109.050930', 'pmid': '20154091'},
            {'doi': '10.1186/1752-0509-5-17', 'pmid': '21269460'},
            {'doi': '10.1038/nature10335', 'pmid': '21822290'},
            {'doi': '10.1038/emboj.2012.16', 'pmid': '22307087'},
            {'doi': '10.1111/j.1600-0854.2011.01294.x', 'pmid': '21951725'},
            {'doi': '10.1021/pr300630k', 'pmid': '23186163'},
            {'doi': '10.1002/pmic.201400617', 'pmid': '25944712'},
            {'doi': '10.1073/pnas.1110415108', 'pmid': '22065758'},
        ]
        self.assertEqual(
            ordered_by(expected_citations, 'pmid'),
            ordered_by(info['citations'], 'pmid'),
        )

        expected_alternative_names = [
            {'full': 'GTP-binding protein RAY'},
            {'full': 'Ras-related protein Rab-1C'},
        ]
        self.assertEqual(
            ordered_by(expected_alternative_names, 'full'),
            ordered_by(info['alternative_names'], 'full'),
        )

    def test_get_uniprot_info_domains(self):
        # the 'domains' field of entry O00255 must hold the expected Pfam accession
        info = get_uniprot_info('O00255')
        expected_domains = ['PF05053']
        self.assertEqual(sorted(info['domains']), sorted(expected_domains))
class TestGetPDBUniProtMapping(TestCase):
    """
    Check the lookup of UniProt identifiers for a PDB entry
    """

    def test_find_info(self):
        # PDB entry 3u85 must map to exactly these identifiers, each listed once
        expected = ['Q03164', 'O00255']
        expected.sort()
        found = sorted(ws.get_pdb_uniprot_mapping('3u85'))
        self.assertEqual(found, expected)
        self.assertEqual(len(found), len(set(found)))

    def test_entry_not_found(self):
        # an identifier with no mapping must raise EntryNotFoundError
        with self.assertRaises(ws.EntryNotFoundError):
            ws.get_pdb_uniprot_mapping('Xu85')
......@@ -2,13 +2,21 @@
iPPI-DB web-service client utility functions
"""
from bioservices.eutils import EUtils
from bioservices.uniprot import UniProt
import xml.etree.ElementTree as ET
import requests
from bioservices.eutils import EUtils
from bioservices.uniprot import UniProt
from bs4 import BeautifulSoup
class EntryNotFoundError(Exception):
    """
    Raised when no entry is found for a given identifier.

    The identifier is kept in the ``entry_id`` attribute and the exception
    message reads ``'<entry_id> not found'``.
    """

    def __init__(self, entry_id):
        message = '%s not found' % entry_id
        super().__init__(message)
        self.entry_id = entry_id
def get_pubmed_info(pmid):
"""
Retrieve information about a publication from NCBI PubMed
......@@ -133,30 +141,83 @@ def get_uniprot_info(uniprot_id):
"""
uniprot_client = UniProt()
ns = {'u': 'http://uniprot.org/uniprot'}
resp = uniprot_client.retrieve(uniprot_id)
f = open('/tmp/'+uniprot_id+'.xml', 'w')
f.write(str(resp))
f.close()
recommended_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
try:
resp = uniprot_client.retrieve(uniprot_id)
except TypeError:
raise EntryNotFoundError(uniprot_id)
if resp.root=='':
raise EntryNotFoundError(uniprot_id)
try:
recommended_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
except:
recommended_name = None
try:
recommended_short_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:shortName', ns)[0].text
except:
recommended_short_name = None
organism = resp.root.findall(
'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id']
gene = resp.root.findall(
'u:entry/u:gene/u:name[@type="primary"]', ns)[0].text
gene_names = []
for el in resp.root.findall('u:entry/u:gene/u:name', ns):
gene_name = {'name':el.text, 'type':el.attrib['type']}
gene_names.append(gene_name)
try:
gene_id = resp.root.findall(
'u:entry/u:dbReference[@type="GeneID"]', ns)[0].attrib['id']
except IndexError:
gene_id = None
entry_name = resp.root.findall('u:entry/u:name', ns)[0].text
go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns)
accessions = [el.text for el in resp.root.findall('u:entry/u:accession', ns)]
molecular_functions = []
cellular_localisations = []
biological_processes = []
for go_el in go_els:
term_property_value = go_el.findall(
'u:property[@type="term"]', ns)[0].attrib['value']
if term_property_value[0:2] == 'F:':
molecular_functions.append('GO_' + go_el.attrib['id'][3:])
if term_property_value[0:2] == 'C:':
cellular_localisations.append('GO_' + go_el.attrib['id'][3:])
if term_property_value[0:2] == 'P:':
biological_processes.append('GO_' + go_el.attrib['id'][3:])
citations = []
for el in resp.root.findall('u:entry/u:reference', ns):
try:
doi = el.findall('u:citation/u:dbReference[@type="DOI"]', ns)[0].attrib['id']
pmid = el.findall('u:citation/u:dbReference[@type="PubMed"]', ns)[0].attrib['id']
citations.append({'doi':doi, 'pmid':pmid})
except IndexError:
continue
alternative_names = []
for el in resp.root.findall('u:entry/u:protein/u:alternativeName', ns):
alternative_name = {'full':el.findall('u:fullName',ns)[0].text}
if el.findall('u:shortName', ns):
alternative_name['short'] = el.findall('u:shortName', ns)[0].text
alternative_names.append(alternative_name)
db_references = resp.root.findall('u:entry/u:dbReference[@type="Pfam"]', ns)
domains = []
for db_reference in db_references:
name = db_reference.attrib["id"]
domains.append(name)
return {'recommended_name': recommended_name,
'recommended_short_name': recommended_short_name,
'organism': int(organism),
'gene': gene,
'gene_id': int(gene_id) if gene_id else None,
'accessions': accessions,
'gene_names': gene_names,
'entry_name': entry_name,
'short_name': entry_name.split('_')[0],
'molecular_functions': molecular_functions
'molecular_functions': molecular_functions,
'domains': domains,
'cellular_localisations': cellular_localisations,
'biological_processes': biological_processes,
'citations': citations,
'alternative_names': alternative_names
}
......@@ -223,6 +284,8 @@ def get_pdb_uniprot_mapping(pdb_id):
pdb_id = pdb_id.lower()
resp = requests.get(
'https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}'.format(pdb_id.lower()))
if resp.status_code != 200:
raise EntryNotFoundError(pdb_id)
uniprot_ids = list(resp.json()[pdb_id]['UniProt'].keys())
return uniprot_ids
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment