Skip to content
Snippets Groups Projects
Commit 362eb0bd authored by Bryan BRANCOTTE
Browse files

Merge branch 'add_domains_to_proteins' into wizard_form

parents 58972433 f03747d3
No related branches found
No related tags found
1 merge request: !1 Wizard form
...@@ -188,6 +188,7 @@ class Protein(AutoFillableModel): ...@@ -188,6 +188,7 @@ class Protein(AutoFillableModel):
entry_name = models.CharField('Entry name', max_length=30) entry_name = models.CharField('Entry name', max_length=30)
organism = models.ForeignKey('Taxonomy', models.CASCADE) organism = models.ForeignKey('Taxonomy', models.CASCADE)
molecular_functions = models.ManyToManyField(MolecularFunction) molecular_functions = models.ManyToManyField(MolecularFunction)
domains = models.ManyToManyField('Domain')
@transaction.atomic @transaction.atomic
def autofill(self): def autofill(self):
...@@ -208,6 +209,7 @@ class Protein(AutoFillableModel): ...@@ -208,6 +209,7 @@ class Protein(AutoFillableModel):
taxonomy.save(autofill=True) taxonomy.save(autofill=True)
self.organism = taxonomy self.organism = taxonomy
super(Protein, self).save() super(Protein, self).save()
for go_id in info['molecular_functions']: for go_id in info['molecular_functions']:
try: try:
mol_function = MolecularFunction.objects.get(go_id=go_id) mol_function = MolecularFunction.objects.get(go_id=go_id)
...@@ -217,6 +219,15 @@ class Protein(AutoFillableModel): ...@@ -217,6 +219,15 @@ class Protein(AutoFillableModel):
mol_function.save(autofill=True) mol_function.save(autofill=True)
self.molecular_functions.add(mol_function) self.molecular_functions.add(mol_function)
# Link every Pfam accession listed in info['domains'] to this protein,
# creating and autofilling the Domain row first when it is not stored yet.
for domain_id in info['domains']:
    try:
        domain = Domain.objects.get(pfam_acc=domain_id)
    except Domain.DoesNotExist:
        # The lookup above is made on Domain, so Domain.DoesNotExist is the
        # exception raised for an unknown accession; catching
        # MolecularFunction.DoesNotExist here would never match and the
        # missing domain would abort the autofill instead of being created.
        domain = Domain()
        domain.pfam_acc = domain_id
        domain.save(autofill=True)
    # Existing and freshly created domains are both attached to the protein.
    self.domains.add(domain)
def is_autofill_done(self): def is_autofill_done(self):
return len(self.gene_name) > 0 return len(self.gene_name) > 0
...@@ -231,7 +242,7 @@ class Domain(AutoFillableModel): ...@@ -231,7 +242,7 @@ class Domain(AutoFillableModel):
pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True) pfam_acc = models.CharField('Pfam Accession', max_length=10, unique=True)
pfam_id = models.CharField('Pfam Family Identifier', max_length=20) pfam_id = models.CharField('Pfam Family Identifier', max_length=20)
pfam_description = models.CharField('Pfam Description', max_length=100) pfam_description = models.CharField('Pfam Description', max_length=100)
domain_family = models.CharField('Domain family', max_length=25) domain_family = models.CharField('Domain family', max_length=25, blank=True, default="")
# TODO: what is this field? check database # TODO: what is this field? check database
# contents # contents
......
...@@ -4,15 +4,18 @@ iPPI-DB unit tests ...@@ -4,15 +4,18 @@ iPPI-DB unit tests
import re import re
from django.core.management import call_command
from django.test import TestCase from django.test import TestCase
from django.urls import reverse from django.urls import reverse
from django.core.management import call_command
from openbabel import vectorUnsignedInt, OBFingerprint from openbabel import vectorUnsignedInt, OBFingerprint
from ippidb import ws
from ippidb.ws import get_uniprot_info
from .models import Compound, CompoundTanimoto, create_tanimoto from .models import Compound, CompoundTanimoto, create_tanimoto
from .models import DrugBankCompound, DrugbankCompoundTanimoto from .models import DrugBankCompound
from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey from .utils import FingerPrinter, mol2smi, smi2mol, smi2inchi, smi2inchikey
class MolSmiTestCase(TestCase): class MolSmiTestCase(TestCase):
""" """
Test MOL to SMILES and SMILES to MOL format conversion functions Test MOL to SMILES and SMILES to MOL format conversion functions
...@@ -322,3 +325,71 @@ class QuestionCompoundViews(TestCase): ...@@ -322,3 +325,71 @@ class QuestionCompoundViews(TestCase):
url = reverse('compound_list') url = reverse('compound_list')
response = self.client.get(url) response = self.client.get(url)
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
class TestGetUniprotInfo(TestCase):
    """
    Check the dictionary returned by get_uniprot_info for known UniProt entries
    """

    def _assert_same_sorted(self, first, second, key=None):
        """
        Assert that two collections are equal once both are sorted with `key`
        """
        self.assertEqual(sorted(first, key=key), sorted(second, key=key))

    def test_get_uniprot_info(self):
        info = get_uniprot_info('Q15286')

        # scalar fields
        self.assertEqual(info['recommended_name'], 'Ras-related protein Rab-35')
        self.assertEqual(info['organism'], 9606)
        self.assertEqual(info['gene_id'], 11021)

        # gene names are compared regardless of the order they come back in
        expected_gene_names = [
            {'name': 'RAB35', 'type': 'primary'},
            {'name': 'RAB1C', 'type': 'synonym'},
            {'name': 'RAY', 'type': 'synonym'},
        ]
        self._assert_same_sorted(
            info['gene_names'], expected_gene_names, key=lambda k: k['name'])

        self.assertEqual(info['entry_name'], 'RAB35_HUMAN')
        self.assertEqual(info['short_name'], 'RAB35')

        # GO terms, one list per category
        expected_molecular_functions = [
            'GO_0003924', 'GO_0005525', 'GO_0005546', 'GO_0019003',
        ]
        self._assert_same_sorted(
            info['molecular_functions'], expected_molecular_functions)

        expected_cellular_localisations = [
            'GO_0000139', 'GO_0005829', 'GO_0005886', 'GO_0005905',
            'GO_0010008', 'GO_0030665', 'GO_0031253', 'GO_0042470',
            'GO_0045171', 'GO_0045334', 'GO_0055038', 'GO_0070062',
            'GO_0098993',
        ]
        self._assert_same_sorted(
            info['cellular_localisations'], expected_cellular_localisations)

        expected_biological_processes = [
            'GO_0000281', 'GO_0006886', 'GO_0008104', 'GO_0016197',
            'GO_0019882', 'GO_0031175', 'GO_0032456', 'GO_0032482',
            'GO_0036010', 'GO_0048227', 'GO_1990090',
        ]
        self._assert_same_sorted(
            info['biological_processes'], expected_biological_processes)

        expected_accessions = ['Q15286', 'B2R6E0', 'B4E390']
        self._assert_same_sorted(info['accessions'], expected_accessions)

        # citations are matched on their PubMed identifier ordering
        expected_citations = [
            {'doi': '10.1006/bbrc.1994.2889', 'pmid': '7811277'},
            {'doi': '10.1038/ng1285', 'pmid': '14702039'},
            {'doi': '10.1038/nature04569', 'pmid': '16541075'},
            {'doi': '10.1101/gr.2596504', 'pmid': '15489334'},
            {'doi': '10.1016/j.cub.2006.07.020', 'pmid': '16950109'},
            {'doi': '10.1021/pr060363j', 'pmid': '17081065'},
            {'doi': '10.1074/jbc.M109.050930', 'pmid': '20154091'},
            {'doi': '10.1186/1752-0509-5-17', 'pmid': '21269460'},
            {'doi': '10.1038/nature10335', 'pmid': '21822290'},
            {'doi': '10.1038/emboj.2012.16', 'pmid': '22307087'},
            {'doi': '10.1111/j.1600-0854.2011.01294.x', 'pmid': '21951725'},
            {'doi': '10.1021/pr300630k', 'pmid': '23186163'},
            {'doi': '10.1002/pmic.201400617', 'pmid': '25944712'},
            {'doi': '10.1073/pnas.1110415108', 'pmid': '22065758'},
        ]
        self._assert_same_sorted(
            expected_citations, info['citations'], key=lambda k: k['pmid'])

        expected_alternative_names = [
            {'full': 'GTP-binding protein RAY'},
            {'full': 'Ras-related protein Rab-1C'},
        ]
        self._assert_same_sorted(
            expected_alternative_names,
            info['alternative_names'],
            key=lambda k: k['full'])

    def test_get_uniprot_info_domains(self):
        info = get_uniprot_info('O00255')
        expected_domains = ['PF05053']
        self._assert_same_sorted(info['domains'], expected_domains)
class TestGetPDBUniProtMapping(TestCase):
    """
    Check the lookup of the UniProt accessions mapped to a PDB entry
    """

    def test_find_info(self):
        expected_ids = ['Q03164', 'O00255']
        expected_ids.sort()
        found_ids = sorted(ws.get_pdb_uniprot_mapping('3u85'))
        self.assertEqual(found_ids, expected_ids)
        # the same accession must not be reported more than once
        self.assertEqual(len(found_ids), len(set(found_ids)))

    def test_entry_not_found(self):
        with self.assertRaises(ws.EntryNotFoundError):
            ws.get_pdb_uniprot_mapping('Xu85')
""" """
iPPI-DB web-service client utility functions iPPI-DB web-service client utility functions
""" """
import json import json
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
...@@ -10,7 +11,14 @@ from bioservices.uniprot import UniProt ...@@ -10,7 +11,14 @@ from bioservices.uniprot import UniProt
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
class EntryNotFoundError(Exception):
    """
    Raised when a web-service lookup does not find the requested entry

    The identifier that was looked up is kept in the ``entry_id`` attribute
    and the exception message reads ``<entry_id> not found``.
    """

    def __init__(self, entry_id):
        self.entry_id = entry_id
        # Wrap the identifier in a 1-tuple: a bare tuple on the right of %
        # would be unpacked as several format arguments and raise TypeError.
        super().__init__('%s not found' % (entry_id,))
class BibliographicalEntryNotFound(EntryNotFoundError):
pass pass
...@@ -182,30 +190,83 @@ def get_uniprot_info(uniprot_id): ...@@ -182,30 +190,83 @@ def get_uniprot_info(uniprot_id):
""" """
uniprot_client = UniProt() uniprot_client = UniProt()
ns = {'u': 'http://uniprot.org/uniprot'} ns = {'u': 'http://uniprot.org/uniprot'}
resp = uniprot_client.retrieve(uniprot_id) try:
f = open('/tmp/'+uniprot_id+'.xml', 'w') resp = uniprot_client.retrieve(uniprot_id)
f.write(str(resp)) except TypeError:
f.close() raise EntryNotFoundError(uniprot_id)
recommended_name = resp.root.findall( if resp.root=='':
'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text raise EntryNotFoundError(uniprot_id)
try:
recommended_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:fullName', ns)[0].text
except:
recommended_name = None
try:
recommended_short_name = resp.root.findall(
'u:entry/u:protein/u:recommendedName/u:shortName', ns)[0].text
except:
recommended_short_name = None
organism = resp.root.findall( organism = resp.root.findall(
'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id'] 'u:entry/u:organism/u:dbReference[@type="NCBI Taxonomy"]', ns)[0].attrib['id']
gene = resp.root.findall( gene_names = []
'u:entry/u:gene/u:name[@type="primary"]', ns)[0].text for el in resp.root.findall('u:entry/u:gene/u:name', ns):
gene_name = {'name':el.text, 'type':el.attrib['type']}
gene_names.append(gene_name)
try:
gene_id = resp.root.findall(
'u:entry/u:dbReference[@type="GeneID"]', ns)[0].attrib['id']
except IndexError:
gene_id = None
entry_name = resp.root.findall('u:entry/u:name', ns)[0].text entry_name = resp.root.findall('u:entry/u:name', ns)[0].text
go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns) go_els = resp.root.findall('u:entry/u:dbReference[@type="GO"]', ns)
accessions = [el.text for el in resp.root.findall('u:entry/u:accession', ns)]
molecular_functions = [] molecular_functions = []
cellular_localisations = []
biological_processes = []
for go_el in go_els: for go_el in go_els:
term_property_value = go_el.findall( term_property_value = go_el.findall(
'u:property[@type="term"]', ns)[0].attrib['value'] 'u:property[@type="term"]', ns)[0].attrib['value']
if term_property_value[0:2] == 'F:': if term_property_value[0:2] == 'F:':
molecular_functions.append('GO_' + go_el.attrib['id'][3:]) molecular_functions.append('GO_' + go_el.attrib['id'][3:])
if term_property_value[0:2] == 'C:':
cellular_localisations.append('GO_' + go_el.attrib['id'][3:])
if term_property_value[0:2] == 'P:':
biological_processes.append('GO_' + go_el.attrib['id'][3:])
citations = []
for el in resp.root.findall('u:entry/u:reference', ns):
try:
doi = el.findall('u:citation/u:dbReference[@type="DOI"]', ns)[0].attrib['id']
pmid = el.findall('u:citation/u:dbReference[@type="PubMed"]', ns)[0].attrib['id']
citations.append({'doi':doi, 'pmid':pmid})
except IndexError:
continue
alternative_names = []
for el in resp.root.findall('u:entry/u:protein/u:alternativeName', ns):
alternative_name = {'full':el.findall('u:fullName',ns)[0].text}
if el.findall('u:shortName', ns):
alternative_name['short'] = el.findall('u:shortName', ns)[0].text
alternative_names.append(alternative_name)
db_references = resp.root.findall('u:entry/u:dbReference[@type="Pfam"]', ns)
domains = []
for db_reference in db_references:
name = db_reference.attrib["id"]
domains.append(name)
return {'recommended_name': recommended_name, return {'recommended_name': recommended_name,
'recommended_short_name': recommended_short_name,
'organism': int(organism), 'organism': int(organism),
'gene': gene, 'gene_id': int(gene_id) if gene_id else None,
'accessions': accessions,
'gene_names': gene_names,
'entry_name': entry_name, 'entry_name': entry_name,
'short_name': entry_name.split('_')[0], 'short_name': entry_name.split('_')[0],
'molecular_functions': molecular_functions 'molecular_functions': molecular_functions,
'domains': domains,
'cellular_localisations': cellular_localisations,
'biological_processes': biological_processes,
'citations': citations,
'alternative_names': alternative_names
} }
...@@ -272,6 +333,8 @@ def get_pdb_uniprot_mapping(pdb_id): ...@@ -272,6 +333,8 @@ def get_pdb_uniprot_mapping(pdb_id):
pdb_id = pdb_id.lower() pdb_id = pdb_id.lower()
resp = requests.get( resp = requests.get(
'https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}'.format(pdb_id.lower())) 'https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{}'.format(pdb_id.lower()))
if resp.status_code != 200:
raise EntryNotFoundError(pdb_id)
uniprot_ids = list(resp.json()[pdb_id]['UniProt'].keys()) uniprot_ids = list(resp.json()[pdb_id]['UniProt'].keys())
return uniprot_ids return uniprot_ids
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment