Commit b1b77dbe authored by Bryan  BRANCOTTE's avatar Bryan BRANCOTTE
Browse files

Working with DOI

parent b34ffef3
......@@ -11,7 +11,7 @@ from django.db.models.functions import Upper
from django.forms import ModelForm, modelformset_factory, formset_factory, inlineformset_factory
from django.utils.translation import ugettext_lazy as _, ugettext
from ippidb.ws import get_pdb_uniprot_mapping
from ippidb.ws import get_pdb_uniprot_mapping, BibliographicalEntryNotFound
from .models import *
from .ws import pdb_entry_exists
......@@ -32,19 +32,6 @@ class CharFieldDataList(forms.CharField):
""" Step 1 : IdForm """
class IdFormOld(ModelForm):
    """
    Model-backed form for the bibliographic identification step.

    Only the ``source`` and ``id_source`` fields of ``Bibliography`` are exposed.
    """

    class Meta:
        model = Bibliography
        fields = ('source', 'id_source')
        # the source type is picked with radio buttons, the identifier is typed in a text input
        widgets = dict(
            source=forms.RadioSelect,
            id_source=forms.TextInput(),
        )
        # the identifier field is labelled "ID" in the rendered form
        labels = dict(id_source="ID")
class IdForm(forms.Form):
source = forms.ChoiceField(
label='Bibliographic type',
......@@ -54,7 +41,7 @@ class IdForm(forms.Form):
)
id_source = forms.CharField(
label='Bibliographic ID',
max_length=25,
max_length=255,
)
allow_duplicate = forms.BooleanField(
label=_("IdForm_allow_duplicate_label"),
......
......@@ -16,7 +16,8 @@ from django.db.models.functions import Cast
from django.utils.translation import ugettext_lazy as _
from .utils import FingerPrinter, smi2inchi, smi2inchikey
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, get_pfam_info
from .ws import get_pubmed_info, get_google_patent_info, get_uniprot_info, get_taxonomy_info, get_go_info, \
get_pfam_info, get_doi_info
class AutoFillableModel(models.Model):
......@@ -52,15 +53,15 @@ class Bibliography(AutoFillableModel):
SOURCES = (
('PM', 'PubMed ID'),
('PT', 'Patent'),
('DO', 'DOI ID')
('DO', 'DOI')
)
id_source_validators = dict(
PM=re.compile("^[0-9]+$"),
PT=re.compile("^.*$"),
DO=re.compile("^.*$"),
DO=re.compile("^10.\d{4,9}/.+$"),
)
source = models.CharField(
'Bibliographic type', max_length=2, choices=SOURCES, default='PM')
'Bibliographic type', max_length=2, choices=SOURCES, default=SOURCES[0][0])
id_source = models.CharField('Bibliographic ID', max_length=25)
title = models.CharField('Title', max_length=300)
journal_name = models.CharField('Journal name', max_length=50, null=True, blank=True)
......@@ -82,8 +83,12 @@ class Bibliography(AutoFillableModel):
"""
if self.source == 'PM':
info = get_pubmed_info(self.id_source)
else:
elif self.source == 'PT':
info = get_google_patent_info(self.id_source)
elif self.source == 'DO':
info = get_doi_info(self.id_source)
else:
raise NotImplementedError()
self.title = info['title']
self.journal_name = info['journal_name']
self.authors_list = info['authors_list']
......
......@@ -145,6 +145,32 @@ def get_google_patent_info(patent_number):
'authors_list': authors}
def get_doi_info(patent_number):
    """
    Retrieve bibliographic metadata about a DOI by asking the DOI resolver for a JSON answer

    :param patent_number: the DOI to resolve (the parameter keeps its historical name so that
        keyword callers are not broken)
    :type patent_number: str
    :return: publication metadata (title, journal name, publication year, authors list).
        ``biblio_year`` is None when the response carries no usable date.
    :rtype: dict
    :raises DOIEntryNotFound: when the resolver does not answer with HTTP 200
    """
    url = 'http://dx.doi.org/{}'.format(patent_number)
    resp = requests.get(url, headers={"Accept": "application/json"})
    if resp.status_code != 200:
        raise DOIEntryNotFound("With error code %i" % resp.status_code)
    resp_json = json.loads(resp.text)

    # Authors are not guaranteed to have both a given and a family name, and the whole
    # "author" entry may be absent: build each name from whatever parts are present.
    author_names = []
    for author in resp_json.get('author') or []:
        name_parts = [author[key] for key in ('given', 'family') if author.get(key)]
        if not name_parts:
            # NOTE(review): organisational authors presumably come as "name" or "literal"
            # depending on the registration agency -- confirm against real responses
            fallback_name = author.get('name') or author.get('literal')
            if fallback_name:
                name_parts.append(fallback_name)
        if name_parts:
            author_names.append(' '.join(name_parts))
    authors = ', '.join(author_names)

    # Prefer the print publication date, fall back on the record creation date; tolerate
    # a missing or malformed date instead of failing while subscripting None.
    date_info = resp_json.get("published-print") or resp_json.get("created")
    try:
        biblio_year = int(date_info["date-parts"][0][0])
    except (TypeError, KeyError, IndexError, ValueError):
        biblio_year = None

    return {'title': resp_json['title'],
            'journal_name': resp_json.get("container-title", resp_json.get("original-title", None)),
            'biblio_year': biblio_year,
            'authors_list': authors}
def get_uniprot_info(uniprot_id):
"""
Retrieve information about a protein from the Uniprot database
......
......@@ -257,7 +257,7 @@ msgstr ""
msgid "IdForm_desc"
msgstr ""
"Please provide a valid ID for your bibliographic source, either a PubMed ID, "
"or WIPO ID. This ID should correspond to a bibliographic source in its final "
"a patent ID, or a DOI. This ID should correspond to a bibliographic source in its final "
"format. \n"
"Therefore, PubMed articles in « Just Accepted » format should not be used as "
"a source of data as they are not considered the official version of record."
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment