Skip to content
Snippets Groups Projects
Commit a503a3d3 authored by Hervé  MENAGER's avatar Hervé MENAGER
Browse files

create a new patent retrieval function (Google URL)

1- the old RIS URL is no longer functional, so we need to
   parse the HTML page instead
2- require BeautifulSoup to do it
3- the authors list is properly comma-joined this time

fix #110


Former-commit-id: e05195236de8b2f1c44fc216900b5257b01c0b39
parent ce6b67f1
No related branches found
No related tags found
No related merge requests found
a09756589adb11dcccd5c98f7aaa3e2439d58355 fedac345bcf7c524515fb3711f9b9a29e0dbf726
\ No newline at end of file \ No newline at end of file
...@@ -2,7 +2,7 @@ from bioservices.eutils import EUtils ...@@ -2,7 +2,7 @@ from bioservices.eutils import EUtils
from bioservices.uniprot import UniProt from bioservices.uniprot import UniProt
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import requests import requests
from bs4 import BeautifulSoup
def get_pubmed_info(pmid): def get_pubmed_info(pmid):
eu = EUtils() eu = EUtils()
...@@ -26,6 +26,7 @@ def get_pubmed_info(pmid): ...@@ -26,6 +26,7 @@ def get_pubmed_info(pmid):
def get_epo_info(patent_number): def get_epo_info(patent_number):
""" warning: this is not to be used anymore, the 3.1 version of the EPO service is now offline """
resp = requests.get( resp = requests.get(
'http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/{}/biblio.json'.format(patent_number)) 'http://ops.epo.org/3.1/rest-services/published-data/publication/docdb/{}/biblio.json'.format(patent_number))
data = resp.json() data = resp.json()
...@@ -44,7 +45,8 @@ def get_epo_info(patent_number): ...@@ -44,7 +45,8 @@ def get_epo_info(patent_number):
'biblio_year': biblio_year, 'biblio_year': biblio_year,
'authors_list': authors} 'authors_list': authors}
def get_google_patent_info(patent_number): def get_google_patent_info_ris(patent_number):
""" warning: now offline """
url = 'https://encrypted.google.com/patents/{}.ris'.format(patent_number) url = 'https://encrypted.google.com/patents/{}.ris'.format(patent_number)
resp = requests.get(url) resp = requests.get(url)
title = None title = None
...@@ -63,6 +65,21 @@ def get_google_patent_info(patent_number): ...@@ -63,6 +65,21 @@ def get_google_patent_info(patent_number):
'biblio_year': biblio_year, 'biblio_year': biblio_year,
'authors_list': authors} 'authors_list': authors}
def get_google_patent_info(patent_number, timeout=30):
    """Retrieve bibliographic metadata for a patent from its Google page.

    The HTML page for the patent is downloaded and the ``DC.title``,
    ``DC.contributor`` and ``DC.date`` ``<meta>`` tags are read from it.

    Args:
        patent_number: patent identifier, inserted as-is into the page URL.
        timeout: number of seconds to wait for the HTTP response. Without
            a timeout ``requests`` waits indefinitely on an unresponsive
            server.

    Returns:
        A dict with the keys ``title``, ``journal_name`` (always ``None``),
        ``biblio_year`` (the first four characters of the ``DC.date``
        content) and ``authors_list`` (the ``DC.contributor`` values joined
        into one string with ``', '``).

    Raises:
        IndexError: if the page has no ``DC.title`` or ``DC.date`` meta tag.
        requests.exceptions.Timeout: if no response arrives within
            ``timeout`` seconds.
    """
    url = 'https://encrypted.google.com/patents/{}'.format(patent_number)
    resp = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.text, 'html.parser')

    def first_meta_content(name):
        # Indexing [0] keeps the IndexError raised when the tag is absent.
        return soup.find_all('meta', attrs={'name': name})[0]['content'].strip()

    title = first_meta_content('DC.title')
    # One DC.contributor tag per author; the result is a single string.
    authors = ', '.join(
        author_meta['content'].strip()
        for author_meta in soup.find_all('meta', attrs={'name': 'DC.contributor'})
    )
    # Only the leading four characters of the date are kept as the year.
    biblio_year = first_meta_content('DC.date')[0:4]
    return {'title': title,
            'journal_name': None,
            'biblio_year': biblio_year,
            'authors_list': authors}
def get_uniprot_info(uniprot_id): def get_uniprot_info(uniprot_id):
uniprot_client = UniProt() uniprot_client = UniProt()
ns = {'u': 'http://uniprot.org/uniprot'} ns = {'u': 'http://uniprot.org/uniprot'}
......
...@@ -16,3 +16,4 @@ scipy ...@@ -16,3 +16,4 @@ scipy
matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4. matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4.
psycopg2 psycopg2
openbabel openbabel
bs4
\ No newline at end of file
...@@ -15,3 +15,4 @@ sklearn ...@@ -15,3 +15,4 @@ sklearn
scipy scipy
matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4. matplotlib==2.2.3 #Matplotlib 3.0+ does not support Python 2.x, 3.0, 3.1, 3.2, 3.3, or 3.4.
psycopg2 psycopg2
bs4
\ No newline at end of file
...@@ -12,3 +12,4 @@ mod_wsgi ...@@ -12,3 +12,4 @@ mod_wsgi
bioblend bioblend
django-allauth django-allauth
#pygraphviz>=1.3 --install-option="--include-path=/usr/local/include/graphviz/" --install-option="--library-path=/usr/local/lib/graphviz" #pygraphviz>=1.3 --install-option="--include-path=/usr/local/include/graphviz/" --install-option="--library-path=/usr/local/lib/graphviz"
bs4
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment