Merge pull request #724 from Athemis/master

[engine] PDBe (Protein Data Bank Europe)
This commit is contained in:
Adam Tauber 2016-10-13 11:19:11 +02:00 committed by GitHub
commit e2245611d7
4 changed files with 226 additions and 0 deletions

View File

@ -59,3 +59,4 @@ generally made searx better:
- Harry Wood @harry-wood - Harry Wood @harry-wood
- Thomas Renard @threnard - Thomas Renard @threnard
- Pydo `<https://github.com/pydo>`_ - Pydo `<https://github.com/pydo>`_
- Athemis `<https://github.com/Athemis>`_

109
searx/engines/pdbe.py Normal file
View File

@ -0,0 +1,109 @@
"""
PDBe (Protein Data Bank in Europe)
@website https://www.ebi.ac.uk/pdbe
@provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
unlimited
@using-api yes
@results python dictionary (from json)
@stable yes
@parse url, title, content, img_src
"""
from json import loads
from flask_babel import gettext
categories = ['science']
hide_obsolete = False
# status codes of unpublished entries
pdb_unpublished_codes = ['HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO', 'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN']
# url for api query
pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'
# base url for results
pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'
# link to preview image of structure
pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
def request(query, params):
params['url'] = pdbe_solr_url
params['method'] = 'POST'
params['data'] = {
'q': query,
'wt': "json" # request response in parsable format
}
return params
def construct_body(result):
# set title
title = result['title']
# construct content body
content = """{title}<br />{authors} {journal} <strong>{volume}</strong>&nbsp;{page} ({year})"""
# replace placeholders with actual content
try:
if result['journal']:
content = content.format(
title=result['citation_title'],
authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'],
page=result['journal_page'], year=result['citation_year'])
else:
content = content.format(
title=result['citation_title'],
authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year'])
img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
except (KeyError):
content = None
img_src = None
# construct url for preview image
try:
img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
except (KeyError):
img_src = None
return [title, content, img_src]
def response(resp):
results = []
json = loads(resp.text)['response']['docs']
# parse results
for result in json:
# catch obsolete entries and mark them accordingly
if result['status'] in pdb_unpublished_codes:
continue
if hide_obsolete:
continue
if result['status'] == 'OBS':
# expand title to add some sort of warning message
title = gettext('{title}&nbsp;(OBSOLETE)').format(title=result['title'])
superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by'])
# since we can't construct a proper body from the response, we'll make up our own
msg_superseded = gettext("This entry has been superseded by")
content = '<em>{msg_superseded} \<a href="{url}">{pdb_id}</a></em>'.format(
msg_superseded=msg_superseded,
url=superseded_url,
pdb_id=result['superseded_by'], )
# obsoleted entries don't have preview images
img_src = None
else:
title, content, img_src = construct_body(result)
results.append({
'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
'title': title,
'content': content,
'img_src': img_src
})
return results

View File

@ -339,6 +339,13 @@ engines:
disabled : True disabled : True
shortcut : or shortcut : or
- name : pdbe
engine : pdbe
shortcut : pdb
# Hide obsolete PDB entries.
# Default is not to hide obsolete structures
# hide_obsolete : False
- name : photon - name : photon
engine : photon engine : photon
shortcut : ph shortcut : ph

View File

@ -0,0 +1,109 @@
import mock
from collections import defaultdict
from searx.engines import pdbe
from searx.testing import SearxTestCase
class TestPdbeEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
dicto = defaultdict(dict)
params = pdbe.request(query, dicto)
self.assertTrue('url' in params)
self.assertTrue('ebi.ac.uk' in params['url'])
self.assertTrue('data' in params)
self.assertTrue('q' in params['data'])
self.assertTrue(query in params['data']['q'])
self.assertTrue('wt' in params['data'])
self.assertTrue('json' in params['data']['wt'])
self.assertTrue('method' in params)
self.assertTrue(params['method'] == 'POST')
def test_response(self):
self.assertRaises(AttributeError, pdbe.response, None)
self.assertRaises(AttributeError, pdbe.response, [])
self.assertRaises(AttributeError, pdbe.response, '')
self.assertRaises(AttributeError, pdbe.response, '[]')
json = """
{
"response": {
"docs": [
{
"citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.",
"citation_year": 1993,
"entry_author_list": [
"Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M"
],
"journal": "J. Mol. Biol.",
"journal_page": "498-508",
"journal_volume": "233",
"pdb_id": "2fal",
"status": "REL",
"title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES"
}
],
"numFound": 1,
"start": 0
},
"responseHeader": {
"QTime": 0,
"params": {
"q": "2fal",
"wt": "json"
},
"status": 0
}
}
"""
response = mock.Mock(text=json)
results = pdbe.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'],
'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES')
self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal'))
self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal'))
self.assertTrue('Conti E' in results[0]['content'])
self.assertTrue('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.' in
results[0]['content'])
self.assertTrue('1993' in results[0]['content'])
# Testing proper handling of PDB entries marked as obsolete
json = """
{
"response": {
"docs": [
{
"citation_title": "Obsolete entry test",
"citation_year": 2016,
"entry_author_list": ["Doe J"],
"journal": "J. Obs.",
"journal_page": "1-2",
"journal_volume": "1",
"pdb_id": "xxxx",
"status": "OBS",
"title": "OBSOLETE ENTRY TEST",
"superseded_by": "yyyy"
}
],
"numFound": 1,
"start": 0
},
"responseHeader": {
"QTime": 0,
"params": {
"q": "xxxx",
"wt": "json"
},
"status": 0
}
}
"""
response = mock.Mock(text=json)
results = pdbe.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST&nbsp;(OBSOLETE)')
self.assertTrue(results[0]['content'].startswith('<em>This entry has been superseded by'))