Compare commits

...

8 Commits

Author SHA1 Message Date
Alexandre Flament fc389f009d
Merge pull request #1705 from dalf/template_paper
Theme: add a paper.html template and update of the science engines
2022-09-23 23:09:27 +02:00
Alexandre Flament d6446be38f [mod] science category: various update of about PR 1705 2022-09-23 20:52:55 +02:00
Markus Heiser 08b8859705 [doc] paper.html result template
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2022-09-23 20:45:58 +02:00
Alexandre FLAMENT fe43b6e821 [build] /static 2022-09-23 20:45:58 +02:00
Alexandre FLAMENT e36f85b836 Science category: update the engines
* use the paper.html template
* fetch more data from the engines
* add crossref.py
2022-09-23 20:45:58 +02:00
Alexandre FLAMENT 593026ad9c oa_doi_rewrite: add the doi to the result when it is found.
Currently, when oa_doi_rewrite finds a DOI in the result URL, it replaces the URL.
In this commit, the plugin adds the key "doi" to the result,
so the paper.html can show it.
2022-09-23 20:45:58 +02:00
Alexandre FLAMENT 5ba831d6a8 Add paper.html result template 2022-09-23 20:45:58 +02:00
Alexandre FLAMENT a96f503d7b Add searx.webutils.searxng_format_date
* Move the datetime to str code from searx.webapp.search to searx.webutils.searxng_format_date
* When the date is January 1st at midnight (month and day are 1; hour, minute and second are zero), the function returns only the year.
2022-09-23 20:45:58 +02:00
18 changed files with 536 additions and 146 deletions

View File

@ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type.
address.postcode postcode of object
address.country country of object
========================= =====================================================
.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
.. list-table:: Parameter of the **paper** media type /
see `BibTeX field types`_ and `BibTeX format`_
:header-rows: 2
:width: 100%
* - result-parameter
- Python type
- information
* - template
- :py:class:`str`
- is set to ``paper.html``
* - title
- :py:class:`str`
- title of the result
* - content
- :py:class:`str`
- abstract
* - comments
- :py:class:`str`
- free text displayed in italic below the content
* - tags
- :py:class:`List <list>`\ [\ :py:class:`str`\ ]
- free tag list
* - publishedDate
- :py:class:`datetime <datetime.datetime>`
- last publication date
* - authors
- :py:class:`List <list>`\ [\ :py:class:`str`\ ]
- list of authors of the work (authors with a "s")
* - editor
- :py:class:`str`
- list of editors of a book
* - publisher
- :py:class:`str`
- name of the publisher
* - journal
- :py:class:`str`
- name of the journal or magazine the article was
published in
* - volume
- :py:class:`str`
- volume number
* - pages
- :py:class:`str`
- page range where the article is
* - number
- :py:class:`str`
- number of the report or the issue number for a journal article
* - doi
- :py:class:`str`
- DOI number (like ``10.1038/d41586-018-07848-2``)
* - issn
- :py:class:`str`
- ISSN number like ``1476-4687``
* - isbn
- :py:class:`str`
- ISBN number like ``9780201896831``
* - pdf_url
- :py:class:`str`
- URL to the full article, the PDF version
* - html_url
- :py:class:`str`
- URL to full article, HTML version

View File

@ -3,9 +3,10 @@
ArXiV (Scientific preprints)
"""
from lxml import html
from lxml import etree
from lxml.etree import XPath
from datetime import datetime
from searx.utils import eval_xpath_list, eval_xpath_getindex
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
about = {
@ -17,7 +18,7 @@ about = {
"results": 'XML-RSS',
}
categories = ['science']
categories = ['science', 'scientific publications']
paging = True
base_url = (
@ -27,6 +28,23 @@ base_url = (
# engine dependent config
number_of_results = 10
# xpaths
arxiv_namespaces = {
"atom": "http://www.w3.org/2005/Atom",
"arxiv": "http://arxiv.org/schemas/atom",
}
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
def request(query, params):
# basic search
@ -41,30 +59,50 @@ def request(query, params):
def response(resp):
results = []
dom = etree.fromstring(resp.content)
for entry in eval_xpath_list(dom, xpath_entry):
title = eval_xpath_getindex(entry, xpath_title, 0).text
dom = html.fromstring(resp.content)
url = eval_xpath_getindex(entry, xpath_id, 0).text
abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
for entry in eval_xpath_list(dom, '//entry'):
title = eval_xpath_getindex(entry, './/title', 0).text
authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
url = eval_xpath_getindex(entry, './/id', 0).text
# doi
doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
doi = None if doi_element is None else doi_element.text
content_string = '{doi_content}{abstract_content}'
# pdf
pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
abstract = eval_xpath_getindex(entry, './/summary', 0).text
# journal
journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
journal = None if journal_element is None else journal_element.text
# If a doi is available, add it to the snipppet
doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
doi_content = doi_element.text if doi_element is not None else ''
content = content_string.format(doi_content=doi_content, abstract_content=abstract)
# tags
tag_elements = eval_xpath(entry, xpath_category)
tags = [str(tag) for tag in tag_elements]
if len(content) > 300:
content = content[0:300] + "..."
# TODO: center snippet on query term
# comments
comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
comments = None if comments_elements is None else comments_elements.text
publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
res_dict = {
'template': 'paper.html',
'url': url,
'title': title,
'publishedDate': publishedDate,
'content': abstract,
'doi': doi,
'authors': authors,
'journal': journal,
'tags': tags,
'comments': comments,
'pdf_url': pdf_url,
}
results.append(res_dict)

59
searx/engines/crossref.py Normal file
View File

@ -0,0 +1,59 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CrossRef (Science)
"""
from urllib.parse import urlencode
from searx.utils import html_to_text
about = {
"website": 'https://www.crossref.org/',
"wikidata_id": 'Q5188229',
"official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'
def request(query, params):
    """Fill ``params['url']`` with the Crossref ``/works`` search URL.

    The search terms are sent as ``query`` and the page number
    (``params['pageno']``, 1-based) is translated into an ``offset`` of
    20 records per page.  The updated ``params`` dict is returned.
    """
    # NOTE(review): 20 presumably matches the API's default page size -- confirm
    offset = 20 * (params['pageno'] - 1)
    query_string = urlencode({'query': query, 'offset': offset})
    params['url'] = f'{search_url}?{query_string}'
    return params
def _first(values):
    """Return the first item of ``values``, or ``None`` if it is missing or empty."""
    return values[0] if values else None


def _author_name(author):
    """Build a display name from a Crossref author record without stray spaces."""
    full_name = ' '.join(part for part in (author.get('given'), author.get('family')) if part)
    # NOTE(review): organisations presumably only carry a 'name' field -- confirm
    return full_name or author.get('name') or ''


def response(resp):
    """Convert a Crossref ``/works`` JSON response into ``paper.html`` results.

    Every item of ``message.items`` becomes one result dict.  For a
    ``book-chapter`` the book title (``container-title``) is shown first and the
    chapter title is appended in parentheses when it differs; for every other
    type the ``container-title`` is reported as the journal.

    Records that have neither a ``title`` nor a ``container-title`` are skipped
    instead of aborting the whole response with a ``KeyError`` / ``IndexError``.
    """
    results = []
    for record in resp.json()['message']['items']:
        record_type = record['type']
        record_title = _first(record.get('title'))
        container_title = _first(record.get('container-title'))

        if record_title is None and container_title is None:
            # nothing that could be displayed as a title
            continue

        if record_type == 'book-chapter' and container_title is not None:
            title = html_to_text(container_title)
            if record_title is not None and record_title.lower().strip() != container_title.lower().strip():
                title += ' (' + html_to_text(record_title) + ')'
            journal = None
        elif record_title is not None:
            title = html_to_text(record_title)
            journal = container_title
        else:
            # only the container title is known: use it as the title
            title = html_to_text(container_title)
            journal = None

        # prefer the primary resource URL, fall back to the record URL
        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [name for name in map(_author_name, record.get('author', [])) if name]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record_type,
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results

View File

@ -13,10 +13,12 @@ Definitions`_.
from urllib.parse import urlencode
from datetime import datetime
from typing import Optional
from lxml import html
from searx.utils import (
eval_xpath,
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
@ -46,7 +48,7 @@ about = {
}
# engine dependent config
categories = ['science']
categories = ['science', 'scientific publications']
paging = True
language_support = True
use_locale_domain = True
@ -99,7 +101,43 @@ def request(query, params):
return params
def response(resp):
def parse_gs_a(text: Optional[str]):
    """Split the green byline of a Google Scholar result into its parts.

    Recognised layouts:

    * "{authors} - {journal}, {year} - {publisher}"
    * "{authors} - {year} - {publisher}"
    * "{authors} - {publisher}"

    Returns the tuple ``(authors, journal, publisher, publishedDate)`` where
    ``authors`` is a list of strings.  All four items are ``None`` when
    ``text`` is ``None`` or empty; ``journal`` and ``publishedDate`` are
    ``None`` when they can not be extracted.
    """
    if text is None or text == '':
        return None, None, None, None

    segments = text.split(' - ')
    authors = segments[0].split(', ')
    publisher = segments[-1]

    # journal and year are only present in the three-segment layouts
    if len(segments) != 3:
        return authors, None, publisher, None

    # middle segment: "{journal}, {year}" or just "{year}"; the journal is
    # optional and may itself contain commas, the year is always the last item
    *journal_parts, year = segments[1].split(', ')
    journal = ', '.join(journal_parts) or None

    try:
        published_date = datetime.strptime(year.strip(), '%Y')
    except ValueError:
        published_date = None

    return authors, journal, publisher, published_date
def response(resp): # pylint: disable=too-many-locals
"""Get response from google's search request"""
results = []
@ -112,30 +150,53 @@ def response(resp):
dom = html.fromstring(resp.text)
# parse results
for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'):
for result in eval_xpath_list(dom, '//div[@data-cid]'):
title = extract_text(eval_xpath(result, './h3[1]//a'))
title = extract_text(eval_xpath(result, './/h3[1]//a'))
if not title:
# this is a [ZITATION] block
continue
url = eval_xpath(result, './h3[1]//a/@href')[0]
content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''
pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
if pub_info:
content += "[%s]" % pub_info
pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
if pub_type:
title = title + " " + pub_type
pub_type = pub_type[1:-1].lower()
url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
authors, journal, publisher, publishedDate = parse_gs_a(
extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
)
if publisher in url:
publisher = None
# cited by
comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
# link to the html or pdf document
html_url = None
pdf_url = None
doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
if doc_type == "[PDF]":
pdf_url = doc_url
else:
html_url = doc_url
results.append(
{
'template': 'paper.html',
'type': pub_type,
'url': url,
'title': title,
'authors': authors,
'publisher': publisher,
'journal': journal,
'publishedDate': publishedDate,
'content': content,
'comments': comments,
'html_url': html_url,
'pdf_url': pdf_url,
}
)

View File

@ -3,11 +3,15 @@
PubMed (Scholar publications)
"""
from flask_babel import gettext
from lxml import etree
from datetime import datetime
from urllib.parse import urlencode
from searx.network import get
from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
# about
about = {
@ -22,7 +26,7 @@ about = {
"results": 'XML',
}
categories = ['science']
categories = ['science', 'scientific publications']
base_url = (
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
@ -63,45 +67,60 @@ def response(resp):
retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
search_results_xml = get(retrieve_url_encoded).content
search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
search_results_response = get(retrieve_url_encoded).content
search_results = etree.XML(search_results_response)
for entry in eval_xpath_list(search_results, '//PubmedArticle'):
medline = eval_xpath_getindex(entry, './MedlineCitation', 0)
for entry in search_results:
title = entry.xpath('.//Article/ArticleTitle')[0].text
pmid = entry.xpath('.//PMID')[0].text
title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
pmid = eval_xpath_getindex(medline, './/PMID', 0).text
url = pubmed_url + pmid
content = extract_text(
eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
)
doi = extract_text(
eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
)
journal = extract_text(
eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
)
issn = extract_text(
eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
)
authors = []
for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
f = eval_xpath_getindex(author, './ForeName', 0, default=None)
l = eval_xpath_getindex(author, './LastName', 0, default=None)
f = '' if f is None else f.text
l = '' if l is None else l.text
authors.append((f + ' ' + l).strip())
try:
content = entry.xpath('.//Abstract/AbstractText')[0].text
except:
content = gettext('No abstract is available for this publication.')
# If a doi is available, add it to the snipppet
try:
doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
except:
pass
if len(content) > 300:
content = content[0:300] + "..."
# TODO: center snippet on query term
res_dict = {'url': url, 'title': title, 'content': content}
res_dict = {
'template': 'paper.html',
'url': url,
'title': title,
'content': content,
'journal': journal,
'issn': [issn],
'authors': authors,
'doi': doi,
}
accepted_date = eval_xpath_getindex(
entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
)
if accepted_date is not None:
year = eval_xpath_getindex(accepted_date, './Year', 0)
month = eval_xpath_getindex(accepted_date, './Month', 0)
day = eval_xpath_getindex(accepted_date, './Day', 0)
try:
publishedDate = datetime.strptime(
entry.xpath('.//DateCreated/Year')[0].text
+ '-'
+ entry.xpath('.//DateCreated/Month')[0].text
+ '-'
+ entry.xpath('.//DateCreated/Day')[0].text,
year.text + '-' + month.text + '-' + day.text,
'%Y-%m-%d',
)
res_dict['publishedDate'] = publishedDate
except:
pass
except Exception as e:
print(e)
results.append(res_dict)

View File

@ -6,6 +6,8 @@
from json import dumps, loads
from datetime import datetime
from flask_babel import gettext
about = {
"website": 'https://www.semanticscholar.org/',
"wikidata_id": 'Q22908627',
@ -15,6 +17,7 @@ about = {
"results": 'JSON',
}
categories = ['science', 'scientific publications']
paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper'
@ -45,11 +48,7 @@ def request(query, params):
def response(resp):
res = loads(resp.text)
results = []
for result in res['results']:
item = {}
metadata = []
url = result.get('primaryPaperLink', {}).get('url')
if not url and result.get('links'):
url = result.get('links')[0]
@ -60,22 +59,47 @@ def response(resp):
if not url:
url = paper_url + '/%s' % result['id']
item['url'] = url
# publishedDate
if 'pubDate' in result:
publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
else:
publishedDate = None
item['title'] = result['title']['text']
item['content'] = result['paperAbstract']['text']
# authors
authors = [author[0]['name'] for author in result.get('authors', [])]
metadata = result.get('fieldsOfStudy') or []
venue = result.get('venue', {}).get('text')
if venue:
metadata.append(venue)
if metadata:
item['metadata'] = ', '.join(metadata)
# pick for the first alternate link, but not from the crawler
pdf_url = None
for doc in result.get('alternatePaperLinks', []):
if doc['linkType'] not in ('crawler', 'doi'):
pdf_url = doc['url']
break
pubDate = result.get('pubDate')
if pubDate:
item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
# comments
comments = None
if 'citationStats' in result:
comments = gettext(
'{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
).format(
numCitations=result['citationStats']['numCitations'],
firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
)
results.append(item)
results.append(
{
'template': 'paper.html',
'url': url,
'title': result['title']['text'],
'content': result['paperAbstract']['text'],
'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
'doi': result.get('doiInfo', {}).get('doi'),
'tags': result.get('fieldsOfStudy'),
'authors': authors,
'pdf_url': pdf_url,
'publishedDate': publishedDate,
'comments': comments,
}
)
return results

View File

@ -19,7 +19,7 @@ about = {
"results": 'JSON',
}
categories = ['science']
categories = ['science', 'scientific publications']
paging = True
nb_per_page = 10
api_key = 'unset'
@ -41,32 +41,29 @@ def response(resp):
json_data = loads(resp.text)
for record in json_data['records']:
content = record['abstract'][0:500]
if len(record['abstract']) > len(content):
content += "..."
content = record['abstract']
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
metadata = [
record[x]
for x in [
'publicationName',
'identifier',
'contentType',
]
if record.get(x) is not None
]
metadata = ' / '.join(metadata)
if record.get('startingPage') and record.get('endingPage') is not None:
metadata += " (%(startingPage)s-%(endingPage)s)" % record
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
tags = record.get('genre')
if isinstance(tags, str):
tags = [tags]
results.append(
{
'template': 'paper.html',
'title': record['title'],
'url': record['url'][0]['value'].replace('http://', 'https://', 1),
'type': record.get('contentType'),
'content': content,
'publishedDate': published,
'metadata': metadata,
'authors': authors,
'doi': record.get('doi'),
'journal': record.get('publicationName'),
'pages': record.get('start_page') + '-' + record.get('end_page'),
'tags': tags,
'issn': [record.get('issn')],
'isbn': [record.get('isbn')],
'volume': record.get('volume') or None,
'number': record.get('number') or None,
}
)
return results

View File

@ -42,4 +42,6 @@ def on_result(request, search, result):
doi = doi[: -len(suffix)]
result['url'] = get_doi_resolver(request.preferences) + doi
result['parsed_url'] = urlparse(result['url'])
if 'doi' not in result:
result['doi'] = doi
return True

View File

@ -43,6 +43,7 @@ CATEGORY_GROUPS = {
'REPOS': 'repos',
'SOFTWARE_WIKIS': 'software wikis',
'WEB': 'web',
'SCIENTIFIC PUBLICATIONS': 'scientific publications',
}
STYLE_NAMES = {

View File

@ -319,7 +319,6 @@ engines:
- name: arxiv
engine: arxiv
shortcut: arx
categories: science
timeout: 4.0
# tmp suspended: dh key too small
@ -411,23 +410,10 @@ engines:
# api_key: 'unset'
- name: crossref
engine: json_engine
paging: true
search_url: https://search.crossref.org/dois?q={query}&page={pageno}
url_query: doi
title_query: title
title_html_to_text: true
content_query: fullCitation
content_html_to_text: true
categories: science
engine: crossref
shortcut: cr
about:
website: https://www.crossref.org/
wikidata_id: Q5188229
official_api_documentation: https://github.com/CrossRef/rest-api-doc
use_official_api: false
require_api_key: false
results: JSON
timeout: 30
disable: true
- name: yep
engine: json_engine
@ -1068,7 +1054,7 @@ engines:
title_query: metadata/oaf:entity/oaf:result/title/$
content_query: metadata/oaf:entity/oaf:result/description/$
content_html_to_text: true
categories: science
categories: "science"
shortcut: oad
timeout: 5.0
about:
@ -1198,7 +1184,6 @@ engines:
- name: pubmed
engine: pubmed
shortcut: pub
categories: science
timeout: 3.0
- name: pypi
@ -1346,7 +1331,6 @@ engines:
engine: semantic_scholar
disabled: true
shortcut: se
categories: science
# Spotify needs API credentials
# - name: spotify
@ -1372,8 +1356,7 @@ engines:
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
# api_key: 'unset'
# shortcut: springer
# categories: science
# timeout: 6.0
# timeout: 15.0
- name: startpage
engine: startpage

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -302,6 +302,49 @@ article[data-vim-selected].category-social {
}
}
.result-paper {
.attributes {
display: table;
border-spacing: 0.125rem;
div {
display: table-row;
span {
font-size: 0.9rem;
margin-top: 0.25rem;
display: table-cell;
time {
font-size: 0.9rem;
}
}
span:first-child {
color: var(--color-base-font);
min-width: 10rem;
}
span:nth-child(2) {
color: var(--color-result-publishdate-font);
}
}
}
.content {
margin-top: 0.25rem;
}
.comments {
font-size: 0.9rem;
margin: 0.25rem 0 0 0;
padding: 0;
word-wrap: break-word;
line-height: 1.24;
font-style: italic;
}
}
.template_group_images {
display: flex;
flex-wrap: wrap;
@ -955,6 +998,28 @@ article[data-vim-selected].category-social {
border: none !important;
background-color: var(--color-sidebar-background);
}
.result-paper {
.attributes {
display: block;
div {
display: block;
span {
display: inline;
}
span:first-child {
font-weight: bold;
}
span:nth-child(2) {
.ltr-margin-left(0.5rem);
}
}
}
}
}
/*

View File

@ -0,0 +1,44 @@
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %}
{{ result_header(result, favicons, image_proxify) -}}
<div class="attributes">
{%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
{%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
{%- if result.journal -%}
<div class="result_journal">
<span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
{%- if result.volume -%}
&nbsp;{{- result.volume -}}
{%- if result.number -%}
.{{- result.number -}}
{%- endif -%}
{%- endif -%}
{%- if result.pages -%}
&nbsp;{{- result.pages -}}
{%- endif -%}
</span>
</div>
{%- endif %}
{%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
{%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
{%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
{%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
{%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%}
{%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
{%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
</div>
{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
<p class="altlink">
{%- if result.pdf_url -%}
<a href="{{ result.pdf_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('PDF') }}</a>
{%- endif -%}
{%- if result.html_url -%}
<a href="{{ result.html_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('HTML') }}</a>
{%- endif -%}
{%- if result.doi %}
<a href="https://www.altmetric.com/details/doi/{{result.doi}}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>Altmetric</a>
{% endif -%}
</p>
{{- result_sub_footer(result, proxify) -}}
{{- result_footer(result) }}

View File

@ -12,7 +12,6 @@ import os
import sys
import base64
from datetime import datetime, timedelta
from timeit import default_timer
from html import escape
from io import StringIO
@ -45,7 +44,6 @@ from flask.json import jsonify
from flask_babel import (
Babel,
gettext,
format_date,
format_decimal,
)
@ -79,6 +77,7 @@ from searx.webutils import (
is_hmac_of,
is_flask_run_cmdline,
group_engines_in_tab,
searxng_l10n_timespan,
)
from searx.webadapter import (
get_search_query_from_webapp,
@ -718,25 +717,13 @@ def search():
if 'url' in result:
result['pretty_url'] = prettify_url(result['url'])
# TODO, check if timezone is calculated right # pylint: disable=fixme
if result.get('publishedDate'): # do not try to get a date from an empty string or a None type
try: # test if publishedDate >= 1900 (datetime module bug)
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
except ValueError:
result['publishedDate'] = None
else:
if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
else:
result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(
hours=hours, minutes=minutes
)
else:
result['publishedDate'] = format_date(result['publishedDate'])
result['publishedDate'] = searxng_l10n_timespan(result['publishedDate'])
# set result['open_group'] = True when the template changes from the previous result
# set result['close_group'] = True when the template changes on the next result

View File

@ -7,11 +7,14 @@ import hmac
import re
import inspect
import itertools
from datetime import datetime, timedelta
from typing import Iterable, List, Tuple, Dict
from io import StringIO
from codecs import getincrementalencoder
from flask_babel import gettext, format_date
from searx import logger, settings
from searx.engines import Engine, OTHER_CATEGORY
@ -138,6 +141,28 @@ def highlight_content(content, query):
return content
def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
    """Return a human-readable and translated string describing when ``dt`` was.

    * If ``dt`` is January 1st at midnight (hour, minute and second are zero),
      only the year is returned, e.g. ``"2020"``.
    * If ``dt`` lies within the last 24 hours, a translated
      "... minute(s) ago" / "... hour(s), ... minute(s) ago" string is returned.
    * In every other case (older dates, and dates in the future) the date is
      formatted with ``format_date``.

    Any time zone attached to ``dt`` is dropped before it is compared with the
    naive local ``datetime.now()``.
    """
    # TODO, check if timezone is calculated right  # pylint: disable=fixme
    if (dt.month, dt.day, dt.hour, dt.minute, dt.second) == (1, 1, 0, 0, 0):
        return str(dt.year)

    # take "now" once so that the range check and the displayed values agree
    elapsed = datetime.now() - dt.replace(tzinfo=None)

    # Only a non-negative span shorter than one day is shown as "... ago":
    # timedelta.seconds of a negative span (a date in the future) or of a span
    # of exactly one day would otherwise yield a meaningless value.
    if timedelta(0) <= elapsed < timedelta(days=1):
        hours, minutes = divmod(elapsed.seconds // 60, 60)
        if hours == 0:
            return gettext('{minutes} minute(s) ago').format(minutes=minutes)
        return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)
    return format_date(dt)
def is_flask_run_cmdline():
"""Check if the application was started using "flask run" command line