diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index e950ae667..7d94b83f1 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type. address.postcode postcode of object address.country country of object ========================= ===================================================== + +.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/ +.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types + +.. list-table:: Parameters of the **paper** media type / + see `BibTeX field types`_ and `BibTeX format`_ + :header-rows: 2 + :width: 100% + + * - result-parameter + - Python type + - information + + * - template + - :py:class:`str` + - is set to ``paper.html`` + + * - title + - :py:class:`str` + - title of the result + + * - content + - :py:class:`str` + - abstract + + * - comments + - :py:class:`str` + - free text displayed in italics below the content + + * - tags + - :py:class:`List <list>`\ [\ :py:class:`str`\ ] + - free tag list + + * - publishedDate + - :py:class:`datetime <datetime.datetime>` + - last publication date + + * - authors + - :py:class:`List <list>`\ [\ :py:class:`str`\ ] + - list of authors of the work (note the plural: ``authors``) + + * - editor + - :py:class:`str` + - list of editors of a book + + * - publisher + - :py:class:`str` + - name of the publisher + + * - journal + - :py:class:`str` + - name of the journal or magazine the article was + published in + + * - volume + - :py:class:`str` + - volume number + + * - pages + - :py:class:`str` + - page range of the article + + * - number + - :py:class:`str` + - number of the report or the issue number for a journal article + + * - doi + - :py:class:`str` + - DOI number (like ``10.1038/d41586-018-07848-2``) + + * - issn + - :py:class:`List <list>`\ [\ :py:class:`str`\ ] + - list of ISSN numbers like ``1476-4687`` + + * - isbn + - :py:class:`List <list>`\ [\ :py:class:`str`\ ] + - list of ISBN numbers like ``9780201896831`` + + * - pdf_url + - :py:class:`str` + - URL to the 
full article, the PDF version + + * - html_url + - :py:class:`str` + - URL to full article, HTML version diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index a1a58172d..a4811ebd5 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -3,9 +3,10 @@ ArXiV (Scientific preprints) """ -from lxml import html +from lxml import etree +from lxml.etree import XPath from datetime import datetime -from searx.utils import eval_xpath_list, eval_xpath_getindex +from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex # about about = { @@ -17,7 +18,7 @@ about = { "results": 'XML-RSS', } -categories = ['science'] +categories = ['science', 'scientific publications'] paging = True base_url = ( @@ -27,6 +28,23 @@ base_url = ( # engine dependent config number_of_results = 10 +# xpaths +arxiv_namespaces = { + "atom": "http://www.w3.org/2005/Atom", + "arxiv": "http://arxiv.org/schemas/atom", +} +xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces) +xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces) +xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces) +xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces) +xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces) +xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces) +xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces) +xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces) +xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces) +xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces) +xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces) + def request(query, params): # basic search @@ -41,30 +59,50 @@ def request(query, params): def response(resp): results = [] + dom = etree.fromstring(resp.content) + for entry in eval_xpath_list(dom, xpath_entry): + title = eval_xpath_getindex(entry, xpath_title, 0).text - dom 
= html.fromstring(resp.content) + url = eval_xpath_getindex(entry, xpath_id, 0).text + abstract = eval_xpath_getindex(entry, xpath_summary, 0).text - for entry in eval_xpath_list(dom, '//entry'): - title = eval_xpath_getindex(entry, './/title', 0).text + authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)] - url = eval_xpath_getindex(entry, './/id', 0).text + # doi + doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None) + doi = None if doi_element is None else doi_element.text - content_string = '{doi_content}{abstract_content}' + # pdf + pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None) + pdf_url = None if pdf_element is None else pdf_element.attrib.get('href') - abstract = eval_xpath_getindex(entry, './/summary', 0).text + # journal + journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None) + journal = None if journal_element is None else journal_element.text - # If a doi is available, add it to the snipppet - doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None) - doi_content = doi_element.text if doi_element is not None else '' - content = content_string.format(doi_content=doi_content, abstract_content=abstract) + # tags + tag_elements = eval_xpath(entry, xpath_category) + tags = [str(tag) for tag in tag_elements] - if len(content) > 300: - content = content[0:300] + "..." 
- # TODO: center snippet on query term + # comments + comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None) + comments = None if comments_elements is None else comments_elements.text - publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') + publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ') - res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} + res_dict = { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'publishedDate': publishedDate, + 'content': abstract, + 'doi': doi, + 'authors': authors, + 'journal': journal, + 'tags': tags, + 'comments': comments, + 'pdf_url': pdf_url, + } results.append(res_dict) diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py new file mode 100644 index 000000000..fbe2f0c2a --- /dev/null +++ b/searx/engines/crossref.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""CrossRef (Science) +""" + +from urllib.parse import urlencode +from searx.utils import html_to_text + +about = { + "website": 'https://www.crossref.org/', + "wikidata_id": 'Q5188229', + "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + +categories = ['science', 'scientific publications'] +paging = True +search_url = 'https://api.crossref.org/works' + + +def request(query, params): + params['url'] = search_url + '?' 
+ urlencode(dict(query=query, offset=20 * (params['pageno'] - 1))) + return params + + +def response(resp): + res = resp.json() + results = [] + for record in res['message']['items']: + record_type = record['type'] + if record_type == 'book-chapter': + title = record['container-title'][0] + if record['title'][0].lower().strip() != title.lower().strip(): + title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')' + journal = None + else: + title = html_to_text(record['title'][0]) + journal = record.get('container-title', [None])[0] + url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL'] + authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])] + isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])] + results.append( + { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'journal': journal, + 'volume': record.get('volume'), + 'type': record['type'], + 'content': html_to_text(record.get('abstract', '')), + 'publisher': record.get('publisher'), + 'authors': authors, + 'doi': record['DOI'], + 'isbn': isbn, + } + ) + return results diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index 41c62886b..c07cd4cea 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -13,10 +13,12 @@ Definitions`_. from urllib.parse import urlencode from datetime import datetime +from typing import Optional from lxml import html from searx.utils import ( eval_xpath, + eval_xpath_getindex, eval_xpath_list, extract_text, ) @@ -46,7 +48,7 @@ about = { } # engine dependent config -categories = ['science'] +categories = ['science', 'scientific publications'] paging = True language_support = True use_locale_domain = True @@ -99,7 +101,43 @@ def request(query, params): return params -def response(resp): +def parse_gs_a(text: Optional[str]): + """Parse the text written in green. 
+ + Possible formats: + * "{authors} - {journal}, {year} - {publisher}" + * "{authors} - {year} - {publisher}" + * "{authors} - {publisher}" + """ + if text is None or text == "": + return None, None, None, None + + s_text = text.split(' - ') + authors = s_text[0].split(', ') + publisher = s_text[-1] + if len(s_text) != 3: + return authors, None, publisher, None + + # the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}" + # get journal and year + journal_year = s_text[1].split(', ') + # journal is optional and may contain commas + if len(journal_year) > 1: + journal = ', '.join(journal_year[0:-1]) + if journal == '…': + journal = None + else: + journal = None + # year + year = journal_year[-1] + try: + publishedDate = datetime.strptime(year.strip(), '%Y') + except ValueError: + publishedDate = None + return authors, journal, publisher, publishedDate + + +def response(resp): # pylint: disable=too-many-locals """Get response from google's search request""" results = [] @@ -112,30 +150,53 @@ def response(resp): dom = html.fromstring(resp.text) # parse results - for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'): + for result in eval_xpath_list(dom, '//div[@data-cid]'): - title = extract_text(eval_xpath(result, './h3[1]//a')) + title = extract_text(eval_xpath(result, './/h3[1]//a')) if not title: # this is a [ZITATION] block continue - url = eval_xpath(result, './h3[1]//a/@href')[0] - content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or '' - - pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]')) - if pub_info: - content += "[%s]" % pub_info - pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) if pub_type: - title = title + " " + pub_type + pub_type = pub_type[1:-1].lower() + + url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0) + content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]')) + authors, journal, publisher, publishedDate = 
parse_gs_a( + extract_text(eval_xpath(result, './/div[@class="gs_a"]')) + ) + if publisher in url: + publisher = None + + # cited by + comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]')) + + # link to the html or pdf document + html_url = None + pdf_url = None + doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None) + doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) + if doc_type == "[PDF]": + pdf_url = doc_url + else: + html_url = doc_url results.append( { + 'template': 'paper.html', + 'type': pub_type, 'url': url, 'title': title, + 'authors': authors, + 'publisher': publisher, + 'journal': journal, + 'publishedDate': publishedDate, 'content': content, + 'comments': comments, + 'html_url': html_url, + 'pdf_url': pdf_url, } ) diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 27444ae24..02e282d5f 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -3,11 +3,15 @@ PubMed (Scholar publications) """ -from flask_babel import gettext from lxml import etree from datetime import datetime from urllib.parse import urlencode from searx.network import get +from searx.utils import ( + eval_xpath_getindex, + eval_xpath_list, + extract_text, +) # about about = { @@ -22,7 +26,7 @@ about = { "results": 'XML', } -categories = ['science'] +categories = ['science', 'scientific publications'] base_url = ( 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' @@ -63,46 +67,61 @@ def response(resp): retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) - search_results_xml = get(retrieve_url_encoded).content - search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') + search_results_response = get(retrieve_url_encoded).content + search_results = etree.XML(search_results_response) + for entry in 
eval_xpath_list(search_results, '//PubmedArticle'): + medline = eval_xpath_getindex(entry, './MedlineCitation', 0) - for entry in search_results: - title = entry.xpath('.//Article/ArticleTitle')[0].text - - pmid = entry.xpath('.//PMID')[0].text + title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text + pmid = eval_xpath_getindex(medline, './/PMID', 0).text url = pubmed_url + pmid + content = extract_text( + eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True + ) + doi = extract_text( + eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True + ) + journal = extract_text( + eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True + ) + issn = extract_text( + eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True + ) + authors = [] + for author in eval_xpath_list(medline, './Article/AuthorList/Author'): + f = eval_xpath_getindex(author, './ForeName', 0, default=None) + l = eval_xpath_getindex(author, './LastName', 0, default=None) + f = '' if f is None else f.text + l = '' if l is None else l.text + authors.append((f + ' ' + l).strip()) - try: - content = entry.xpath('.//Abstract/AbstractText')[0].text - except: - content = gettext('No abstract is available for this publication.') + res_dict = { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'content': content, + 'journal': journal, + 'issn': [issn], + 'authors': authors, + 'doi': doi, + } - # If a doi is available, add it to the snipppet - try: - doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text - content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) - except: - pass - - if len(content) > 300: - content = content[0:300] + "..." 
- # TODO: center snippet on query term - - res_dict = {'url': url, 'title': title, 'content': content} - - try: - publishedDate = datetime.strptime( - entry.xpath('.//DateCreated/Year')[0].text - + '-' - + entry.xpath('.//DateCreated/Month')[0].text - + '-' - + entry.xpath('.//DateCreated/Day')[0].text, - '%Y-%m-%d', - ) - res_dict['publishedDate'] = publishedDate - except: - pass + accepted_date = eval_xpath_getindex( + entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None + ) + if accepted_date is not None: + year = eval_xpath_getindex(accepted_date, './Year', 0) + month = eval_xpath_getindex(accepted_date, './Month', 0) + day = eval_xpath_getindex(accepted_date, './Day', 0) + try: + publishedDate = datetime.strptime( + year.text + '-' + month.text + '-' + day.text, + '%Y-%m-%d', + ) + res_dict['publishedDate'] = publishedDate + except Exception as e: + print(e) results.append(res_dict) - return results + return results diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index bda731047..7a1b5b231 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -6,6 +6,8 @@ from json import dumps, loads from datetime import datetime +from flask_babel import gettext + about = { "website": 'https://www.semanticscholar.org/', "wikidata_id": 'Q22908627', @@ -15,6 +17,7 @@ about = { "results": 'JSON', } +categories = ['science', 'scientific publications'] paging = True search_url = 'https://www.semanticscholar.org/api/1/search' paper_url = 'https://www.semanticscholar.org/paper' @@ -45,11 +48,7 @@ def request(query, params): def response(resp): res = loads(resp.text) results = [] - for result in res['results']: - item = {} - metadata = [] - url = result.get('primaryPaperLink', {}).get('url') if not url and result.get('links'): url = result.get('links')[0] @@ -60,22 +59,47 @@ def response(resp): if not url: url = paper_url + '/%s' % result['id'] - item['url'] = url + # publishedDate 
+ if 'pubDate' in result: + publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d") + else: + publishedDate = None - item['title'] = result['title']['text'] - item['content'] = result['paperAbstract']['text'] + # authors + authors = [author[0]['name'] for author in result.get('authors', [])] - metadata = result.get('fieldsOfStudy') or [] - venue = result.get('venue', {}).get('text') - if venue: - metadata.append(venue) - if metadata: - item['metadata'] = ', '.join(metadata) + # pick for the first alternate link, but not from the crawler + pdf_url = None + for doc in result.get('alternatePaperLinks', []): + if doc['linkType'] not in ('crawler', 'doi'): + pdf_url = doc['url'] + break - pubDate = result.get('pubDate') - if pubDate: - item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") + # comments + comments = None + if 'citationStats' in result: + comments = gettext( + '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}' + ).format( + numCitations=result['citationStats']['numCitations'], + firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'], + lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'], + ) - results.append(item) + results.append( + { + 'template': 'paper.html', + 'url': url, + 'title': result['title']['text'], + 'content': result['paperAbstract']['text'], + 'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'), + 'doi': result.get('doiInfo', {}).get('doi'), + 'tags': result.get('fieldsOfStudy'), + 'authors': authors, + 'pdf_url': pdf_url, + 'publishedDate': publishedDate, + 'comments': comments, + } + ) return results diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 512d71e5e..e5255b794 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -19,7 +19,7 @@ about = { "results": 'JSON', } -categories = ['science'] +categories = ['science', 'scientific publications'] 
paging = True nb_per_page = 10 api_key = 'unset' @@ -41,32 +41,29 @@ def response(resp): json_data = loads(resp.text) for record in json_data['records']: - content = record['abstract'][0:500] - if len(record['abstract']) > len(content): - content += "..." + content = record['abstract'] published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') - - metadata = [ - record[x] - for x in [ - 'publicationName', - 'identifier', - 'contentType', - ] - if record.get(x) is not None - ] - - metadata = ' / '.join(metadata) - if record.get('startingPage') and record.get('endingPage') is not None: - metadata += " (%(startingPage)s-%(endingPage)s)" % record - + authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']] + tags = record.get('genre') + if isinstance(tags, str): + tags = [tags] results.append( { + 'template': 'paper.html', 'title': record['title'], 'url': record['url'][0]['value'].replace('http://', 'https://', 1), + 'type': record.get('contentType'), 'content': content, 'publishedDate': published, - 'metadata': metadata, + 'authors': authors, + 'doi': record.get('doi'), + 'journal': record.get('publicationName'), + 'pages': record.get('start_page') + '-' + record.get('end_page'), + 'tags': tags, + 'issn': [record.get('issn')], + 'isbn': [record.get('isbn')], + 'volume': record.get('volume') or None, + 'number': record.get('number') or None, } ) return results diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 54d28bc9a..f0e07735d 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -42,4 +42,6 @@ def on_result(request, search, result): doi = doi[: -len(suffix)] result['url'] = get_doi_resolver(request.preferences) + doi result['parsed_url'] = urlparse(result['url']) + if 'doi' not in result: + result['doi'] = doi return True diff --git a/searx/searxng.msg b/searx/searxng.msg index 3b876f96d..c37240f83 100644 --- a/searx/searxng.msg +++ b/searx/searxng.msg @@ 
-43,6 +43,7 @@ CATEGORY_GROUPS = { 'REPOS': 'repos', 'SOFTWARE_WIKIS': 'software wikis', 'WEB': 'web', + 'SCIENTIFIC PUBLICATIONS': 'scientific publications', } STYLE_NAMES = { diff --git a/searx/settings.yml b/searx/settings.yml index 3f07bb2dd..9e9b2f9e6 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -319,7 +319,6 @@ engines: - name: arxiv engine: arxiv shortcut: arx - categories: science timeout: 4.0 # tmp suspended: dh key too small @@ -411,23 +410,10 @@ engines: # api_key: 'unset' - name: crossref - engine: json_engine - paging: true - search_url: https://search.crossref.org/dois?q={query}&page={pageno} - url_query: doi - title_query: title - title_html_to_text: true - content_query: fullCitation - content_html_to_text: true - categories: science + engine: crossref shortcut: cr - about: - website: https://www.crossref.org/ - wikidata_id: Q5188229 - official_api_documentation: https://github.com/CrossRef/rest-api-doc - use_official_api: false - require_api_key: false - results: JSON + timeout: 30 + disable: true - name: yep engine: json_engine @@ -1068,7 +1054,7 @@ engines: title_query: metadata/oaf:entity/oaf:result/title/$ content_query: metadata/oaf:entity/oaf:result/description/$ content_html_to_text: true - categories: science + categories: "science" shortcut: oad timeout: 5.0 about: @@ -1198,7 +1184,6 @@ engines: - name: pubmed engine: pubmed shortcut: pub - categories: science timeout: 3.0 - name: pypi @@ -1346,7 +1331,6 @@ engines: engine: semantic_scholar disabled: true shortcut: se - categories: science # Spotify needs API credentials # - name: spotify @@ -1372,8 +1356,7 @@ engines: # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" # api_key: 'unset' # shortcut: springer - # categories: science - # timeout: 6.0 + # timeout: 15.0 - name: startpage engine: startpage diff --git a/searx/static/themes/simple/css/searxng-rtl.min.css b/searx/static/themes/simple/css/searxng-rtl.min.css index 450f5d96d..1462d0d5e 100644 
Binary files a/searx/static/themes/simple/css/searxng-rtl.min.css and b/searx/static/themes/simple/css/searxng-rtl.min.css differ diff --git a/searx/static/themes/simple/css/searxng-rtl.min.css.map b/searx/static/themes/simple/css/searxng-rtl.min.css.map index 21923a32c..8768d1ba7 100644 Binary files a/searx/static/themes/simple/css/searxng-rtl.min.css.map and b/searx/static/themes/simple/css/searxng-rtl.min.css.map differ diff --git a/searx/static/themes/simple/css/searxng.min.css b/searx/static/themes/simple/css/searxng.min.css index 09d26b534..90820978f 100644 Binary files a/searx/static/themes/simple/css/searxng.min.css and b/searx/static/themes/simple/css/searxng.min.css differ diff --git a/searx/static/themes/simple/css/searxng.min.css.map b/searx/static/themes/simple/css/searxng.min.css.map index ad344d144..95d4dac28 100644 Binary files a/searx/static/themes/simple/css/searxng.min.css.map and b/searx/static/themes/simple/css/searxng.min.css.map differ diff --git a/searx/static/themes/simple/src/less/style.less b/searx/static/themes/simple/src/less/style.less index 11d2ef58d..dd8e8a596 100644 --- a/searx/static/themes/simple/src/less/style.less +++ b/searx/static/themes/simple/src/less/style.less @@ -302,6 +302,49 @@ article[data-vim-selected].category-social { } } +.result-paper { + .attributes { + display: table; + border-spacing: 0.125rem; + + div { + display: table-row; + + span { + font-size: 0.9rem; + margin-top: 0.25rem; + display: table-cell; + + time { + font-size: 0.9rem; + } + } + + span:first-child { + color: var(--color-base-font); + min-width: 10rem; + } + + span:nth-child(2) { + color: var(--color-result-publishdate-font); + } + } + } + + .content { + margin-top: 0.25rem; + } + + .comments { + font-size: 0.9rem; + margin: 0.25rem 0 0 0; + padding: 0; + word-wrap: break-word; + line-height: 1.24; + font-style: italic; + } +} + .template_group_images { display: flex; flex-wrap: wrap; @@ -955,6 +998,28 @@ article[data-vim-selected].category-social 
{ border: none !important; background-color: var(--color-sidebar-background); } + + .result-paper { + .attributes { + display: block; + + div { + display: block; + + span { + display: inline; + } + + span:first-child { + font-weight: bold; + } + + span:nth-child(2) { + .ltr-margin-left(0.5rem); + } + } + } + } } /* diff --git a/searx/templates/simple/result_templates/paper.html b/searx/templates/simple/result_templates/paper.html new file mode 100644 index 000000000..54704c866 --- /dev/null +++ b/searx/templates/simple/result_templates/paper.html @@ -0,0 +1,44 @@ +{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %} + +{{ result_header(result, favicons, image_proxify) -}} +
+ {%- if result.publishedDate %}
{{ _("Published date") }}:
{% endif -%} + {%- if result.authors %}
{{ _("Author") }}:{{ result.authors | join(", ") }}
{% endif -%} + {%- if result.journal -%} +
+ {{- _("Journal") }}:{{ result.journal -}} + {%- if result.volume -%} +  {{- result.volume -}} + {%- if result.number -%} + .{{- result.number -}} + {%- endif -%} + {%- endif -%} + {%- if result.pages -%} +  {{- result.pages -}} + {%- endif -%} + +
+ {%- endif %} + {%- if result.editor %}
{{ _("Editor") }}:{{ result.editor }}
{% endif -%} + {%- if result.publisher %}
{{ _("Publisher") }}:{{ result.publisher }}
{% endif -%} + {%- if result.type %}
{{ _("Type") }}:{{ result.type }}
{% endif -%} + {%- if result.tags %}
{{ _("Tags") }}:{{ result.tags | join(", ")}}
{%- endif -%} + {%- if result.doi %}
{{ _("DOI") }}:{{- result.doi -}}
{% endif -%} + {%- if result.issn %}
{{ _("ISSN") }}:{{ result.issn | join(", ") }}
{% endif -%} + {%- if result.isbn %}
{{ _("ISBN") }}:{{ result.isbn | join(", ") }}
{% endif -%} +
+{%- if result.content -%}

{{- result.content | safe -}}

{%- endif -%} +{%- if result.comments -%}

{{- result.comments -}}

{%- endif -%} + +{{- result_sub_footer(result, proxify) -}} +{{- result_footer(result) }} diff --git a/searx/webapp.py b/searx/webapp.py index bd76cc534..44500911a 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -12,7 +12,6 @@ import os import sys import base64 -from datetime import datetime, timedelta from timeit import default_timer from html import escape from io import StringIO @@ -45,7 +44,6 @@ from flask.json import jsonify from flask_babel import ( Babel, gettext, - format_date, format_decimal, ) @@ -79,6 +77,7 @@ from searx.webutils import ( is_hmac_of, is_flask_run_cmdline, group_engines_in_tab, + searxng_l10n_timespan, ) from searx.webadapter import ( get_search_query_from_webapp, @@ -718,25 +717,13 @@ def search(): if 'url' in result: result['pretty_url'] = prettify_url(result['url']) - # TODO, check if timezone is calculated right # pylint: disable=fixme if result.get('publishedDate'): # do not try to get a date from an empty string or a None type try: # test if publishedDate >= 1900 (datetime module bug) result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') except ValueError: result['publishedDate'] = None else: - if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): - timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None) - minutes = int((timedifference.seconds / 60) % 60) - hours = int(timedifference.seconds / 60 / 60) - if hours == 0: - result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) - else: - result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( - hours=hours, minutes=minutes - ) - else: - result['publishedDate'] = format_date(result['publishedDate']) + result['publishedDate'] = searxng_l10n_timespan(result['publishedDate']) # set result['open_group'] = True when the template changes from the previous result # set result['close_group'] = True when the template changes on the next result diff --git 
a/searx/webutils.py b/searx/webutils.py index b18fd5c6a..a5ed27c2c 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -7,11 +7,14 @@ import hmac import re import inspect import itertools +from datetime import datetime, timedelta from typing import Iterable, List, Tuple, Dict from io import StringIO from codecs import getincrementalencoder +from flask_babel import gettext, format_date + from searx import logger, settings from searx.engines import Engine, OTHER_CATEGORY @@ -138,6 +141,28 @@ def highlight_content(content, query): return content +def searxng_l10n_timespan(dt: datetime) -> str: # pylint: disable=invalid-name + """Returns a human-readable and translated string for a date: the elapsed + time if the date is not older than one day, else the localized date. + + If the date is exactly January 1st at midnight, only the year is returned + (as a plain string). + """ + # TODO, check if timezone is calculated right # pylint: disable=fixme + d = dt.date() + t = dt.time() + if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0: + return str(d.year) + if dt.replace(tzinfo=None) >= datetime.now() - timedelta(days=1): + timedifference = datetime.now() - dt.replace(tzinfo=None) + minutes = int((timedifference.seconds / 60) % 60) + hours = int(timedifference.seconds / 60 / 60) + if hours == 0: + return gettext('{minutes} minute(s) ago').format(minutes=minutes) + return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) + return format_date(dt) + + def is_flask_run_cmdline(): """Check if the application was started using "flask run" command line