Compare commits


2 Commits

Author  SHA1        Message                                 Date
Bnyro   b7648fb6be  Merge d5b0fb3d03 into b8f1a329d3        2024-11-22 10:03:04 +01:00
Bnyro   d5b0fb3d03  [feat] engine: add adobe stock photos   2024-11-09 17:46:49 +01:00
9 changed files with 139 additions and 266 deletions

docs/dev/engines/online/adobe_stock.rst

@ -1,13 +0,0 @@
.. _adobe stock engine:
===========
Adobe Stock
===========
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.engines.adobe_stock
:members:

docs/index.rst

@@ -4,31 +4,26 @@ Welcome to SearXNG
 
   *Search without being tracked.*
 
-.. jinja:: searx
-
-   SearXNG is a free internet metasearch engine which aggregates results from up
-   to {{engines | length}} :ref:`search services <configured engines>`.  Users
-   are neither tracked nor profiled.  Additionally, SearXNG can be used over Tor
-   for online anonymity.
+SearXNG is a free internet metasearch engine which aggregates results from more
+than 70 search services.  Users are neither tracked nor profiled.  Additionally,
+SearXNG can be used over Tor for online anonymity.
 
 Get started with SearXNG by using one of the instances listed at searx.space_.
 If you don't trust anyone, you can set up your own, see :ref:`installation`.
 
-.. jinja:: searx
-
-   .. sidebar:: features
-
-      - :ref:`self hosted <installation>`
-      - :ref:`no user tracking / no profiling <SearXNG protect privacy>`
-      - script & cookies are optional
-      - secure, encrypted connections
-      - :ref:`{{engines | length}} search engines <configured engines>`
-      - `58 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
-      - about 70 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
-      - :ref:`easy integration of search engines <demo online engine>`
-      - professional development: `CI <https://github.com/searxng/searxng/actions>`_,
-        `quality assurance <https://dev.searxng.org/>`_ &
-        `automated tested UI <https://dev.searxng.org/screenshots.html>`_
+.. sidebar:: features
+
+   - :ref:`self hosted <installation>`
+   - :ref:`no user tracking / no profiling <SearXNG protect privacy>`
+   - script & cookies are optional
+   - secure, encrypted connections
+   - :ref:`about 200 search engines <configured engines>`
+   - `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
+   - about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
+   - :ref:`easy integration of search engines <demo online engine>`
+   - professional development: `CI <https://github.com/searxng/searxng/actions>`_,
+     `quality assurance <https://dev.searxng.org/>`_ &
+     `automated tested UI <https://dev.searxng.org/screenshots.html>`_
 
 .. sidebar:: be a part

requirements.txt

@@ -19,4 +19,3 @@ tomli==2.0.2; python_version < '3.11'
 msgspec==0.18.6
 eval_type_backport; python_version < '3.9'
 typer-slim==0.13.1
-isodate==0.7.2
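
The dropped isodate pin matters for the engine code below: it is what turns
Adobe's ISO 8601 duration strings into timedelta objects. A minimal sketch of
that call (isodate's public API; the duration value is a made-up example):

    import isodate

    # "PT1M30S" is an ISO 8601 duration: 1 minute, 30 seconds
    print(isodate.parse_duration("PT1M30S"))  # -> 0:01:30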

searx/engines/adobe_stock.py

@@ -1,229 +1,67 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
-assets.  Asset types include photos, vectors, illustrations, templates, 3D
-assets, videos, motion graphics templates and audio tracks.
-
-.. _Adobe Stock: https://stock.adobe.com/
-
-Configuration
-=============
-
-The engine has the following mandatory settings:
-
-- SearXNG's :ref:`engine categories`
-- Adobe-Stock's :py:obj:`adobe_order`
-- Adobe-Stock's :py:obj:`adobe_content_types`
-
-.. code:: yaml
-
-  - name: adobe stock
-    engine: adobe_stock
-    shortcut: asi
-    categories: [images]
-    adobe_order: relevance
-    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
-
-  - name: adobe stock video
-    engine: adobe_stock
-    network: adobe stock
-    shortcut: asi
-    categories: [videos]
-    adobe_order: relevance
-    adobe_content_types: ["video"]
-
-Implementation
-==============
+"""Adobe Stock (images)
 
 """
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from datetime import datetime, timedelta
+
 from urllib.parse import urlencode
-
-import isodate
-
-if TYPE_CHECKING:
-    import logging
-
-    logger: logging.Logger
+from searx.utils import gen_useragent
 
 about = {
-    "website": "https://stock.adobe.com/",
-    "wikidata_id": "Q5977430",
+    "website": 'https://stock.adobe.com/',
+    "wikidata_id": 'Q5977430',
     "official_api_documentation": None,
     "use_official_api": False,
     "require_api_key": False,
-    "results": "JSON",
+    "results": 'JSON',
 }
 
-categories = []
+categories = ['images']
 paging = True
-send_accept_language_header = True
-
+base_url = 'https://stock.adobe.com'
 results_per_page = 10
 
-base_url = "https://stock.adobe.com"
-
-adobe_order: str = ""
-"""Sort order, can be one of:
-
-- ``relevance`` or
-- ``featured`` or
-- ``creation`` (most recent) or
-- ``nb_downloads`` (number of downloads)
-"""
-
-ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
-adobe_content_types: list = []
-"""A list of content types.  The following content types are offered:
-
-- Images: ``image``
-- Videos: ``video``
-- Templates: ``template``
-- 3D: ``3d``
-- Audio: ``audio``
-
-Additional subcategories:
-
-- Photos: ``photo``
-- Illustrations: ``illustration``
-- Vectors: ``zip_vector``
-"""
-
-# Do we need support for "free_collection" and "include_stock_enterprise"?
-
-
-def init(_):
-    if not categories:
-        raise ValueError("adobe_stock engine: categories is unset")
-
-    # adobe_order
-    if not adobe_order:
-        raise ValueError("adobe_stock engine: adobe_order is unset")
-    if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
-        raise ValueError(f"unsupported adobe_order: {adobe_order}")
-
-    # adobe_content_types
-    if not adobe_content_types:
-        raise ValueError("adobe_stock engine: adobe_content_types is unset")
-
-    if isinstance(adobe_content_types, list):
-        for t in adobe_content_types:
-            if t not in ADOBE_VALID_TYPES:
-                raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
-    else:
-        raise ValueError(
-            "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
-        )
+adobe_order = "relevance"  # one of 'relevant', 'featured', 'creation' or 'nb_downloads'
 
 
 def request(query, params):
     args = {
-        "k": query,
-        "limit": results_per_page,
-        "order": adobe_order,
-        "search_page": params["pageno"],
-        "search_type": "pagination",
+        'k': query,
+        'limit': results_per_page,
+        'order': adobe_order,
+        'search_page': params['pageno'],
+        'search_type': 'pagination',
+        'filters[content_type:video]': 0,
+        'filters[content_type:audio]': 0,
     }
+    params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
 
-    for content_type in ADOBE_VALID_TYPES:
-        args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
-
-    params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
-
     # headers required to bypass bot-detection
-    if params["searxng_locale"] == "all":
-        params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
+    params['headers'] = {
+        "User-Agent": gen_useragent(),
+        "Accept-Language": "en-US,en;q=0.5",
+    }
 
     return params
 
 
-def parse_image_item(item):
-    return {
-        "template": "images.html",
-        "url": item["content_url"],
-        "title": item["title"],
-        "content": item["asset_type"],
-        "img_src": item["content_thumb_extra_large_url"],
-        "thumbnail_src": item["thumbnail_url"],
-        "resolution": f"{item['content_original_width']}x{item['content_original_height']}",
-        "img_format": item["format"],
-        "author": item["author"],
-    }
-
-
-def parse_video_item(item):
-
-    # in video items, the title is more or less a "content description", we try
-    # to reduce the length of the title ..
-
-    title = item["title"]
-    content = ""
-    if "." in title.strip()[:-1]:
-        content = title
-        title = title.split(".", 1)[0]
-    elif "," in title:
-        content = title
-        title = title.split(",", 1)[0]
-    elif len(title) > 50:
-        content = title
-        title = ""
-        for w in content.split(" "):
-            title += f" {w}"
-            if len(title) > 50:
-                title = title.strip() + "\u2026"
-                break
-
-    return {
-        "template": "videos.html",
-        "url": item["content_url"],
-        "title": title,
-        "content": content,
-        # https://en.wikipedia.org/wiki/ISO_8601#Durations
-        "length": isodate.parse_duration(item["time_duration"]),
-        "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
-        "thumbnail": item["thumbnail_url"],
-        "iframe_src": item["video_small_preview_url"],
-        "metadata": item["asset_type"],
-    }
-
-
-def parse_audio_item(item):
-    audio_data = item["audio_data"]
-    content = audio_data.get("description") or ""
-    if audio_data.get("album"):
-        content = audio_data["album"] + " - " + content
-
-    return {
-        "url": item["content_url"],
-        "title": item["title"],
-        "content": content,
-        # "thumbnail": base_url + item["thumbnail_url"],
-        "iframe_src": audio_data["preview"]["url"],
-        "publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
-        "length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
-        "author": item.get("artist_name"),
-    }
-
-
 def response(resp):
     results = []
 
     json_resp = resp.json()
 
-    if isinstance(json_resp["items"], list):
-        return None
-
-    for item in json_resp["items"].values():
-        if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
-            result = parse_image_item(item)
-        elif item["asset_type"].lower() == "video":
-            result = parse_video_item(item)
-        elif item["asset_type"].lower() == "audio":
-            result = parse_audio_item(item)
-        else:
-            logger.error("no handle for %s --> %s", item["asset_type"], item)
-            continue
-        results.append(result)
+    for item in json_resp['items'].values():
+        results.append(
+            {
+                'template': 'images.html',
+                'url': item['content_url'],
+                'title': item['title'],
+                'content': '',
+                'img_src': item['content_thumb_extra_large_url'],
+                'thumbnail_src': item['thumbnail_url'],
+                'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
+                'img_format': item['format'],
+                'author': item['author'],
+            }
+        )
 
     return results

searx/engines/internet_archive_scholar.py

@@ -0,0 +1,71 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Internet Archive Scholar (science)
+"""
+
+from datetime import datetime
+from urllib.parse import urlencode
+
+from searx.utils import html_to_text
+
+about = {
+    "website": "https://scholar.archive.org/",
+    "wikidata_id": "Q115667709",
+    "official_api_documentation": "https://scholar.archive.org/api/redoc",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": "JSON",
+}
+categories = ['science', 'scientific publications']
+paging = True
+
+base_url = "https://scholar.archive.org"
+results_per_page = 15
+
+
+def request(query, params):
+    args = {
+        "q": query,
+        "limit": results_per_page,
+        "offset": (params["pageno"] - 1) * results_per_page,
+    }
+    params["url"] = f"{base_url}/search?{urlencode(args)}"
+    params["headers"]["Accept"] = "application/json"
+    return params
+
+
+def response(resp):
+    results = []
+
+    json = resp.json()
+
+    for result in json["results"]:
+        publishedDate, content, doi = None, '', None
+
+        if result['biblio'].get('release_date'):
+            publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")
+
+        if len(result['abstracts']) > 0:
+            content = result['abstracts'][0].get('body')
+        elif len(result['_highlights']) > 0:
+            content = result['_highlights'][0]
+
+        if len(result['releases']) > 0:
+            doi = result['releases'][0].get('doi')
+
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': result['fulltext']['access_url'],
+                'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
+                'content': html_to_text(content),
+                'publisher': result['biblio'].get('publisher'),
+                'doi': doi,
+                'journal': result['biblio'].get('container_name'),
+                'authors': result['biblio'].get('contrib_names'),
+                'tags': result['tags'],
+                'publishedDate': publishedDate,
+                'issns': result['biblio'].get('issns'),
+                'pdf_url': result['fulltext'].get('access_url'),
+            }
+        )
+
+    return results
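
The request()/response() pair above boils down to one plain HTTP round trip.
A self-contained sketch of the same call (requests stands in for SearXNG's
network layer here, and the query is a made-up example):

    import requests

    resp = requests.get(
        "https://scholar.archive.org/search",
        params={"q": "metasearch engines", "limit": 15, "offset": 0},
        headers={"Accept": "application/json"},
        timeout=15,
    )
    for hit in resp.json()["results"]:
        print(hit["biblio"].get("title"))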

searx/engines/loc.py

@@ -27,7 +27,7 @@ categories = ['images']
 paging = True
 endpoint = 'photos'
 
-base_url = 'https://www.loc.gov'
+base_url = 'https://loc.gov'
 search_string = "/{endpoint}/?sp={page}&{query}&fo=json"

searx/network/network.py

@@ -233,7 +233,8 @@ class Network:
             del kwargs['raise_for_httperror']
         return do_raise_for_httperror
 
-    def patch_response(self, response, do_raise_for_httperror):
+    @staticmethod
+    def patch_response(response, do_raise_for_httperror):
         if isinstance(response, httpx.Response):
             # requests compatibility (response is not streamed)
             # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
@@ -241,11 +242,8 @@ class Network:
             # raise an exception
             if do_raise_for_httperror:
-                try:
-                    raise_for_httperror(response)
-                except:
-                    self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
-                    raise
+                raise_for_httperror(response)
 
         return response
 
     def is_valid_response(self, response):
@@ -271,7 +269,7 @@ class Network:
                 else:
                     response = await client.request(method, url, **kwargs)
                 if self.is_valid_response(response) or retries <= 0:
-                    return self.patch_response(response, do_raise_for_httperror)
+                    return Network.patch_response(response, do_raise_for_httperror)
             except httpx.RemoteProtocolError as e:
                 if not was_disconnected:
                     # the server has closed the connection:
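
Once the try/except (and its use of self._logger) is gone, patch_response no
longer touches instance state, so the @staticmethod conversion lets the retry
loop call it on the class itself. A minimal, generic illustration of that
Python behavior (names here are illustrative, not from the diff):

    class Net:
        @staticmethod
        def patch(resp):
            # no `self`: callable as Net.patch(...) or instance.patch(...)
            return resp

    print(Net.patch("ok"))  # no instance needed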

searx/search/processors/online.py

@@ -137,6 +137,9 @@ class OnlineProcessor(EngineProcessor):
         self.engine.request(query, params)
 
         # ignoring empty urls
+        if params['url'] is None:
+            return None
+
         if not params['url']:
             return None

searx/settings.yml

@@ -327,32 +327,9 @@ engines:
 
   - name: adobe stock
     engine: adobe_stock
-    shortcut: asi
-    categories: ["images"]
-    # https://docs.searxng.org/dev/engines/online/adobe_stock.html
-    adobe_order: relevance
-    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
-    timeout: 6
-    disabled: true
-
-  - name: adobe stock video
-    engine: adobe_stock
-    shortcut: asv
-    network: adobe stock
-    categories: ["videos"]
-    adobe_order: relevance
-    adobe_content_types: ["video"]
-    timeout: 6
-    disabled: true
-
-  - name: adobe stock audio
-    engine: adobe_stock
-    shortcut: asa
-    network: adobe stock
-    categories: ["music"]
-    adobe_order: relevance
-    adobe_content_types: ["audio"]
-    timeout: 6
+    # available search orders: 'relevant', 'featured', 'creation', 'nb_downloads'
+    # adobe_order: relevance
+    shortcut: as
     disabled: true
 
   - name: alpine linux packages
@@ -1652,6 +1629,11 @@ engines:
     api_site: 'askubuntu'
     categories: [it, q&a]
 
+  - name: internetarchivescholar
+    engine: internet_archive_scholar
+    shortcut: ias
+    timeout: 15.0
+
   - name: superuser
     engine: stackexchange
     shortcut: su
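
Once an instance enables these entries, each engine can be addressed per query
via its shortcut using SearXNG's usual bang syntax, e.g. `!ias open access
publishing` for the Internet Archive Scholar engine.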