mirror of https://github.com/searxng/searxng.git
Compare commits
7 Commits
b7648fb6be
...
a36aa54570
Author | SHA1 | Date |
---|---|---|
Bnyro | a36aa54570 | |
Markus Heiser | 02ebea58fb | |
Bnyro | 14b7ef3c01 | |
Markus Heiser | 0f9694c90b | |
Markus Heiser | ccc4f30b20 | |
Markus Heiser | c4b874e9b0 | |
Markus Heiser | 7c4e4ebd40 |
|
@ -0,0 +1,13 @@
|
||||||
|
.. _adobe stock engine:
|
||||||
|
|
||||||
|
===========
|
||||||
|
Adobe Stock
|
||||||
|
===========
|
||||||
|
|
||||||
|
.. contents:: Contents
|
||||||
|
:depth: 2
|
||||||
|
:local:
|
||||||
|
:backlinks: entry
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.adobe_stock
|
||||||
|
:members:
|
|
@ -4,22 +4,27 @@ Welcome to SearXNG
|
||||||
|
|
||||||
*Search without being tracked.*
|
*Search without being tracked.*
|
||||||
|
|
||||||
SearXNG is a free internet metasearch engine which aggregates results from more
|
.. jinja:: searx
|
||||||
than 70 search services. Users are neither tracked nor profiled. Additionally,
|
|
||||||
SearXNG can be used over Tor for online anonymity.
|
SearXNG is a free internet metasearch engine which aggregates results from up
|
||||||
|
to {{engines | length}} :ref:`search services <configured engines>`. Users
|
||||||
|
are neither tracked nor profiled. Additionally, SearXNG can be used over Tor
|
||||||
|
for online anonymity.
|
||||||
|
|
||||||
Get started with SearXNG by using one of the instances listed at searx.space_.
|
Get started with SearXNG by using one of the instances listed at searx.space_.
|
||||||
If you don't trust anyone, you can set up your own, see :ref:`installation`.
|
If you don't trust anyone, you can set up your own, see :ref:`installation`.
|
||||||
|
|
||||||
.. sidebar:: features
|
.. jinja:: searx
|
||||||
|
|
||||||
|
.. sidebar:: features
|
||||||
|
|
||||||
- :ref:`self hosted <installation>`
|
- :ref:`self hosted <installation>`
|
||||||
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
|
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
|
||||||
- script & cookies are optional
|
- script & cookies are optional
|
||||||
- secure, encrypted connections
|
- secure, encrypted connections
|
||||||
- :ref:`about 200 search engines <configured engines>`
|
- :ref:`{{engines | length}} search engines <configured engines>`
|
||||||
- `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
- `58 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
||||||
- about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
- about 70 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
||||||
- :ref:`easy integration of search engines <demo online engine>`
|
- :ref:`easy integration of search engines <demo online engine>`
|
||||||
- professional development: `CI <https://github.com/searxng/searxng/actions>`_,
|
- professional development: `CI <https://github.com/searxng/searxng/actions>`_,
|
||||||
`quality assurance <https://dev.searxng.org/>`_ &
|
`quality assurance <https://dev.searxng.org/>`_ &
|
||||||
|
|
|
@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
|
||||||
msgspec==0.18.6
|
msgspec==0.18.6
|
||||||
eval_type_backport; python_version < '3.9'
|
eval_type_backport; python_version < '3.9'
|
||||||
typer-slim==0.13.1
|
typer-slim==0.13.1
|
||||||
|
isodate==0.7.2
|
||||||
|
|
|
@ -1,67 +1,229 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Adobe Stock (images)
|
"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
|
||||||
"""
|
assets. Asset types include photos, vectors, illustrations, templates, 3D
|
||||||
|
assets, videos, motion graphics templates and audio tracks.
|
||||||
|
|
||||||
|
.. _Adobe Stock: https://stock.adobe.com/
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following mandatory settings:
|
||||||
|
|
||||||
|
- SearXNG's :ref:`engine categories`
|
||||||
|
- Adobe-Stock's :py:obj:`adobe_order`
|
||||||
|
- Adobe-Stock's :py:obj:`adobe_content_types`
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: adobe stock
|
||||||
|
engine: adobe_stock
|
||||||
|
shortcut: asi
|
||||||
|
categories: [images]
|
||||||
|
adobe_order: relevance
|
||||||
|
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
||||||
|
|
||||||
|
- name: adobe stock video
|
||||||
|
engine: adobe_stock
|
||||||
|
network: adobe stock
|
||||||
|
shortcut: asv
|
||||||
|
categories: [videos]
|
||||||
|
adobe_order: relevance
|
||||||
|
adobe_content_types: ["video"]
|
||||||
|
|
||||||
|
Implementation
|
||||||
|
==============
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from datetime import datetime, timedelta
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
from searx.utils import gen_useragent
|
|
||||||
|
import isodate
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://stock.adobe.com/',
|
"website": "https://stock.adobe.com/",
|
||||||
"wikidata_id": 'Q5977430',
|
"wikidata_id": "Q5977430",
|
||||||
"official_api_documentation": None,
|
"official_api_documentation": None,
|
||||||
"use_official_api": False,
|
"use_official_api": False,
|
||||||
"require_api_key": False,
|
"require_api_key": False,
|
||||||
"results": 'JSON',
|
"results": "JSON",
|
||||||
}
|
}
|
||||||
|
|
||||||
categories = ['images']
|
categories = []
|
||||||
paging = True
|
paging = True
|
||||||
|
send_accept_language_header = True
|
||||||
base_url = 'https://stock.adobe.com'
|
|
||||||
|
|
||||||
results_per_page = 10
|
results_per_page = 10
|
||||||
adobe_order = "relevance" # one of 'relevant', 'featured', 'creation' or 'nb_downloads'
|
|
||||||
|
base_url = "https://stock.adobe.com"
|
||||||
|
|
||||||
|
adobe_order: str = ""
|
||||||
|
"""Sort order, can be one of:
|
||||||
|
|
||||||
|
- ``relevance`` or
|
||||||
|
- ``featured`` or
|
||||||
|
- ``creation`` (most recent) or
|
||||||
|
- ``nb_downloads`` (number of downloads)
|
||||||
|
"""
|
||||||
|
|
||||||
|
ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
|
||||||
|
adobe_content_types: list = []
|
||||||
|
"""A list of content types. The following content types are offered:
|
||||||
|
|
||||||
|
- Images: ``image``
|
||||||
|
- Videos: ``video``
|
||||||
|
- Templates: ``template``
|
||||||
|
- 3D: ``3d``
|
||||||
|
- Audio: ``audio``
|
||||||
|
|
||||||
|
Additional subcategories:
|
||||||
|
|
||||||
|
- Photos: ``photo``
|
||||||
|
- Illustrations: ``illustration``
|
||||||
|
- Vectors: ``zip_vector``
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Do we need support for "free_collection" and "include_stock_enterprise"?
|
||||||
|
|
||||||
|
|
||||||
|
def init(_):
|
||||||
|
if not categories:
|
||||||
|
raise ValueError("adobe_stock engine: categories is unset")
|
||||||
|
|
||||||
|
# adobe_order
|
||||||
|
if not adobe_order:
|
||||||
|
raise ValueError("adobe_stock engine: adobe_order is unset")
|
||||||
|
if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
|
||||||
|
raise ValueError(f"unsupported adobe_order: {adobe_order}")
|
||||||
|
|
||||||
|
# adobe_content_types
|
||||||
|
if not adobe_content_types:
|
||||||
|
raise ValueError("adobe_stock engine: adobe_content_types is unset")
|
||||||
|
|
||||||
|
if isinstance(adobe_content_types, list):
|
||||||
|
for t in adobe_content_types:
|
||||||
|
if t not in ADOBE_VALID_TYPES:
|
||||||
|
raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
|
||||||
args = {
|
args = {
|
||||||
'k': query,
|
"k": query,
|
||||||
'limit': results_per_page,
|
"limit": results_per_page,
|
||||||
'order': adobe_order,
|
"order": adobe_order,
|
||||||
'search_page': params['pageno'],
|
"search_page": params["pageno"],
|
||||||
'search_type': 'pagination',
|
"search_type": "pagination",
|
||||||
'filters[content_type:video]': 0,
|
|
||||||
'filters[content_type:audio]': 0,
|
|
||||||
}
|
}
|
||||||
params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
|
||||||
|
for content_type in ADOBE_VALID_TYPES:
|
||||||
|
args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
|
||||||
|
|
||||||
|
params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
||||||
|
|
||||||
# headers required to bypass bot-detection
|
# headers required to bypass bot-detection
|
||||||
params['headers'] = {
|
if params["searxng_locale"] == "all":
|
||||||
"User-Agent": gen_useragent(),
|
params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
|
||||||
}
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def parse_image_item(item):
|
||||||
|
return {
|
||||||
|
"template": "images.html",
|
||||||
|
"url": item["content_url"],
|
||||||
|
"title": item["title"],
|
||||||
|
"content": item["asset_type"],
|
||||||
|
"img_src": item["content_thumb_extra_large_url"],
|
||||||
|
"thumbnail_src": item["thumbnail_url"],
|
||||||
|
"resolution": f"{item['content_original_width']}x{item['content_original_height']}",
|
||||||
|
"img_format": item["format"],
|
||||||
|
"author": item["author"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_video_item(item):
|
||||||
|
|
||||||
|
# in video items, the title is more or less a "content description", we try
|
||||||
|
# to reduce the length of the title ..
|
||||||
|
|
||||||
|
title = item["title"]
|
||||||
|
content = ""
|
||||||
|
if "." in title.strip()[:-1]:
|
||||||
|
content = title
|
||||||
|
title = title.split(".", 1)[0]
|
||||||
|
elif "," in title:
|
||||||
|
content = title
|
||||||
|
title = title.split(",", 1)[0]
|
||||||
|
elif len(title) > 50:
|
||||||
|
content = title
|
||||||
|
title = ""
|
||||||
|
for w in content.split(" "):
|
||||||
|
title += f" {w}"
|
||||||
|
if len(title) > 50:
|
||||||
|
title = title.strip() + "\u2026"
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
"template": "videos.html",
|
||||||
|
"url": item["content_url"],
|
||||||
|
"title": title,
|
||||||
|
"content": content,
|
||||||
|
# https://en.wikipedia.org/wiki/ISO_8601#Durations
|
||||||
|
"length": isodate.parse_duration(item["time_duration"]),
|
||||||
|
"publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
|
||||||
|
"thumbnail": item["thumbnail_url"],
|
||||||
|
"iframe_src": item["video_small_preview_url"],
|
||||||
|
"metadata": item["asset_type"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_audio_item(item):
|
||||||
|
audio_data = item["audio_data"]
|
||||||
|
content = audio_data.get("description") or ""
|
||||||
|
if audio_data.get("album"):
|
||||||
|
content = audio_data["album"] + " - " + content
|
||||||
|
|
||||||
|
return {
|
||||||
|
"url": item["content_url"],
|
||||||
|
"title": item["title"],
|
||||||
|
"content": content,
|
||||||
|
# "thumbnail": base_url + item["thumbnail_url"],
|
||||||
|
"iframe_src": audio_data["preview"]["url"],
|
||||||
|
"publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
|
||||||
|
"length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
|
||||||
|
"author": item.get("artist_name"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
json_resp = resp.json()
|
json_resp = resp.json()
|
||||||
|
|
||||||
for item in json_resp['items'].values():
|
if isinstance(json_resp["items"], list):
|
||||||
results.append(
|
return None
|
||||||
{
|
for item in json_resp["items"].values():
|
||||||
'template': 'images.html',
|
if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
|
||||||
'url': item['content_url'],
|
result = parse_image_item(item)
|
||||||
'title': item['title'],
|
elif item["asset_type"].lower() == "video":
|
||||||
'content': '',
|
result = parse_video_item(item)
|
||||||
'img_src': item['content_thumb_extra_large_url'],
|
elif item["asset_type"].lower() == "audio":
|
||||||
'thumbnail_src': item['thumbnail_url'],
|
result = parse_audio_item(item)
|
||||||
'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
|
else:
|
||||||
'img_format': item['format'],
|
logger.error("no handle for %s --> %s", item["asset_type"], item)
|
||||||
'author': item['author'],
|
continue
|
||||||
}
|
results.append(result)
|
||||||
)
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -1,71 +0,0 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""Internet Archive scholar(science)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from searx.utils import html_to_text
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": "https://scholar.archive.org/",
|
|
||||||
"wikidata_id": "Q115667709",
|
|
||||||
"official_api_documentation": "https://scholar.archive.org/api/redoc",
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": "JSON",
|
|
||||||
}
|
|
||||||
categories = ['science', 'scientific publications']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
base_url = "https://scholar.archive.org"
|
|
||||||
results_per_page = 15
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
|
||||||
args = {
|
|
||||||
"q": query,
|
|
||||||
"limit": results_per_page,
|
|
||||||
"offset": (params["pageno"] - 1) * results_per_page,
|
|
||||||
}
|
|
||||||
params["url"] = f"{base_url}/search?{urlencode(args)}"
|
|
||||||
params["headers"]["Accept"] = "application/json"
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
|
||||||
results = []
|
|
||||||
|
|
||||||
json = resp.json()
|
|
||||||
|
|
||||||
for result in json["results"]:
|
|
||||||
publishedDate, content, doi = None, '', None
|
|
||||||
|
|
||||||
if result['biblio'].get('release_date'):
|
|
||||||
publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")
|
|
||||||
|
|
||||||
if len(result['abstracts']) > 0:
|
|
||||||
content = result['abstracts'][0].get('body')
|
|
||||||
elif len(result['_highlights']) > 0:
|
|
||||||
content = result['_highlights'][0]
|
|
||||||
|
|
||||||
if len(result['releases']) > 0:
|
|
||||||
doi = result['releases'][0].get('doi')
|
|
||||||
|
|
||||||
results.append(
|
|
||||||
{
|
|
||||||
'template': 'paper.html',
|
|
||||||
'url': result['fulltext']['access_url'],
|
|
||||||
'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
|
|
||||||
'content': html_to_text(content),
|
|
||||||
'publisher': result['biblio'].get('publisher'),
|
|
||||||
'doi': doi,
|
|
||||||
'journal': result['biblio'].get('container_name'),
|
|
||||||
'authors': result['biblio'].get('contrib_names'),
|
|
||||||
'tags': result['tags'],
|
|
||||||
'publishedDate': publishedDate,
|
|
||||||
'issns': result['biblio'].get('issns'),
|
|
||||||
'pdf_url': result['fulltext'].get('access_url'),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return results
|
|
|
@ -27,7 +27,7 @@ categories = ['images']
|
||||||
paging = True
|
paging = True
|
||||||
|
|
||||||
endpoint = 'photos'
|
endpoint = 'photos'
|
||||||
base_url = 'https://loc.gov'
|
base_url = 'https://www.loc.gov'
|
||||||
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
|
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -233,8 +233,7 @@ class Network:
|
||||||
del kwargs['raise_for_httperror']
|
del kwargs['raise_for_httperror']
|
||||||
return do_raise_for_httperror
|
return do_raise_for_httperror
|
||||||
|
|
||||||
@staticmethod
|
def patch_response(self, response, do_raise_for_httperror):
|
||||||
def patch_response(response, do_raise_for_httperror):
|
|
||||||
if isinstance(response, httpx.Response):
|
if isinstance(response, httpx.Response):
|
||||||
# requests compatibility (response is not streamed)
|
# requests compatibility (response is not streamed)
|
||||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||||
|
@ -242,8 +241,11 @@ class Network:
|
||||||
|
|
||||||
# raise an exception
|
# raise an exception
|
||||||
if do_raise_for_httperror:
|
if do_raise_for_httperror:
|
||||||
|
try:
|
||||||
raise_for_httperror(response)
|
raise_for_httperror(response)
|
||||||
|
except:
|
||||||
|
self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
|
||||||
|
raise
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def is_valid_response(self, response):
|
def is_valid_response(self, response):
|
||||||
|
@ -269,7 +271,7 @@ class Network:
|
||||||
else:
|
else:
|
||||||
response = await client.request(method, url, **kwargs)
|
response = await client.request(method, url, **kwargs)
|
||||||
if self.is_valid_response(response) or retries <= 0:
|
if self.is_valid_response(response) or retries <= 0:
|
||||||
return Network.patch_response(response, do_raise_for_httperror)
|
return self.patch_response(response, do_raise_for_httperror)
|
||||||
except httpx.RemoteProtocolError as e:
|
except httpx.RemoteProtocolError as e:
|
||||||
if not was_disconnected:
|
if not was_disconnected:
|
||||||
# the server has closed the connection:
|
# the server has closed the connection:
|
||||||
|
|
|
@ -137,9 +137,6 @@ class OnlineProcessor(EngineProcessor):
|
||||||
self.engine.request(query, params)
|
self.engine.request(query, params)
|
||||||
|
|
||||||
# ignoring empty urls
|
# ignoring empty urls
|
||||||
if params['url'] is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not params['url']:
|
if not params['url']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -327,9 +327,32 @@ engines:
|
||||||
|
|
||||||
- name: adobe stock
|
- name: adobe stock
|
||||||
engine: adobe_stock
|
engine: adobe_stock
|
||||||
# available search orders: 'relevant', 'featured', 'creation', 'nb_downloads'
|
shortcut: asi
|
||||||
# adobe_order: relevance
|
categories: ["images"]
|
||||||
shortcut: as
|
# https://docs.searxng.org/dev/engines/online/adobe_stock.html
|
||||||
|
adobe_order: relevance
|
||||||
|
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
||||||
|
timeout: 6
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: adobe stock video
|
||||||
|
engine: adobe_stock
|
||||||
|
shortcut: asv
|
||||||
|
network: adobe stock
|
||||||
|
categories: ["videos"]
|
||||||
|
adobe_order: relevance
|
||||||
|
adobe_content_types: ["video"]
|
||||||
|
timeout: 6
|
||||||
|
disabled: true
|
||||||
|
|
||||||
|
- name: adobe stock audio
|
||||||
|
engine: adobe_stock
|
||||||
|
shortcut: asa
|
||||||
|
network: adobe stock
|
||||||
|
categories: ["music"]
|
||||||
|
adobe_order: relevance
|
||||||
|
adobe_content_types: ["audio"]
|
||||||
|
timeout: 6
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
- name: alpine linux packages
|
- name: alpine linux packages
|
||||||
|
@ -1629,11 +1652,6 @@ engines:
|
||||||
api_site: 'askubuntu'
|
api_site: 'askubuntu'
|
||||||
categories: [it, q&a]
|
categories: [it, q&a]
|
||||||
|
|
||||||
- name: internetarchivescholar
|
|
||||||
engine: internet_archive_scholar
|
|
||||||
shortcut: ias
|
|
||||||
timeout: 15.0
|
|
||||||
|
|
||||||
- name: superuser
|
- name: superuser
|
||||||
engine: stackexchange
|
engine: stackexchange
|
||||||
shortcut: su
|
shortcut: su
|
||||||
|
|
Loading…
Reference in New Issue