mirror of https://github.com/searxng/searxng.git
Compare commits
2 Commits
a36aa54570
...
b7648fb6be
Author | SHA1 | Date |
---|---|---|
Bnyro | b7648fb6be | |
Bnyro | d5b0fb3d03 |
|
@ -1,13 +0,0 @@
|
||||||
.. _adobe stock engine:
|
|
||||||
|
|
||||||
===========
|
|
||||||
Adobe Stock
|
|
||||||
===========
|
|
||||||
|
|
||||||
.. contents:: Contents
|
|
||||||
:depth: 2
|
|
||||||
:local:
|
|
||||||
:backlinks: entry
|
|
||||||
|
|
||||||
.. automodule:: searx.engines.adobe_stock
|
|
||||||
:members:
|
|
|
@ -4,31 +4,26 @@ Welcome to SearXNG
|
||||||
|
|
||||||
*Search without being tracked.*
|
*Search without being tracked.*
|
||||||
|
|
||||||
.. jinja:: searx
|
SearXNG is a free internet metasearch engine which aggregates results from more
|
||||||
|
than 70 search services. Users are neither tracked nor profiled. Additionally,
|
||||||
SearXNG is a free internet metasearch engine which aggregates results from up
|
SearXNG can be used over Tor for online anonymity.
|
||||||
to {{engines | length}} :ref:`search services <configured engines>`. Users
|
|
||||||
are neither tracked nor profiled. Additionally, SearXNG can be used over Tor
|
|
||||||
for online anonymity.
|
|
||||||
|
|
||||||
Get started with SearXNG by using one of the instances listed at searx.space_.
|
Get started with SearXNG by using one of the instances listed at searx.space_.
|
||||||
If you don't trust anyone, you can set up your own, see :ref:`installation`.
|
If you don't trust anyone, you can set up your own, see :ref:`installation`.
|
||||||
|
|
||||||
.. jinja:: searx
|
.. sidebar:: features
|
||||||
|
|
||||||
.. sidebar:: features
|
- :ref:`self hosted <installation>`
|
||||||
|
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
|
||||||
- :ref:`self hosted <installation>`
|
- script & cookies are optional
|
||||||
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
|
- secure, encrypted connections
|
||||||
- script & cookies are optional
|
- :ref:`about 200 search engines <configured engines>`
|
||||||
- secure, encrypted connections
|
- `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
||||||
- :ref:`{{engines | length}} search engines <configured engines>`
|
- about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
||||||
- `58 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
- :ref:`easy integration of search engines <demo online engine>`
|
||||||
- about 70 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
- professional development: `CI <https://github.com/searxng/searxng/actions>`_,
|
||||||
- :ref:`easy integration of search engines <demo online engine>`
|
`quality assurance <https://dev.searxng.org/>`_ &
|
||||||
- professional development: `CI <https://github.com/searxng/searxng/actions>`_,
|
`automated tested UI <https://dev.searxng.org/screenshots.html>`_
|
||||||
`quality assurance <https://dev.searxng.org/>`_ &
|
|
||||||
`automated tested UI <https://dev.searxng.org/screenshots.html>`_
|
|
||||||
|
|
||||||
.. sidebar:: be a part
|
.. sidebar:: be a part
|
||||||
|
|
||||||
|
|
|
@ -19,4 +19,3 @@ tomli==2.0.2; python_version < '3.11'
|
||||||
msgspec==0.18.6
|
msgspec==0.18.6
|
||||||
eval_type_backport; python_version < '3.9'
|
eval_type_backport; python_version < '3.9'
|
||||||
typer-slim==0.13.1
|
typer-slim==0.13.1
|
||||||
isodate==0.7.2
|
|
||||||
|
|
|
@ -1,229 +1,67 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
|
"""Adobe Stock (images)
|
||||||
assets. Assets types include photos, vectors, illustrations, templates, 3D
|
|
||||||
assets, videos, motion graphics templates and audio tracks.
|
|
||||||
|
|
||||||
.. _Adobe Stock: https://stock.adobe.com/
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
=============
|
|
||||||
|
|
||||||
The engine has the following mandatory settings:
|
|
||||||
|
|
||||||
- SearXNG's :ref:`engine categories`
|
|
||||||
- Adobe-Stock's :py:obj:`adobe_order`
|
|
||||||
- Adobe-Stock's :py:obj:`adobe_content_types`
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: adobe stock
|
|
||||||
engine: adobe_stock
|
|
||||||
shortcut: asi
|
|
||||||
categories: [images]
|
|
||||||
adobe_order: relevance
|
|
||||||
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
|
||||||
|
|
||||||
- name: adobe stock video
|
|
||||||
engine: adobe_stock
|
|
||||||
network: adobe stock
|
|
||||||
shortcut: asv
|
|
||||||
categories: [videos]
|
|
||||||
adobe_order: relevance
|
|
||||||
adobe_content_types: ["video"]
|
|
||||||
|
|
||||||
Implementation
|
|
||||||
==============
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
from searx.utils import gen_useragent
|
||||||
import isodate
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
about = {
|
about = {
|
||||||
"website": "https://stock.adobe.com/",
|
"website": 'https://stock.adobe.com/',
|
||||||
"wikidata_id": "Q5977430",
|
"wikidata_id": 'Q5977430',
|
||||||
"official_api_documentation": None,
|
"official_api_documentation": None,
|
||||||
"use_official_api": False,
|
"use_official_api": False,
|
||||||
"require_api_key": False,
|
"require_api_key": False,
|
||||||
"results": "JSON",
|
"results": 'JSON',
|
||||||
}
|
}
|
||||||
|
|
||||||
categories = []
|
categories = ['images']
|
||||||
paging = True
|
paging = True
|
||||||
send_accept_language_header = True
|
|
||||||
|
base_url = 'https://stock.adobe.com'
|
||||||
|
|
||||||
results_per_page = 10
|
results_per_page = 10
|
||||||
|
adobe_order = "relevance" # one of 'relevance', 'featured', 'creation' or 'nb_downloads'
|
||||||
base_url = "https://stock.adobe.com"
|
|
||||||
|
|
||||||
adobe_order: str = ""
|
|
||||||
"""Sort order, can be one of:
|
|
||||||
|
|
||||||
- ``relevance`` or
|
|
||||||
- ``featured`` or
|
|
||||||
- ``creation`` (most recent) or
|
|
||||||
- ``nb_downloads`` (number of downloads)
|
|
||||||
"""
|
|
||||||
|
|
||||||
ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
|
|
||||||
adobe_content_types: list = []
|
|
||||||
"""A list of content types. The following content types are offered:
|
|
||||||
|
|
||||||
- Images: ``image``
|
|
||||||
- Videos: ``video``
|
|
||||||
- Templates: ``template``
|
|
||||||
- 3D: ``3d``
|
|
||||||
- Audio: ``audio``
|
|
||||||
|
|
||||||
Additional subcategories:
|
|
||||||
|
|
||||||
- Photos: ``photo``
|
|
||||||
- Illustrations: ``illustration``
|
|
||||||
- Vectors: ``zip_vector`` (Vectors),
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Do we need support for "free_collection" and "include_stock_enterprise"?
|
|
||||||
|
|
||||||
|
|
||||||
def init(_):
|
|
||||||
if not categories:
|
|
||||||
raise ValueError("adobe_stock engine: categories is unset")
|
|
||||||
|
|
||||||
# adobe_order
|
|
||||||
if not adobe_order:
|
|
||||||
raise ValueError("adobe_stock engine: adobe_order is unset")
|
|
||||||
if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
|
|
||||||
raise ValueError(f"unsupported adobe_order: {adobe_order}")
|
|
||||||
|
|
||||||
# adobe_content_types
|
|
||||||
if not adobe_content_types:
|
|
||||||
raise ValueError("adobe_stock engine: adobe_content_types is unset")
|
|
||||||
|
|
||||||
if isinstance(adobe_content_types, list):
|
|
||||||
for t in adobe_content_types:
|
|
||||||
if t not in ADOBE_VALID_TYPES:
|
|
||||||
raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
|
||||||
args = {
|
args = {
|
||||||
"k": query,
|
'k': query,
|
||||||
"limit": results_per_page,
|
'limit': results_per_page,
|
||||||
"order": adobe_order,
|
'order': adobe_order,
|
||||||
"search_page": params["pageno"],
|
'search_page': params['pageno'],
|
||||||
"search_type": "pagination",
|
'search_type': 'pagination',
|
||||||
|
'filters[content_type:video]': 0,
|
||||||
|
'filters[content_type:audio]': 0,
|
||||||
}
|
}
|
||||||
|
params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
||||||
for content_type in ADOBE_VALID_TYPES:
|
|
||||||
args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
|
|
||||||
|
|
||||||
params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
|
||||||
|
|
||||||
# headers required to bypass bot-detection
|
# headers required to bypass bot-detection
|
||||||
if params["searxng_locale"] == "all":
|
params['headers'] = {
|
||||||
params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
|
"User-Agent": gen_useragent(),
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
}
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def parse_image_item(item):
|
|
||||||
return {
|
|
||||||
"template": "images.html",
|
|
||||||
"url": item["content_url"],
|
|
||||||
"title": item["title"],
|
|
||||||
"content": item["asset_type"],
|
|
||||||
"img_src": item["content_thumb_extra_large_url"],
|
|
||||||
"thumbnail_src": item["thumbnail_url"],
|
|
||||||
"resolution": f"{item['content_original_width']}x{item['content_original_height']}",
|
|
||||||
"img_format": item["format"],
|
|
||||||
"author": item["author"],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_video_item(item):
|
|
||||||
|
|
||||||
# in video items, the title is more or less a "content description", we try
|
|
||||||
# to reduce the length of the title ..
|
|
||||||
|
|
||||||
title = item["title"]
|
|
||||||
content = ""
|
|
||||||
if "." in title.strip()[:-1]:
|
|
||||||
content = title
|
|
||||||
title = title.split(".", 1)[0]
|
|
||||||
elif "," in title:
|
|
||||||
content = title
|
|
||||||
title = title.split(",", 1)[0]
|
|
||||||
elif len(title) > 50:
|
|
||||||
content = title
|
|
||||||
title = ""
|
|
||||||
for w in content.split(" "):
|
|
||||||
title += f" {w}"
|
|
||||||
if len(title) > 50:
|
|
||||||
title = title.strip() + "\u2026"
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
"template": "videos.html",
|
|
||||||
"url": item["content_url"],
|
|
||||||
"title": title,
|
|
||||||
"content": content,
|
|
||||||
# https://en.wikipedia.org/wiki/ISO_8601#Durations
|
|
||||||
"length": isodate.parse_duration(item["time_duration"]),
|
|
||||||
"publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
|
|
||||||
"thumbnail": item["thumbnail_url"],
|
|
||||||
"iframe_src": item["video_small_preview_url"],
|
|
||||||
"metadata": item["asset_type"],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_audio_item(item):
|
|
||||||
audio_data = item["audio_data"]
|
|
||||||
content = audio_data.get("description") or ""
|
|
||||||
if audio_data.get("album"):
|
|
||||||
content = audio_data["album"] + " - " + content
|
|
||||||
|
|
||||||
return {
|
|
||||||
"url": item["content_url"],
|
|
||||||
"title": item["title"],
|
|
||||||
"content": content,
|
|
||||||
# "thumbnail": base_url + item["thumbnail_url"],
|
|
||||||
"iframe_src": audio_data["preview"]["url"],
|
|
||||||
"publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
|
|
||||||
"length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
|
|
||||||
"author": item.get("artist_name"),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
json_resp = resp.json()
|
json_resp = resp.json()
|
||||||
|
|
||||||
if isinstance(json_resp["items"], list):
|
for item in json_resp['items'].values():
|
||||||
return None
|
results.append(
|
||||||
for item in json_resp["items"].values():
|
{
|
||||||
if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
|
'template': 'images.html',
|
||||||
result = parse_image_item(item)
|
'url': item['content_url'],
|
||||||
elif item["asset_type"].lower() == "video":
|
'title': item['title'],
|
||||||
result = parse_video_item(item)
|
'content': '',
|
||||||
elif item["asset_type"].lower() == "audio":
|
'img_src': item['content_thumb_extra_large_url'],
|
||||||
result = parse_audio_item(item)
|
'thumbnail_src': item['thumbnail_url'],
|
||||||
else:
|
'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
|
||||||
logger.error("no handle for %s --> %s", item["asset_type"], item)
|
'img_format': item['format'],
|
||||||
continue
|
'author': item['author'],
|
||||||
results.append(result)
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Internet Archive scholar(science)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": "https://scholar.archive.org/",
|
||||||
|
"wikidata_id": "Q115667709",
|
||||||
|
"official_api_documentation": "https://scholar.archive.org/api/redoc",
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": "JSON",
|
||||||
|
}
|
||||||
|
categories = ['science', 'scientific publications']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
base_url = "https://scholar.archive.org"
|
||||||
|
results_per_page = 15
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
args = {
|
||||||
|
"q": query,
|
||||||
|
"limit": results_per_page,
|
||||||
|
"offset": (params["pageno"] - 1) * results_per_page,
|
||||||
|
}
|
||||||
|
params["url"] = f"{base_url}/search?{urlencode(args)}"
|
||||||
|
params["headers"]["Accept"] = "application/json"
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
json = resp.json()
|
||||||
|
|
||||||
|
for result in json["results"]:
|
||||||
|
publishedDate, content, doi = None, '', None
|
||||||
|
|
||||||
|
if result['biblio'].get('release_date'):
|
||||||
|
publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")
|
||||||
|
|
||||||
|
if len(result['abstracts']) > 0:
|
||||||
|
content = result['abstracts'][0].get('body')
|
||||||
|
elif len(result['_highlights']) > 0:
|
||||||
|
content = result['_highlights'][0]
|
||||||
|
|
||||||
|
if len(result['releases']) > 0:
|
||||||
|
doi = result['releases'][0].get('doi')
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'template': 'paper.html',
|
||||||
|
'url': result['fulltext']['access_url'],
|
||||||
|
'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
|
||||||
|
'content': html_to_text(content),
|
||||||
|
'publisher': result['biblio'].get('publisher'),
|
||||||
|
'doi': doi,
|
||||||
|
'journal': result['biblio'].get('container_name'),
|
||||||
|
'authors': result['biblio'].get('contrib_names'),
|
||||||
|
'tags': result['tags'],
|
||||||
|
'publishedDate': publishedDate,
|
||||||
|
'issns': result['biblio'].get('issns'),
|
||||||
|
'pdf_url': result['fulltext'].get('access_url'),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
|
@ -27,7 +27,7 @@ categories = ['images']
|
||||||
paging = True
|
paging = True
|
||||||
|
|
||||||
endpoint = 'photos'
|
endpoint = 'photos'
|
||||||
base_url = 'https://www.loc.gov'
|
base_url = 'https://loc.gov'
|
||||||
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
|
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -233,7 +233,8 @@ class Network:
|
||||||
del kwargs['raise_for_httperror']
|
del kwargs['raise_for_httperror']
|
||||||
return do_raise_for_httperror
|
return do_raise_for_httperror
|
||||||
|
|
||||||
def patch_response(self, response, do_raise_for_httperror):
|
@staticmethod
|
||||||
|
def patch_response(response, do_raise_for_httperror):
|
||||||
if isinstance(response, httpx.Response):
|
if isinstance(response, httpx.Response):
|
||||||
# requests compatibility (response is not streamed)
|
# requests compatibility (response is not streamed)
|
||||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||||
|
@ -241,11 +242,8 @@ class Network:
|
||||||
|
|
||||||
# raise an exception
|
# raise an exception
|
||||||
if do_raise_for_httperror:
|
if do_raise_for_httperror:
|
||||||
try:
|
raise_for_httperror(response)
|
||||||
raise_for_httperror(response)
|
|
||||||
except:
|
|
||||||
self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
|
|
||||||
raise
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def is_valid_response(self, response):
|
def is_valid_response(self, response):
|
||||||
|
@ -271,7 +269,7 @@ class Network:
|
||||||
else:
|
else:
|
||||||
response = await client.request(method, url, **kwargs)
|
response = await client.request(method, url, **kwargs)
|
||||||
if self.is_valid_response(response) or retries <= 0:
|
if self.is_valid_response(response) or retries <= 0:
|
||||||
return self.patch_response(response, do_raise_for_httperror)
|
return Network.patch_response(response, do_raise_for_httperror)
|
||||||
except httpx.RemoteProtocolError as e:
|
except httpx.RemoteProtocolError as e:
|
||||||
if not was_disconnected:
|
if not was_disconnected:
|
||||||
# the server has closed the connection:
|
# the server has closed the connection:
|
||||||
|
|
|
@ -137,6 +137,9 @@ class OnlineProcessor(EngineProcessor):
|
||||||
self.engine.request(query, params)
|
self.engine.request(query, params)
|
||||||
|
|
||||||
# ignoring empty urls
|
# ignoring empty urls
|
||||||
|
if params['url'] is None:
|
||||||
|
return None
|
||||||
|
|
||||||
if not params['url']:
|
if not params['url']:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -327,32 +327,9 @@ engines:
|
||||||
|
|
||||||
- name: adobe stock
|
- name: adobe stock
|
||||||
engine: adobe_stock
|
engine: adobe_stock
|
||||||
shortcut: asi
|
# available search orders: 'relevance', 'featured', 'creation', 'nb_downloads'
|
||||||
categories: ["images"]
|
# adobe_order: relevance
|
||||||
# https://docs.searxng.org/dev/engines/online/adobe_stock.html
|
shortcut: as
|
||||||
adobe_order: relevance
|
|
||||||
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
|
||||||
timeout: 6
|
|
||||||
disabled: true
|
|
||||||
|
|
||||||
- name: adobe stock video
|
|
||||||
engine: adobe_stock
|
|
||||||
shortcut: asv
|
|
||||||
network: adobe stock
|
|
||||||
categories: ["videos"]
|
|
||||||
adobe_order: relevance
|
|
||||||
adobe_content_types: ["video"]
|
|
||||||
timeout: 6
|
|
||||||
disabled: true
|
|
||||||
|
|
||||||
- name: adobe stock audio
|
|
||||||
engine: adobe_stock
|
|
||||||
shortcut: asa
|
|
||||||
network: adobe stock
|
|
||||||
categories: ["music"]
|
|
||||||
adobe_order: relevance
|
|
||||||
adobe_content_types: ["audio"]
|
|
||||||
timeout: 6
|
|
||||||
disabled: true
|
disabled: true
|
||||||
|
|
||||||
- name: alpine linux packages
|
- name: alpine linux packages
|
||||||
|
@ -1652,6 +1629,11 @@ engines:
|
||||||
api_site: 'askubuntu'
|
api_site: 'askubuntu'
|
||||||
categories: [it, q&a]
|
categories: [it, q&a]
|
||||||
|
|
||||||
|
- name: internetarchivescholar
|
||||||
|
engine: internet_archive_scholar
|
||||||
|
shortcut: ias
|
||||||
|
timeout: 15.0
|
||||||
|
|
||||||
- name: superuser
|
- name: superuser
|
||||||
engine: stackexchange
|
engine: stackexchange
|
||||||
shortcut: su
|
shortcut: su
|
||||||
|
|
Loading…
Reference in New Issue