mirror of https://github.com/searxng/searxng.git
Compare commits
7 Commits
b7648fb6be
...
a36aa54570
Author | SHA1 | Date |
---|---|---|
Bnyro | a36aa54570 | |
Markus Heiser | 02ebea58fb | |
Bnyro | 14b7ef3c01 | |
Markus Heiser | 0f9694c90b | |
Markus Heiser | ccc4f30b20 | |
Markus Heiser | c4b874e9b0 | |
Markus Heiser | 7c4e4ebd40 |
|
@ -0,0 +1,13 @@
|
|||
.. _adobe stock engine:
|
||||
|
||||
===========
|
||||
Adobe Stock
|
||||
===========
|
||||
|
||||
.. contents:: Contents
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
.. automodule:: searx.engines.adobe_stock
|
||||
:members:
|
|
@ -4,22 +4,27 @@ Welcome to SearXNG
|
|||
|
||||
*Search without being tracked.*
|
||||
|
||||
SearXNG is a free internet metasearch engine which aggregates results from more
|
||||
than 70 search services. Users are neither tracked nor profiled. Additionally,
|
||||
SearXNG can be used over Tor for online anonymity.
|
||||
.. jinja:: searx
|
||||
|
||||
SearXNG is a free internet metasearch engine which aggregates results from up
|
||||
to {{engines | length}} :ref:`search services <configured engines>`. Users
|
||||
are neither tracked nor profiled. Additionally, SearXNG can be used over Tor
|
||||
for online anonymity.
|
||||
|
||||
Get started with SearXNG by using one of the instances listed at searx.space_.
|
||||
If you don't trust anyone, you can set up your own, see :ref:`installation`.
|
||||
|
||||
.. jinja:: searx
|
||||
|
||||
.. sidebar:: features
|
||||
|
||||
- :ref:`self hosted <installation>`
|
||||
- :ref:`no user tracking / no profiling <SearXNG protect privacy>`
|
||||
- script & cookies are optional
|
||||
- secure, encrypted connections
|
||||
- :ref:`about 200 search engines <configured engines>`
|
||||
- `about 60 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
||||
- about 100 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
||||
- :ref:`{{engines | length}} search engines <configured engines>`
|
||||
- `58 translations <https://translate.codeberg.org/projects/searxng/searxng/>`_
|
||||
- about 70 `well maintained <https://uptime.searxng.org/>`__ instances on searx.space_
|
||||
- :ref:`easy integration of search engines <demo online engine>`
|
||||
- professional development: `CI <https://github.com/searxng/searxng/actions>`_,
|
||||
`quality assurance <https://dev.searxng.org/>`_ &
|
||||
|
|
|
@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
|
|||
msgspec==0.18.6
|
||||
eval_type_backport; python_version < '3.9'
|
||||
typer-slim==0.13.1
|
||||
isodate==0.7.2
|
||||
|
|
|
@ -1,67 +1,229 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Adobe Stock (images)
|
||||
"""
|
||||
"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
|
||||
assets. Asset types include photos, vectors, illustrations, templates, 3D
|
||||
assets, videos, motion graphics templates and audio tracks.
|
||||
|
||||
.. _Adobe Stock: https://stock.adobe.com/
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
||||
The engine has the following mandatory settings:
|
||||
|
||||
- SearXNG's :ref:`engine categories`
|
||||
- Adobe-Stock's :py:obj:`adobe_order`
|
||||
- Adobe-Stock's :py:obj:`adobe_content_types`
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: adobe stock
|
||||
engine: adobe_stock
|
||||
shortcut: asi
|
||||
categories: [images]
|
||||
adobe_order: relevance
|
||||
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
||||
|
||||
- name: adobe stock video
|
||||
engine: adobe_stock
|
||||
network: adobe stock
|
||||
shortcut: asv
|
||||
categories: [videos]
|
||||
adobe_order: relevance
|
||||
adobe_content_types: ["video"]
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime, timedelta
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import gen_useragent
|
||||
|
||||
import isodate
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
||||
logger: logging.Logger
|
||||
|
||||
about = {
|
||||
"website": 'https://stock.adobe.com/',
|
||||
"wikidata_id": 'Q5977430',
|
||||
"website": "https://stock.adobe.com/",
|
||||
"wikidata_id": "Q5977430",
|
||||
"official_api_documentation": None,
|
||||
"use_official_api": False,
|
||||
"require_api_key": False,
|
||||
"results": 'JSON',
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
categories = ['images']
|
||||
categories = []
|
||||
paging = True
|
||||
|
||||
base_url = 'https://stock.adobe.com'
|
||||
|
||||
send_accept_language_header = True
|
||||
results_per_page = 10
|
||||
adobe_order = "relevance" # one of 'relevant', 'featured', 'creation' or 'nb_downloads'
|
||||
|
||||
base_url = "https://stock.adobe.com"
|
||||
|
||||
adobe_order: str = ""
|
||||
"""Sort order, can be one of:
|
||||
|
||||
- ``relevance`` or
|
||||
- ``featured`` or
|
||||
- ``creation`` (most recent) or
|
||||
- ``nb_downloads`` (number of downloads)
|
||||
"""
|
||||
|
||||
ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
|
||||
adobe_content_types: list = []
|
||||
"""A list of content types. The following content types are offered:
|
||||
|
||||
- Images: ``image``
|
||||
- Videos: ``video``
|
||||
- Templates: ``template``
|
||||
- 3D: ``3d``
|
||||
- Audio: ``audio``
|
||||
|
||||
Additional subcategories:
|
||||
|
||||
- Photos: ``photo``
|
||||
- Illustrations: ``illustration``
|
||||
- Vectors: ``zip_vector``
|
||||
"""
|
||||
|
||||
# Do we need support for "free_collection" and "include_stock_enterprise"?
|
||||
|
||||
|
||||
def init(_):
|
||||
if not categories:
|
||||
raise ValueError("adobe_stock engine: categories is unset")
|
||||
|
||||
# adobe_order
|
||||
if not adobe_order:
|
||||
raise ValueError("adobe_stock engine: adobe_order is unset")
|
||||
if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
|
||||
raise ValueError(f"unsupported adobe_order: {adobe_order}")
|
||||
|
||||
# adobe_content_types
|
||||
if not adobe_content_types:
|
||||
raise ValueError("adobe_stock engine: adobe_content_types is unset")
|
||||
|
||||
if isinstance(adobe_content_types, list):
|
||||
for t in adobe_content_types:
|
||||
if t not in ADOBE_VALID_TYPES:
|
||||
raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
|
||||
else:
|
||||
raise ValueError(
|
||||
"adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
|
||||
)
|
||||
|
||||
|
||||
def request(query, params):
|
||||
|
||||
args = {
|
||||
'k': query,
|
||||
'limit': results_per_page,
|
||||
'order': adobe_order,
|
||||
'search_page': params['pageno'],
|
||||
'search_type': 'pagination',
|
||||
'filters[content_type:video]': 0,
|
||||
'filters[content_type:audio]': 0,
|
||||
"k": query,
|
||||
"limit": results_per_page,
|
||||
"order": adobe_order,
|
||||
"search_page": params["pageno"],
|
||||
"search_type": "pagination",
|
||||
}
|
||||
params['url'] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
||||
|
||||
for content_type in ADOBE_VALID_TYPES:
|
||||
args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
|
||||
|
||||
params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
|
||||
|
||||
# headers required to bypass bot-detection
|
||||
params['headers'] = {
|
||||
"User-Agent": gen_useragent(),
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
}
|
||||
if params["searxng_locale"] == "all":
|
||||
params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def parse_image_item(item):
|
||||
return {
|
||||
"template": "images.html",
|
||||
"url": item["content_url"],
|
||||
"title": item["title"],
|
||||
"content": item["asset_type"],
|
||||
"img_src": item["content_thumb_extra_large_url"],
|
||||
"thumbnail_src": item["thumbnail_url"],
|
||||
"resolution": f"{item['content_original_width']}x{item['content_original_height']}",
|
||||
"img_format": item["format"],
|
||||
"author": item["author"],
|
||||
}
|
||||
|
||||
|
||||
def parse_video_item(item):
|
||||
|
||||
# in video items, the title is more or less a "content description", we try
|
||||
# to reduce the length of the title ..
|
||||
|
||||
title = item["title"]
|
||||
content = ""
|
||||
if "." in title.strip()[:-1]:
|
||||
content = title
|
||||
title = title.split(".", 1)[0]
|
||||
elif "," in title:
|
||||
content = title
|
||||
title = title.split(",", 1)[0]
|
||||
elif len(title) > 50:
|
||||
content = title
|
||||
title = ""
|
||||
for w in content.split(" "):
|
||||
title += f" {w}"
|
||||
if len(title) > 50:
|
||||
title = title.strip() + "\u2026"
|
||||
break
|
||||
|
||||
return {
|
||||
"template": "videos.html",
|
||||
"url": item["content_url"],
|
||||
"title": title,
|
||||
"content": content,
|
||||
# https://en.wikipedia.org/wiki/ISO_8601#Durations
|
||||
"length": isodate.parse_duration(item["time_duration"]),
|
||||
"publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
|
||||
"thumbnail": item["thumbnail_url"],
|
||||
"iframe_src": item["video_small_preview_url"],
|
||||
"metadata": item["asset_type"],
|
||||
}
|
||||
|
||||
|
||||
def parse_audio_item(item):
|
||||
audio_data = item["audio_data"]
|
||||
content = audio_data.get("description") or ""
|
||||
if audio_data.get("album"):
|
||||
content = audio_data["album"] + " - " + content
|
||||
|
||||
return {
|
||||
"url": item["content_url"],
|
||||
"title": item["title"],
|
||||
"content": content,
|
||||
# "thumbnail": base_url + item["thumbnail_url"],
|
||||
"iframe_src": audio_data["preview"]["url"],
|
||||
"publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
|
||||
"length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
|
||||
"author": item.get("artist_name"),
|
||||
}
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
json_resp = resp.json()
|
||||
|
||||
for item in json_resp['items'].values():
|
||||
results.append(
|
||||
{
|
||||
'template': 'images.html',
|
||||
'url': item['content_url'],
|
||||
'title': item['title'],
|
||||
'content': '',
|
||||
'img_src': item['content_thumb_extra_large_url'],
|
||||
'thumbnail_src': item['thumbnail_url'],
|
||||
'resolution': f"{item['content_original_width']}x{item['content_original_height']}",
|
||||
'img_format': item['format'],
|
||||
'author': item['author'],
|
||||
}
|
||||
)
|
||||
if isinstance(json_resp["items"], list):
|
||||
return None
|
||||
for item in json_resp["items"].values():
|
||||
if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
|
||||
result = parse_image_item(item)
|
||||
elif item["asset_type"].lower() == "video":
|
||||
result = parse_video_item(item)
|
||||
elif item["asset_type"].lower() == "audio":
|
||||
result = parse_audio_item(item)
|
||||
else:
|
||||
logger.error("no handle for %s --> %s", item["asset_type"], item)
|
||||
continue
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Internet Archive scholar(science)
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import html_to_text
|
||||
|
||||
about = {
|
||||
"website": "https://scholar.archive.org/",
|
||||
"wikidata_id": "Q115667709",
|
||||
"official_api_documentation": "https://scholar.archive.org/api/redoc",
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": "JSON",
|
||||
}
|
||||
categories = ['science', 'scientific publications']
|
||||
paging = True
|
||||
|
||||
base_url = "https://scholar.archive.org"
|
||||
results_per_page = 15
|
||||
|
||||
|
||||
def request(query, params):
|
||||
args = {
|
||||
"q": query,
|
||||
"limit": results_per_page,
|
||||
"offset": (params["pageno"] - 1) * results_per_page,
|
||||
}
|
||||
params["url"] = f"{base_url}/search?{urlencode(args)}"
|
||||
params["headers"]["Accept"] = "application/json"
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
json = resp.json()
|
||||
|
||||
for result in json["results"]:
|
||||
publishedDate, content, doi = None, '', None
|
||||
|
||||
if result['biblio'].get('release_date'):
|
||||
publishedDate = datetime.strptime(result['biblio']['release_date'], "%Y-%m-%d")
|
||||
|
||||
if len(result['abstracts']) > 0:
|
||||
content = result['abstracts'][0].get('body')
|
||||
elif len(result['_highlights']) > 0:
|
||||
content = result['_highlights'][0]
|
||||
|
||||
if len(result['releases']) > 0:
|
||||
doi = result['releases'][0].get('doi')
|
||||
|
||||
results.append(
|
||||
{
|
||||
'template': 'paper.html',
|
||||
'url': result['fulltext']['access_url'],
|
||||
'title': result['biblio'].get('title') or result['biblio'].get('container_name'),
|
||||
'content': html_to_text(content),
|
||||
'publisher': result['biblio'].get('publisher'),
|
||||
'doi': doi,
|
||||
'journal': result['biblio'].get('container_name'),
|
||||
'authors': result['biblio'].get('contrib_names'),
|
||||
'tags': result['tags'],
|
||||
'publishedDate': publishedDate,
|
||||
'issns': result['biblio'].get('issns'),
|
||||
'pdf_url': result['fulltext'].get('access_url'),
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
|
@ -27,7 +27,7 @@ categories = ['images']
|
|||
paging = True
|
||||
|
||||
endpoint = 'photos'
|
||||
base_url = 'https://loc.gov'
|
||||
base_url = 'https://www.loc.gov'
|
||||
search_string = "/{endpoint}/?sp={page}&{query}&fo=json"
|
||||
|
||||
|
||||
|
|
|
@ -233,8 +233,7 @@ class Network:
|
|||
del kwargs['raise_for_httperror']
|
||||
return do_raise_for_httperror
|
||||
|
||||
@staticmethod
|
||||
def patch_response(response, do_raise_for_httperror):
|
||||
def patch_response(self, response, do_raise_for_httperror):
|
||||
if isinstance(response, httpx.Response):
|
||||
# requests compatibility (response is not streamed)
|
||||
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
|
||||
|
@ -242,8 +241,11 @@ class Network:
|
|||
|
||||
# raise an exception
|
||||
if do_raise_for_httperror:
|
||||
try:
|
||||
raise_for_httperror(response)
|
||||
|
||||
except:
|
||||
self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
|
||||
raise
|
||||
return response
|
||||
|
||||
def is_valid_response(self, response):
|
||||
|
@ -269,7 +271,7 @@ class Network:
|
|||
else:
|
||||
response = await client.request(method, url, **kwargs)
|
||||
if self.is_valid_response(response) or retries <= 0:
|
||||
return Network.patch_response(response, do_raise_for_httperror)
|
||||
return self.patch_response(response, do_raise_for_httperror)
|
||||
except httpx.RemoteProtocolError as e:
|
||||
if not was_disconnected:
|
||||
# the server has closed the connection:
|
||||
|
|
|
@ -137,9 +137,6 @@ class OnlineProcessor(EngineProcessor):
|
|||
self.engine.request(query, params)
|
||||
|
||||
# ignoring empty urls
|
||||
if params['url'] is None:
|
||||
return None
|
||||
|
||||
if not params['url']:
|
||||
return None
|
||||
|
||||
|
|
|
@ -327,9 +327,32 @@ engines:
|
|||
|
||||
- name: adobe stock
|
||||
engine: adobe_stock
|
||||
# available search orders: 'relevant', 'featured', 'creation', 'nb_downloads'
|
||||
# adobe_order: relevance
|
||||
shortcut: as
|
||||
shortcut: asi
|
||||
categories: ["images"]
|
||||
# https://docs.searxng.org/dev/engines/online/adobe_stock.html
|
||||
adobe_order: relevance
|
||||
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
|
||||
timeout: 6
|
||||
disabled: true
|
||||
|
||||
- name: adobe stock video
|
||||
engine: adobe_stock
|
||||
shortcut: asv
|
||||
network: adobe stock
|
||||
categories: ["videos"]
|
||||
adobe_order: relevance
|
||||
adobe_content_types: ["video"]
|
||||
timeout: 6
|
||||
disabled: true
|
||||
|
||||
- name: adobe stock audio
|
||||
engine: adobe_stock
|
||||
shortcut: asa
|
||||
network: adobe stock
|
||||
categories: ["music"]
|
||||
adobe_order: relevance
|
||||
adobe_content_types: ["audio"]
|
||||
timeout: 6
|
||||
disabled: true
|
||||
|
||||
- name: alpine linux packages
|
||||
|
@ -1629,11 +1652,6 @@ engines:
|
|||
api_site: 'askubuntu'
|
||||
categories: [it, q&a]
|
||||
|
||||
- name: internetarchivescholar
|
||||
engine: internet_archive_scholar
|
||||
shortcut: ias
|
||||
timeout: 15.0
|
||||
|
||||
- name: superuser
|
||||
engine: stackexchange
|
||||
shortcut: su
|
||||
|
|
Loading…
Reference in New Issue