Compare commits

...

4 Commits

Author SHA1 Message Date
Bnyro 853ecc9719
Merge 7edd75ff93 into 0253c10b52 2024-11-24 12:38:56 +01:00
Markus Heiser 0253c10b52 [feat] engine: add adobe stock video and audio engines
The engine has been revised; there is now the option ``adobe_content_types``
with which it is possible to configure engines for video and audio from the
adobe stock.  BTW this patch adds documentation to the engine.

To test all three engines in one use a search term like::

    !asi !asv !asa sound

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-24 11:56:12 +01:00
Bnyro f20a7632f1 [feat] engine: add adobe stock photos 2024-11-24 11:56:12 +01:00
Bnyro 7edd75ff93 [refactor] translation engines: common interface 2024-10-17 17:17:40 +02:00
12 changed files with 398 additions and 78 deletions

View File

@ -0,0 +1,13 @@
.. _adobe stock engine:
===========
Adobe Stock
===========
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.engines.adobe_stock
:members:

View File

@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6
eval_type_backport; python_version < '3.9'
typer-slim==0.13.1
isodate==0.7.2

View File

@ -0,0 +1,229 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
assets. Assets types include photos, vectors, illustrations, templates, 3D
assets, videos, motion graphics templates and audio tracks.
.. Adobe Stock: https://stock.adobe.com/
Configuration
=============
The engine has the following mandatory setting:
- SearXNG's :ref:`engine categories`
- Adobe-Stock's :py:obj:`adobe_order`
- Adobe-Stock's :py:obj:`adobe_content_types`
.. code:: yaml
- name: adobe stock
engine: adobe_stock
shortcut: asi
categories: [images]
adobe_order: relevance
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
- name: adobe stock video
engine: adobe_stock
network: adobe stock
shortcut: asi
categories: [videos]
adobe_order: relevance
adobe_content_types: ["video"]
Implementation
==============
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from datetime import datetime, timedelta
from urllib.parse import urlencode
import isodate
if TYPE_CHECKING:
import logging
logger: logging.Logger
about = {
"website": "https://stock.adobe.com/",
"wikidata_id": "Q5977430",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
categories = []
paging = True
send_accept_language_header = True
results_per_page = 10
base_url = "https://stock.adobe.com"
adobe_order: str = ""
"""Sort order, can be one of:
- ``relevance`` or
- ``featured`` or
- ``creation`` (most recent) or
- ``nb_downloads`` (number of downloads)
"""
ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
adobe_content_types: list = []
"""A list of of content types. The following content types are offered:
- Images: ``image``
- Videos: ``video``
- Templates: ``template``
- 3D: ``3d``
- Audio ``audio``
Additional subcategories:
- Photos: ``photo``
- Illustrations: ``illustration``
- Vectors: ``zip_vector`` (Vectors),
"""
# Do we need support for "free_collection" and "include_stock_enterprise"?
def init(_):
if not categories:
raise ValueError("adobe_stock engine: categories is unset")
# adobe_order
if not adobe_order:
raise ValueError("adobe_stock engine: adobe_order is unset")
if adobe_order not in ["relevance", "featured", "creation", "nb_downloads"]:
raise ValueError(f"unsupported adobe_order: {adobe_order}")
# adobe_content_types
if not adobe_content_types:
raise ValueError("adobe_stock engine: adobe_content_types is unset")
if isinstance(adobe_content_types, list):
for t in adobe_content_types:
if t not in ADOBE_VALID_TYPES:
raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % t)
else:
raise ValueError(
"adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
)
def request(query, params):
args = {
"k": query,
"limit": results_per_page,
"order": adobe_order,
"search_page": params["pageno"],
"search_type": "pagination",
}
for content_type in ADOBE_VALID_TYPES:
args[f"filters[content_type:{content_type}]"] = 1 if content_type in adobe_content_types else 0
params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(args)}"
# headers required to bypass bot-detection
if params["searxng_locale"] == "all":
params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
return params
def parse_image_item(item):
return {
"template": "images.html",
"url": item["content_url"],
"title": item["title"],
"content": item["asset_type"],
"img_src": item["content_thumb_extra_large_url"],
"thumbnail_src": item["thumbnail_url"],
"resolution": f"{item['content_original_width']}x{item['content_original_height']}",
"img_format": item["format"],
"author": item["author"],
}
def parse_video_item(item):
# in video items, the title is more or less a "content description", we try
# to reduce the lenght of the title ..
title = item["title"]
content = ""
if "." in title.strip()[:-1]:
content = title
title = title.split(".", 1)[0]
elif "," in title:
content = title
title = title.split(",", 1)[0]
elif len(title) > 50:
content = title
title = ""
for w in content.split(" "):
title += f" {w}"
if len(title) > 50:
title = title.strip() + "\u2026"
break
return {
"template": "videos.html",
"url": item["content_url"],
"title": title,
"content": content,
# https://en.wikipedia.org/wiki/ISO_8601#Durations
"length": isodate.parse_duration(item["time_duration"]),
"publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
"thumbnail": item["thumbnail_url"],
"iframe_src": item["video_small_preview_url"],
"metadata": item["asset_type"],
}
def parse_audio_item(item):
audio_data = item["audio_data"]
content = audio_data.get("description") or ""
if audio_data.get("album"):
content = audio_data["album"] + " - " + content
return {
"url": item["content_url"],
"title": item["title"],
"content": content,
# "thumbnail": base_url + item["thumbnail_url"],
"iframe_src": audio_data["preview"]["url"],
"publishedDate": datetime.fromisoformat(audio_data["release_date"]) if audio_data["release_date"] else None,
"length": timedelta(seconds=round(audio_data["duration"] / 1000)) if audio_data["duration"] else None,
"author": item.get("artist_name"),
}
def response(resp):
results = []
json_resp = resp.json()
if isinstance(json_resp["items"], list):
return None
for item in json_resp["items"].values():
if item["asset_type"].lower() in ["image", "premium-image", "illustration", "vector"]:
result = parse_image_item(item)
elif item["asset_type"].lower() == "video":
result = parse_video_item(item)
elif item["asset_type"].lower() == "audio":
result = parse_audio_item(item)
else:
logger.error("no handle for %s --> %s", item["asset_type"], item)
continue
results.append(result)
return results

View File

@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Deepl translation engine"""
from json import loads
about = {
"website": 'https://deepl.com',
"wikidata_id": 'Q43968444',
@ -41,16 +39,14 @@ def request(_query, params):
def response(resp):
results = []
result = loads(resp.text)
translations = result['translations']
infobox = "<dl>"
result = resp.json()
for translation in translations:
infobox += f"<dd>{translation['text']}</dd>"
if not result.get('translations'):
return results
infobox += "</dl>"
translations = [{'text': translation['text']} for translation in result['translations']]
results.append({'answer': infobox})
results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations})
return results

View File

@ -3,7 +3,6 @@
Dictzone
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import eval_xpath
@ -33,11 +32,10 @@ def request(query, params): # pylint: disable=unused-argument
def response(resp):
results = []
dom = html.fromstring(resp.text)
for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
translations = []
for result in eval_xpath(dom, results_xpath)[1:]:
try:
from_result, to_results_raw = eval_xpath(result, './td')
except: # pylint: disable=bare-except
@ -49,12 +47,17 @@ def response(resp):
if t.strip():
to_results.append(to_result.text_content())
results.append(
translations.append(
{
'url': urljoin(str(resp.url), '?%d' % k),
'title': from_result.text_content(),
'content': '; '.join(to_results),
'text': f"{from_result.text_content()} - {'; '.join(to_results)}",
}
)
return results
if translations:
result = {
'answer': translations[0]['text'],
'translations': translations,
'answer_type': 'translations',
}
return [result]

View File

@ -24,7 +24,7 @@ def request(_query, params):
request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
params['url'] = f"{request_url}/translate"
args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']}
args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3}
if api_key:
args['api_key'] = api_key
params['data'] = dumps(args)
@ -42,12 +42,11 @@ def response(resp):
json_resp = resp.json()
text = json_resp.get('translatedText')
from_lang = resp.search_params["from_lang"][1]
to_lang = resp.search_params["to_lang"][1]
query = resp.search_params["query"]
req_url = resp.search_params["req_url"]
if not text:
return results
if text:
results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})
translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])]
results.append({'answer': text, 'answer_type': 'translations', 'translations': translations})
return results

View File

@ -1,8 +1,6 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Lingva (alternative Google Translate frontend)"""
from json import loads
about = {
"website": 'https://lingva.ml',
"wikidata_id": None,
@ -29,7 +27,7 @@ def request(_query, params):
def response(resp):
results = []
result = loads(resp.text)
result = resp.json()
info = result["info"]
from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])
@ -38,27 +36,40 @@ def response(resp):
if 'definitions' in info: # pylint: disable=too-many-nested-blocks
for definition in info['definitions']:
if 'list' in definition:
for item in definition['list']:
if 'synonyms' in item:
for synonym in item['synonyms']:
results.append({"suggestion": from_to_prefix + synonym})
for item in definition.get('list', []):
for synonym in item.get('synonyms', []):
results.append({"suggestion": from_to_prefix + synonym})
infobox = ""
data = []
for definition in info['definitions']:
for translation in definition['list']:
data.append(
{
'text': result['translation'],
'definitions': [translation['definition']] if translation['definition'] else [],
'examples': [translation['example']] if translation['example'] else [],
'synonyms': translation['synonyms'],
}
)
for translation in info["extraTranslations"]:
for word in translation["list"]:
infobox += f"<dl><dt>{word['word']}</dt>"
data.append(
{
'text': word['word'],
'definitions': word['meanings'],
}
)
for meaning in word["meanings"]:
infobox += f"<dd>{meaning}</dd>"
infobox += "</dl>"
if not data and result['translation']:
data.append({'text': result['translation']})
results.append(
{
'infobox': result["translation"],
'content': infobox,
'answer': data[0]['text'],
'answer_type': 'translations',
'translations': data,
}
)

View File

@ -4,7 +4,6 @@
import random
import re
from urllib.parse import urlencode
from flask_babel import gettext
about = {
"website": 'https://codeberg.org/aryak/mozhi',
@ -35,30 +34,27 @@ def request(_query, params):
def response(resp):
translation = resp.json()
infobox = ""
data = {'text': translation['translated-text'], 'definitions': [], 'examples': []}
if translation['target_transliteration'] and not re.match(
re_transliteration_unsupported, translation['target_transliteration']
):
infobox = f"<b>{translation['target_transliteration']}</b>"
data['transliteration'] = translation['target_transliteration']
if translation['word_choices']:
for word in translation['word_choices']:
infobox += f"<dl><dt>{word['word']}: {word['definition']}</dt>"
if word.get('definition'):
data['definitions'].append(word['definition'])
if word['examples_target']:
for example in word['examples_target']:
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
for example in word.get('examples_target', []):
data['examples'].append(re.sub(r"<|>", "", example).lstrip('- '))
infobox += "</dl>"
if translation['source_synonyms']:
infobox += f"<dl><dt>{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}</dt></dl>"
data['synonyms'] = translation.get('source_synonyms', [])
result = {
'infobox': translation['translated-text'],
'content': infobox,
'answer': translation['translated-text'],
'answer_type': 'translations',
'translations': [data],
}
return [result]

View File

@ -35,18 +35,16 @@ def request(query, params): # pylint: disable=unused-argument
def response(resp):
results = []
results.append(
{
'url': web_url.format(
from_lang=resp.search_params['from_lang'][2],
to_lang=resp.search_params['to_lang'][2],
query=resp.search_params['query'],
),
'title': '[{0}-{1}] {2}'.format(
resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
),
'content': resp.json()['responseData']['translatedText'],
}
)
return results
json_resp = resp.json()
text = json_resp['responseData']['translatedText']
alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text]
translations = [{'text': translation} for translation in [text] + alternatives]
result = {
'answer': translations[0]['text'],
'answer_type': 'translations',
'translations': translations,
}
return [result]

View File

@ -325,6 +325,36 @@ engines:
shortcut: 9g
disabled: true
- name: adobe stock
engine: adobe_stock
shortcut: asi
categories: ["images"]
# https://docs.searxng.org/dev/engines/online/adobe_stock.html
adobe_order: relevance
adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
timeout: 6
disabled: true
- name: adobe stock video
engine: adobe_stock
shortcut: asv
network: adobe stock
categories: ["videos"]
adobe_order: relevance
adobe_content_types: ["video"]
timeout: 6
disabled: true
- name: adobe stock audio
engine: adobe_stock
shortcut: asa
network: adobe stock
categories: ["music"]
adobe_order: relevance
adobe_content_types: ["audio"]
timeout: 6
disabled: true
- name: alpine linux packages
engine: alpinelinux
disabled: true

View File

@ -0,0 +1,38 @@
<div class="answer-translations">
{% for translation in translations %}
{% if loop.index > 1 %}
<hr />
{% endif %}
<h3>{{ translation.text }}</h3>
{% if translation.transliteration %}
<b>translation.transliteration</b>
{% endif %} {% if translation.definitions %}
<dl>
<dt>{{ _('Definitions') }}</dt>
<ul>
{% for definition in translation.definitions %}
<li>{{ definition }}</li>
{% endfor %}
<ul>
</dl>
{% endif %} {% if translation.examples %}
<dl>
<dt>{{ _('Examples') }}</dt>
<ul>
{% for example in translation.examples %}
<li>{{ example }}</li>
{% endfor %}
</ul>
</dl>
{% endif %} {% if translation.synonyms %}
<dl>
<dt>{{ _('Synonyms') }}</dt>
<ul>
{% for synonym in translation.synonyms %}
<li>{{ synonym }}</li>
{% endfor %}
</ul>
</dl>
{% endif %}
{% endfor %}
</div>

View File

@ -23,14 +23,20 @@
<div id="answers" role="complementary" aria-labelledby="answers-title"><h4 class="title" id="answers-title">{{ _('Answers') }} : </h4>
{%- for answer in answers.values() -%}
<div class="answer">
<span>{{ answer.answer }}</span>
{%- if answer.url -%}
<a href="{{ answer.url }}" class="answer-url"
{%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
{%- else -%} rel="noreferrer"
{%- endif -%}
>{{ urlparse(answer.url).hostname }}</a>
{% endif -%}
{%- if answer.answer_type == 'translations' -%}
{% with translations=answer.translations %}
{% include 'simple/answerers/translate.html' %}
{% endwith %}
{%- else -%}
<span>{{ answer.answer }}</span>
{%- if answer.url -%}
<a href="{{ answer.url }}" class="answer-url"
{%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
{%- else -%} rel="noreferrer"
{%- endif -%}
>{{ urlparse(answer.url).hostname }}</a>
{% endif -%}
{%- endif -%}
</div>
{%- endfor -%}
</div>