Merge 7edd75ff93 into 0253c10b52

[feat] engine: add adobe stock video and audio engines
The engine has been revised; there is now the option ``adobe_content_types`` with which it is possible to configure engines for video and audio from the adobe stock. BTW this patch adds documentation to the engine. To test all three engines in one use a search term like:: !asi !asv !asa sound Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-24 12:38:56 +01:00 · 2024-11-24 11:56:12 +01:00 · 2024-11-24 11:56:12 +01:00 · 2024-10-17 17:17:40 +02:00
12 changed files with 398 additions and 78 deletions
--- a/docs/dev/engines/online/adobe_stock.rst
+++ b/docs/dev/engines/online/adobe_stock.rst
@ -0,0 +1,13 @@
+.. _adobe stock engine:
+
+===========
+Adobe Stock
+===========
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.engines.adobe_stock
+   :members:
--- a/requirements.txt
+++ b/requirements.txt
@ -19,3 +19,4 @@ tomli==2.0.2; python_version < '3.11'
 msgspec==0.18.6
 eval_type_backport; python_version < '3.9'
 typer-slim==0.13.1
+isodate==0.7.2
--- a/searx/engines/adobe_stock.py
+++ b/searx/engines/adobe_stock.py
@ -0,0 +1,229 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""`Adobe Stock`_ is a service that gives access to millions of royalty-free
+assets. Assets types include photos, vectors, illustrations, templates, 3D
+assets, videos, motion graphics templates and audio tracks.
+
+.. _Adobe Stock: https://stock.adobe.com/
+
+Configuration
+=============
+
+The engine has the following mandatory settings:
+
+- SearXNG's :ref:`engine categories`
+- Adobe-Stock's :py:obj:`adobe_order`
+- Adobe-Stock's :py:obj:`adobe_content_types`
+
+.. code:: yaml
+
+  - name: adobe stock
+    engine: adobe_stock
+    shortcut: asi
+    categories: [images]
+    adobe_order: relevance
+    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+
+  - name: adobe stock video
+    engine: adobe_stock
+    network: adobe stock
+    shortcut: asv
+    categories: [videos]
+    adobe_order: relevance
+    adobe_content_types: ["video"]
+
+Implementation
+==============
+
+"""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from datetime import datetime, timedelta
+from urllib.parse import urlencode
+
+import isodate
+
+if TYPE_CHECKING:
+    import logging
+
+    logger: logging.Logger
+
+# Metadata about the engine and the upstream service it queries.
+about = {
+    "website": "https://stock.adobe.com/",
+    "wikidata_id": "Q5977430",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+# Has to be set for each configured engine; init() raises when it is empty.
+categories = []
+paging = True
+send_accept_language_header = True
+# Sent as the "limit" argument of every search request, see request().
+results_per_page = 10
+
+# Prefix of the search URL built in request().
+base_url = "https://stock.adobe.com"
+
+adobe_order: str = ""
+"""Sort order, can be one of:
+
+- ``relevance`` or
+- ``featured`` or
+- ``creation`` (most recent) or
+- ``nb_downloads`` (number of downloads)
+"""
+
+# Content types accepted by init(); request() sends one filter flag per entry.
+ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
+adobe_content_types: list = []
+"""A list of content types.  The following content types are offered:
+
+- Images: ``image``
+- Videos: ``video``
+- Templates: ``template``
+- 3D: ``3d``
+- Audio: ``audio``
+
+Additional subcategories:
+
+- Photos: ``photo``
+- Illustrations: ``illustration``
+- Vectors: ``zip_vector``
+"""
+
+# Do we need support for "free_collection" and "include_stock_enterprise"?
+
+
+def init(_):
+    """Check the engine's settings and raise a :py:obj:`ValueError` for the
+    first problem found.
+
+    The settings are checked in this order: ``categories``,
+    :py:obj:`adobe_order` and :py:obj:`adobe_content_types`.  The argument is
+    not used.
+    """
+    if not categories:
+        raise ValueError("adobe_stock engine: categories is unset")
+
+    # adobe_order: has to be set to one of the supported sort orders
+    supported_orders = ("relevance", "featured", "creation", "nb_downloads")
+    if not adobe_order:
+        raise ValueError("adobe_stock engine: adobe_order is unset")
+    if adobe_order not in supported_orders:
+        raise ValueError(f"unsupported adobe_order: {adobe_order}")
+
+    # adobe_content_types: has to be a non-empty list of known content types
+    if not adobe_content_types:
+        raise ValueError("adobe_stock engine: adobe_content_types is unset")
+    if not isinstance(adobe_content_types, list):
+        raise ValueError(
+            "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
+        )
+
+    # report the first entry that is not a known content type
+    unknown = [name for name in adobe_content_types if name not in ADOBE_VALID_TYPES]
+    if unknown:
+        raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % unknown[0])
+
+
+def request(query, params):
+    """Fill ``params`` with the URL (and, if needed, a header) of the search
+    request and return it.
+
+    :param query: search term, sent as argument ``k``.
+    :param params: request parameters; ``pageno``, ``searxng_locale`` and
+        ``headers`` are read, ``url`` is set.
+    :return: the modified ``params``.
+    """
+    query_args = {
+        "k": query,
+        "limit": results_per_page,
+        "order": adobe_order,
+        "search_page": params["pageno"],
+        "search_type": "pagination",
+    }
+
+    # each known content type is sent as a filter: 1 if configured, otherwise 0
+    query_args.update(
+        {f"filters[content_type:{name}]": int(name in adobe_content_types) for name in ADOBE_VALID_TYPES}
+    )
+
+    query_string = urlencode(query_args)
+    params["url"] = f"{base_url}/de/Ajax/Search?{query_string}"
+
+    # headers required to bypass bot-detection
+    if params["searxng_locale"] == "all":
+        params["headers"]["Accept-Language"] = "en-US,en;q=0.5"
+
+    return params
+
+
+def parse_image_item(item):
+    """Build a result for the ``images.html`` template from one item of the
+    JSON response.
+
+    :param item: mapping with the fields of one asset.
+    :return: result dict of the image.
+    """
+    # result field --> name of the field in the item it is copied from
+    copied_fields = (
+        ("url", "content_url"),
+        ("title", "title"),
+        ("content", "asset_type"),
+        ("img_src", "content_thumb_extra_large_url"),
+        ("thumbnail_src", "thumbnail_url"),
+    )
+
+    result = {"template": "images.html"}
+    for result_key, item_key in copied_fields:
+        result[result_key] = item[item_key]
+
+    width = item["content_original_width"]
+    height = item["content_original_height"]
+    result["resolution"] = f"{width}x{height}"
+    result["img_format"] = item["format"]
+    result["author"] = item["author"]
+    return result
+
+
+def parse_video_item(item):
+    """Build a result for the ``videos.html`` template from one item of the
+    JSON response.
+
+    In video items the title is more or less a "content description", so the
+    length of the title is reduced: the complete text moves to ``content`` and
+    the title is cut at the first period, else at the first comma, else after
+    the word that pushes it beyond 50 characters.
+
+    :param item: mapping with the fields of one asset.
+    :return: result dict of the video.
+    """
+    full_text = item["title"]
+    title, content = full_text, ""
+
+    if "." in full_text.strip()[:-1]:
+        # a period that is not the last character of the stripped text
+        title, content = full_text.split(".", 1)[0], full_text
+    elif "," in full_text:
+        title, content = full_text.split(",", 1)[0], full_text
+    elif len(full_text) > 50:
+        content = full_text
+        title = ""
+        for word in full_text.split(" "):
+            title = f"{title} {word}"
+            if len(title) > 50:
+                # long enough: drop the leading blank and mark the cut
+                title = title.strip() + "\u2026"
+                break
+
+    return {
+        "template": "videos.html",
+        "url": item["content_url"],
+        "title": title,
+        "content": content,
+        # https://en.wikipedia.org/wiki/ISO_8601#Durations
+        "length": isodate.parse_duration(item["time_duration"]),
+        "publishedDate": datetime.strptime(item["creation_date"], "%Y-%m-%d"),
+        "thumbnail": item["thumbnail_url"],
+        "iframe_src": item["video_small_preview_url"],
+        "metadata": item["asset_type"],
+    }
+
+
+def parse_audio_item(item):
+    """Build a result from one audio item of the JSON response.
+
+    The content is the description of the track, prefixed by the album name
+    if there is one.  ``publishedDate`` and ``length`` are ``None`` when the
+    release date respectively the duration is empty.
+
+    :param item: mapping with the fields of one asset, including ``audio_data``.
+    :return: result dict of the audio track.
+    """
+    audio = item["audio_data"]
+
+    description = audio.get("description") or ""
+    album = audio.get("album")
+    content = album + " - " + description if album else description
+
+    result = {
+        "url": item["content_url"],
+        "title": item["title"],
+        "content": content,
+        # "thumbnail": base_url + item["thumbnail_url"],
+        "iframe_src": audio["preview"]["url"],
+    }
+
+    release_date = audio["release_date"]
+    result["publishedDate"] = datetime.fromisoformat(release_date) if release_date else None
+
+    # the duration is divided by 1000 to get the seconds of the timedelta
+    duration = audio["duration"]
+    result["length"] = timedelta(seconds=round(duration / 1000)) if duration else None
+
+    result["author"] = item.get("artist_name")
+    return result
+
+
+def response(resp):
+    """Convert the JSON answer of the search request into a list of results.
+
+    :param resp: HTTP response; only its ``json()`` method is used.
+    :return: list of result dicts built by :py:obj:`parse_image_item`,
+        :py:obj:`parse_video_item` or :py:obj:`parse_audio_item`.  The list is
+        empty when ``items`` of the answer is a list instead of a mapping.
+    """
+    results = []
+
+    items = resp.json()["items"]
+
+    # The items are read with ``.values()``, a list can't be processed that
+    # way.  Return the (empty) result list instead of ``None`` so that the
+    # return type of this function is always a list.
+    if isinstance(items, list):
+        return results
+
+    for item in items.values():
+        asset_type = item["asset_type"].lower()
+        if asset_type in ("image", "premium-image", "illustration", "vector"):
+            result = parse_image_item(item)
+        elif asset_type == "video":
+            result = parse_video_item(item)
+        elif asset_type == "audio":
+            result = parse_audio_item(item)
+        else:
+            # unknown asset type: log the item and go on with the next one
+            logger.error("no handle for %s --> %s", item["asset_type"], item)
+            continue
+        results.append(result)
+
+    return results
--- a/searx/engines/deepl.py
+++ b/searx/engines/deepl.py
@ -1,8 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Deepl translation engine"""

-from json import loads
-
 about = {
    "website": 'https://deepl.com',
    "wikidata_id": 'Q43968444',
@ -41,16 +39,14 @@ def request(_query, params):

 def response(resp):
    results = []
-    result = loads(resp.text)
-    translations = result['translations']

-    infobox = "<dl>"
+    result = resp.json()

-    for translation in translations:
-        infobox += f"<dd>{translation['text']}</dd>"
+    if not result.get('translations'):
+        return results

-    infobox += "</dl>"
+    translations = [{'text': translation['text']} for translation in result['translations']]

-    results.append({'answer': infobox})
+    results.append({'answer': translations[0]['text'], 'answer_type': 'translations', 'translations': translations})

    return results
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@ -3,7 +3,6 @@
 Dictzone
 """

-from urllib.parse import urljoin
 from lxml import html
 from searx.utils import eval_xpath

@ -33,11 +32,10 @@ def request(query, params):  # pylint: disable=unused-argument


 def response(resp):
-    results = []
-
    dom = html.fromstring(resp.text)

-    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
+    translations = []
+    for result in eval_xpath(dom, results_xpath)[1:]:
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except:  # pylint: disable=bare-except
@ -49,12 +47,17 @@ def response(resp):
            if t.strip():
                to_results.append(to_result.text_content())

-        results.append(
+        translations.append(
            {
-                'url': urljoin(str(resp.url), '?%d' % k),
-                'title': from_result.text_content(),
-                'content': '; '.join(to_results),
+                'text': f"{from_result.text_content()} - {'; '.join(to_results)}",
            }
        )

-    return results
+    if translations:
+        result = {
+            'answer': translations[0]['text'],
+            'translations': translations,
+            'answer_type': 'translations',
+        }
+
+    return [result]
--- a/searx/engines/libretranslate.py
+++ b/searx/engines/libretranslate.py
@ -24,7 +24,7 @@ def request(_query, params):
    request_url = random.choice(base_url) if isinstance(base_url, list) else base_url
    params['url'] = f"{request_url}/translate"

-    args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query']}
+    args = {'source': params['from_lang'][1], 'target': params['to_lang'][1], 'q': params['query'], 'alternatives': 3}
    if api_key:
        args['api_key'] = api_key
    params['data'] = dumps(args)
@ -42,12 +42,11 @@ def response(resp):
    json_resp = resp.json()
    text = json_resp.get('translatedText')

-    from_lang = resp.search_params["from_lang"][1]
-    to_lang = resp.search_params["to_lang"][1]
-    query = resp.search_params["query"]
-    req_url = resp.search_params["req_url"]
+    if not text:
+        return results

-    if text:
-        results.append({"answer": text, "url": f"{req_url}/?source={from_lang}&target={to_lang}&q={query}"})
+    translations = [{'text': text}] + [{'text': alternative} for alternative in json_resp.get('alternatives', [])]
+
+    results.append({'answer': text, 'answer_type': 'translations', 'translations': translations})

    return results
--- a/searx/engines/lingva.py
+++ b/searx/engines/lingva.py
@ -1,8 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Lingva (alternative Google Translate frontend)"""

-from json import loads
-
 about = {
    "website": 'https://lingva.ml',
    "wikidata_id": None,
@ -29,7 +27,7 @@ def request(_query, params):
 def response(resp):
    results = []

-    result = loads(resp.text)
+    result = resp.json()
    info = result["info"]
    from_to_prefix = "%s-%s " % (resp.search_params['from_lang'][1], resp.search_params['to_lang'][1])

@ -38,27 +36,40 @@ def response(resp):

    if 'definitions' in info:  # pylint: disable=too-many-nested-blocks
        for definition in info['definitions']:
-            if 'list' in definition:
-                for item in definition['list']:
-                    if 'synonyms' in item:
-                        for synonym in item['synonyms']:
-                            results.append({"suggestion": from_to_prefix + synonym})
+            for item in definition.get('list', []):
+                for synonym in item.get('synonyms', []):
+                    results.append({"suggestion": from_to_prefix + synonym})

-    infobox = ""
+    data = []
+
+    for definition in info['definitions']:
+        for translation in definition['list']:
+            data.append(
+                {
+                    'text': result['translation'],
+                    'definitions': [translation['definition']] if translation['definition'] else [],
+                    'examples': [translation['example']] if translation['example'] else [],
+                    'synonyms': translation['synonyms'],
+                }
+            )

    for translation in info["extraTranslations"]:
        for word in translation["list"]:
-            infobox += f"<dl><dt>{word['word']}</dt>"
+            data.append(
+                {
+                    'text': word['word'],
+                    'definitions': word['meanings'],
+                }
+            )

-            for meaning in word["meanings"]:
-                infobox += f"<dd>{meaning}</dd>"
-
-            infobox += "</dl>"
+    if not data and result['translation']:
+        data.append({'text': result['translation']})

    results.append(
        {
-            'infobox': result["translation"],
-            'content': infobox,
+            'answer': data[0]['text'],
+            'answer_type': 'translations',
+            'translations': data,
        }
    )

--- a/searx/engines/mozhi.py
+++ b/searx/engines/mozhi.py
@ -4,7 +4,6 @@
 import random
 import re
 from urllib.parse import urlencode
-from flask_babel import gettext

 about = {
    "website": 'https://codeberg.org/aryak/mozhi',
@ -35,30 +34,27 @@ def request(_query, params):
 def response(resp):
    translation = resp.json()

-    infobox = ""
+    data = {'text': translation['translated-text'], 'definitions': [], 'examples': []}

    if translation['target_transliteration'] and not re.match(
        re_transliteration_unsupported, translation['target_transliteration']
    ):
-        infobox = f"<b>{translation['target_transliteration']}</b>"
+        data['transliteration'] = translation['target_transliteration']

    if translation['word_choices']:
        for word in translation['word_choices']:
-            infobox += f"<dl><dt>{word['word']}: {word['definition']}</dt>"
+            if word.get('definition'):
+                data['definitions'].append(word['definition'])

-            if word['examples_target']:
-                for example in word['examples_target']:
-                    infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
-                    infobox += f"<dd>{re.sub(r'<|>', '', example)}</dd>"
+            for example in word.get('examples_target', []):
+                data['examples'].append(re.sub(r"<|>", "", example).lstrip('- '))

-            infobox += "</dl>"
-
-    if translation['source_synonyms']:
-        infobox += f"<dl><dt>{gettext('Synonyms')}: {', '.join(translation['source_synonyms'])}</dt></dl>"
+    data['synonyms'] = translation.get('source_synonyms', [])

    result = {
-        'infobox': translation['translated-text'],
-        'content': infobox,
+        'answer': translation['translated-text'],
+        'answer_type': 'translations',
+        'translations': [data],
    }

    return [result]
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@ -35,18 +35,16 @@ def request(query, params):  # pylint: disable=unused-argument


 def response(resp):
-    results = []
-    results.append(
-        {
-            'url': web_url.format(
-                from_lang=resp.search_params['from_lang'][2],
-                to_lang=resp.search_params['to_lang'][2],
-                query=resp.search_params['query'],
-            ),
-            'title': '[{0}-{1}] {2}'.format(
-                resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query']
-            ),
-            'content': resp.json()['responseData']['translatedText'],
-        }
-    )
-    return results
+    json_resp = resp.json()
+    text = json_resp['responseData']['translatedText']
+
+    alternatives = [match['translation'] for match in json_resp['matches'] if match['translation'] != text]
+    translations = [{'text': translation} for translation in [text] + alternatives]
+
+    result = {
+        'answer': translations[0]['text'],
+        'answer_type': 'translations',
+        'translations': translations,
+    }
+
+    return [result]
--- a/searx/settings.yml
+++ b/searx/settings.yml
@ -325,6 +325,36 @@ engines:
    shortcut: 9g
    disabled: true

+  - name: adobe stock
+    engine: adobe_stock
+    shortcut: asi
+    categories: ["images"]
+    # https://docs.searxng.org/dev/engines/online/adobe_stock.html
+    adobe_order: relevance
+    adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock video
+    engine: adobe_stock
+    shortcut: asv
+    network: adobe stock
+    categories: ["videos"]
+    adobe_order: relevance
+    adobe_content_types: ["video"]
+    timeout: 6
+    disabled: true
+
+  - name: adobe stock audio
+    engine: adobe_stock
+    shortcut: asa
+    network: adobe stock
+    categories: ["music"]
+    adobe_order: relevance
+    adobe_content_types: ["audio"]
+    timeout: 6
+    disabled: true
+
  - name: alpine linux packages
    engine: alpinelinux
    disabled: true
--- a/searx/templates/simple/answerers/translate.html
+++ b/searx/templates/simple/answerers/translate.html
@ -0,0 +1,38 @@
+<div class="answer-translations">
+{% for translation in translations %}
+  {% if loop.index > 1 %}
+  <hr />
+  {% endif %}
+  <h3>{{ translation.text }}</h3>
+  {% if translation.transliteration %}
+  <b>{{ translation.transliteration }}</b>
+  {% endif %} {% if translation.definitions %}
+  <dl>
+    <dt>{{ _('Definitions') }}</dt>
+    <ul>
+    {% for definition in translation.definitions %}
+    <li>{{ definition }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %} {% if translation.examples %}
+  <dl>
+    <dt>{{ _('Examples') }}</dt>
+    <ul>
+    {% for example in translation.examples %}
+    <li>{{ example }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %} {% if translation.synonyms %}
+  <dl>
+    <dt>{{ _('Synonyms') }}</dt>
+    <ul>
+    {% for synonym in translation.synonyms %}
+    <li>{{ synonym }}</li>
+    {% endfor %}
+    </ul>
+  </dl>
+  {% endif %}
+{% endfor %}
+</div>
--- a/searx/templates/simple/results.html
+++ b/searx/templates/simple/results.html
@ -23,14 +23,20 @@
    <div id="answers" role="complementary" aria-labelledby="answers-title"><h4 class="title" id="answers-title">{{ _('Answers') }} : </h4>
        {%- for answer in answers.values() -%}
        <div class="answer">
-        <span>{{ answer.answer }}</span>
-          {%- if answer.url -%}
-          <a href="{{ answer.url }}" class="answer-url"
-             {%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
-             {%- else -%} rel="noreferrer"
-             {%- endif -%}
-             >{{ urlparse(answer.url).hostname }}</a>
-          {% endif -%}
+          {%- if answer.answer_type == 'translations' -%}
+            {% with translations=answer.translations %}
+              {% include 'simple/answerers/translate.html' %}
+            {% endwith %}
+          {%- else -%}
+            <span>{{ answer.answer }}</span>
+            {%- if answer.url -%}
+              <a href="{{ answer.url }}" class="answer-url"
+                 {%- if results_on_new_tab %} target="_blank" rel="noopener noreferrer"
+                 {%- else -%} rel="noreferrer"
+                 {%- endif -%}
+                 >{{ urlparse(answer.url).hostname }}</a>
+            {% endif -%}
+          {%- endif -%}
        </div>
        {%- endfor -%}
    </div>
Author	SHA1	Message	Date
Bnyro	853ecc9719	Merge `7edd75ff93` into `0253c10b52`	2024-11-24 12:38:56 +01:00
Markus Heiser	0253c10b52	[feat] engine: add adobe stock video and audio engines The engine has been revised; there is now the option ``adobe_content_types`` with which it is possible to configure engines for video and audio from the adobe stock. BTW this patch adds documentation to the engine. To test all three engines in one use a search term like:: !asi !asv !asa sound Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>	2024-11-24 11:56:12 +01:00
Bnyro	f20a7632f1	[feat] engine: add adobe stock photos	2024-11-24 11:56:12 +01:00
Bnyro	7edd75ff93	[refactor] translation engines: common interface	2024-10-17 17:17:40 +02:00