This commit is contained in:
GenericMale 2025-01-31 10:02:34 +01:00 committed by GitHub
commit 8e2722bf2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 315 additions and 33 deletions

View File

@ -143,10 +143,10 @@ suppress_warnings = ['myst.domains']
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"babel" : ("https://babel.readthedocs.io/en/latest/", None),
"flask": ("https://flask.palletsprojects.com/", None),
"flask": ("https://flask.palletsprojects.com/en/stable/", None),
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
"jinja": ("https://jinja.palletsprojects.com/", None),
"jinja": ("https://jinja.palletsprojects.com/en/stable/", None),
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"redis": ('https://redis.readthedocs.io/en/stable/', None),

View File

@ -0,0 +1,8 @@
.. _tavily engine:
======
Tavily
======
.. automodule:: searx.engines.tavily
:members:

246
searx/engines/tavily.py Normal file
View File

@ -0,0 +1,246 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
.. sidebar:: info
Before reporting an issue with this engine,
please consult `API error codes`_.
Tavily_ search API (AI engine). This engine implements the REST API
(`POST /search`_) and does not make use of the `Tavily Python Wrapper`_.
From the API response, this engine generates *result items* (shown in the main
result list) and an *answer result* (shown on top of the main result list).
If the *answer* from Tavily contains an image, the *answer result* is turned
into an *infobox result*.
.. attention::
AI queries take considerably longer to process than queries to conventional
search engines. The ``timeout`` should therefore also be set considerably
higher, but it is not recommended to activate AI queries by default
(set ``disabled: true``), as otherwise all user searches will have to wait
for the AI.
.. _Tavily: https://tavily.com/
.. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/
.. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search
.. _Tavily API Credit Deduction:
https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview
.. _Getting started: https://docs.tavily.com/docs/welcome#getting-started
.. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes
Configuration
=============
The engine has the following mandatory settings:
- :py:obj:`api_key`
- :py:obj:`topic`
Optional settings are:
- :py:obj:`days`
- :py:obj:`search_depth`
- :py:obj:`max_results`
- :py:obj:`include_answer`
- :py:obj:`include_images`
- :py:obj:`include_image_descriptions`
- :py:obj:`include_domains`
- :py:obj:`exclude_domains`
Example configuration for general search queries:
.. code:: yaml
- name: tavily
engine: tavily
shortcut: tav
categories: [general, ai]
api_key: xxxxxxxx
topic: general
include_images: true
timeout: 15
disabled: true
Example configuration for news search:
.. code:: yaml
- name: tavily news
engine: tavily
shortcut: tavnews
categories: [news, ai]
api_key: xxxxxxxx
topic: news
timeout: 15
disabled: true
Implementation
==============
"""
from json import dumps
from datetime import datetime
from flask_babel import gettext
# Engine metadata: website, API documentation link and the kind of access /
# response format this engine relies on.
about = {
"website": "https://tavily.com/",
"wikidata_id": None,
"official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference",
"use_official_api": True,
"require_api_key": True,
"results": "JSON",
}
# Endpoint that request() sends the JSON payload to (HTTP POST).
search_url = "https://api.tavily.com/search"
# NOTE(review): presumably read by the engine framework -- paging is off,
# time-range filtering is on.  request() forwards params["time_range"] as-is.
paging = False
time_range_support = True
# "unset" is a placeholder value; init() reports it as a missing api_key.
api_key: str = "unset"
"""Tavily API Key (`Getting started`_)."""
# init() accepts only "basic" or "advanced".
search_depth: str = "basic"
"""The depth of the search. It can be ``basic`` or ``advanced``. Default is
``basic`` unless specified otherwise in a given method.
- have an eye on your `Tavily API Credit Deduction`_!
"""
# Empty by default, so it has to be configured: init() accepts only
# "general" or "news".
topic: str = ""
"""The category of the search. This will determine which of Tavily's agents
will be used for the search. Currently, only ``general`` and ``news`` are
supported."""
# Only added to the request payload when topic is "news" (see request()).
days: int = 3
"""The number of days back from the current date to include in the search results.
This specifies the time frame of data to be retrieved. Please note that this
feature is only available when using the ``news`` search topic. Default is 3."""
max_results: int = 5
"""The maximum number of search results to return. Default is 5."""
# Only added to the request payload when topic is "general" (see request()).
include_answer: bool = True
"""Include a short answer to the original query, generated by an LLM based on Tavily's
search results."""
include_images: bool = False
"""Include a list of query-related images in the response. Creates an infobox
with the first image (as far as there are any images in the response) and the answer,
if ``include_answer`` is also enabled.
"""
# Only added to the request payload when include_images is true (see request()).
include_image_descriptions: bool = False
"""When ``include_images`` is set to True, this option adds descriptive text for
each image."""
# Both domain lists are always sent in the request payload, even when empty.
include_domains: list[str] = []
"""A list of domains to specifically include in the search results. Default
is ``[]``, which includes all domains."""
exclude_domains: list[str] = []
"""A list of domains to specifically exclude from the search results. Default
is ``[]``, which doesn't exclude any domains.
"""
def request(query, params):
    """Prepare the ``POST /search`` request for the Tavily REST API.

    The JSON payload is assembled from the user's query and the engine's
    module-level settings.  ``params`` is filled in place (``url``, ``method``,
    ``Content-type`` header and ``data``) and returned.

    :param query: the search terms entered by the user.
    :param params: request parameters; ``params["time_range"]`` is read and
        ``params["headers"]`` must already exist.
    :returns: the same ``params`` dict.
    """
    payload = {
        "query": query,
        "api_key": api_key,
        "search_depth": search_depth,
        "topic": topic,
        "time_range": params["time_range"],
        "max_results": max_results,
        "include_images": include_images,
        "include_domains": include_domains,
        "exclude_domains": exclude_domains,
    }

    # Image descriptions are only requested together with images.
    if include_images:
        payload["include_image_descriptions"] = include_image_descriptions

    # Topic specific options: ``days`` for news, ``include_answer`` for general.
    if topic == "news":
        payload["days"] = days
    elif topic == "general":
        payload["include_answer"] = include_answer

    params.update(url=search_url, method="POST")
    params["headers"]["Content-type"] = "application/json"
    params["data"] = dumps(payload)
    return params
def response(resp):
    """Turn a Tavily ``POST /search`` response into a list of result dicts.

    Every entry of the response's ``results`` list becomes a regular result
    item.  In addition:

    - if the response carries images, an *infobox* result is built from the
      first image, with the answer and/or the image description as content;
    - otherwise, if the response carries a non-empty answer, an *answer*
      result is appended.

    :param resp: HTTP response object; only its ``json()`` method is used.
    :returns: list of result dicts (possibly empty).
    """
    results = []
    data = resp.json()

    for item in data.get("results", []):
        results.append(
            {
                "title": item["title"],
                "url": item["url"],
                "content": "[" + gettext("ai") + "] " + item["content"],
                "publishedDate": _parse_date(item.get("published_date")),
            }
        )

    # The answer is optional (request() only asks for it on the "general"
    # topic), so it must not be indexed directly.
    answer = data.get("answer")
    img_list = data.get("images")

    if not img_list:
        if answer:
            results.append({"answer": answer})
        return results

    first_img = img_list[0]
    infobox = {
        "infobox": "Tavily [" + gettext("ai") + "]",
        "img_src": first_img,
    }
    content = answer

    # An image entry is either used as-is for ``img_src`` or, when it is a
    # dict, provides a "url" and (optionally) a "description".
    if isinstance(first_img, dict):
        infobox["img_src"] = first_img["url"]
        description = first_img.get("description")
        if description:
            caption = gettext("Image caption") + ": " + description
            content = (content + "//" + caption) if content else caption

    if content:
        infobox["content"] = content
    results.append(infobox)

    return results
def _parse_date(pubDate):
if pubDate is not None:
try:
return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z")
except (ValueError, TypeError) as e:
logger.debug("ignore exception (publishedDate): %s", e)
return None
def init(engine_settings: dict):
    """Validate the engine's settings.

    Each checked setting is taken from ``engine_settings`` and falls back to
    the module-level default when it is missing or empty there.  All problems
    are collected so that a single exception reports every one of them.

    :param engine_settings: settings of the engine instance; ``name`` is used
        in the error message.
    :raises ValueError: if ``api_key`` is missing / ``"unset"``, ``topic`` is
        not ``general`` or ``news``, or ``search_depth`` is not ``basic`` or
        ``advanced``.
    """
    problems = []

    key = engine_settings.get("api_key") or api_key
    if not key or key == "unset":
        problems.append("missing api_key")

    chosen_topic = engine_settings.get("topic") or topic
    if chosen_topic not in ("general", "news"):
        problems.append(f"invalid topic: '{chosen_topic}'")

    depth = engine_settings.get("search_depth") or search_depth
    if depth not in ("basic", "advanced"):
        problems.append(f"invalid search_depth: '{depth}'")

    if not problems:
        return
    raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(problems)}")

View File

@ -7,72 +7,77 @@ from searx import webutils
from searx import engines
__all__ = [
'CONSTANT_NAMES',
'CATEGORY_NAMES',
'CATEGORY_GROUPS',
'STYLE_NAMES',
'BRAND_CUSTOM_LINKS',
'WEATHER_TERMS',
'CATEGORY_GROUPS',
'CATEGORY_NAMES',
'CONSTANT_NAMES',
'SOCIAL_MEDIA_TERMS',
'STYLE_NAMES',
'WEATHER_TERMS',
]
CONSTANT_NAMES = {
# Constants defined in other modules
'NO_SUBGROUPING': webutils.NO_SUBGROUPING,
'DEFAULT_CATEGORY': engines.DEFAULT_CATEGORY,
'NO_SUBGROUPING': webutils.NO_SUBGROUPING,
}
CATEGORY_NAMES = {
'FILES': 'files',
'GENERAL': 'general',
'MUSIC': 'music',
'SOCIAL_MEDIA': 'social media',
'IMAGES': 'images',
'VIDEOS': 'videos',
'RADIO': 'radio',
'TV': 'tv',
'IT': 'it',
'NEWS': 'news',
'MAP': 'map',
'MUSIC': 'music',
'NEWS': 'news',
'ONIONS': 'onions',
'RADIO': 'radio',
'SCIENCE': 'science',
'SOCIAL_MEDIA': 'social media',
'TV': 'tv',
'VIDEOS': 'videos',
}
CATEGORY_GROUPS = {
# non-tab categories
'AI': 'ai',
'APPS': 'apps',
'DICTIONARIES': 'dictionaries',
'LYRICS': 'lyrics',
'MOVIES': 'movies',
'PACKAGES': 'packages',
'Q_A': 'q&a',
'REPOS': 'repos',
'SCIENTIFIC_PUBLICATIONS': 'scientific publications',
'SOFTWARE_WIKIS': 'software wikis',
'TRANSLATE': 'translate',
'WEATHER': 'weather',
'WEB': 'web',
'SCIENTIFIC PUBLICATIONS': 'scientific publications',
'WIKIMEDIA': 'wikimedia',
}
STYLE_NAMES = {
'AUTO': 'auto',
'LIGHT': 'light',
'DARK': 'dark',
'BLACK': 'black',
'DARK': 'dark',
'LIGHT': 'light',
}
BRAND_CUSTOM_LINKS = {
'UPTIME': 'Uptime',
'ABOUT': 'About',
'UPTIME': 'Uptime',
}
WEATHER_TERMS = {
'AVERAGE TEMP.': 'Average temp.',
'CLOUD COVER': 'Cloud cover',
'AVERAGE_TEMP.': 'Average temp.',
'CLOUD_COVER': 'Cloud cover',
'CONDITION': 'Condition',
'CURRENT CONDITION': 'Current condition',
'CURRENT_CONDITION': 'Current condition',
'EVENING': 'Evening',
'FEELS LIKE': 'Feels like',
'FEELS_LIKE': 'Feels like',
'HUMIDITY': 'Humidity',
'MAX TEMP.': 'Max temp.',
'MIN TEMP.': 'Min temp.',
'MAX_TEMP.': 'Max temp.',
'MIN_TEMP.': 'Min temp.',
'MORNING': 'Morning',
'NIGHT': 'Night',
'NOON': 'Noon',
@ -80,22 +85,22 @@ WEATHER_TERMS = {
'SUNRISE': 'Sunrise',
'SUNSET': 'Sunset',
'TEMPERATURE': 'Temperature',
'UV INDEX': 'UV index',
'UV_INDEX': 'UV index',
'VISIBILITY': 'Visibility',
'WIND': 'Wind',
}
SOCIAL_MEDIA_TERMS = {
'SUBSCRIBERS': 'subscribers',
'POSTS': 'posts',
'ACTIVE USERS': 'active users',
'ACTIVE_USERS': 'active users',
'AUTHOR': 'author',
'COMMENTS': 'comments',
'USER': 'user',
'COMMUNITY': 'community',
'POINTS': 'points',
'POSTS': 'posts',
'SUBSCRIBERS': 'subscribers',
'THREAD_ANSWERED': 'answered',
'THREAD_CLOSED': 'closed',
'THREAD_OPEN': 'open',
'TITLE': 'title',
'AUTHOR': 'author',
'THREAD OPEN': 'open',
'THREAD CLOSED': 'closed',
'THREAD ANSWERED': 'answered',
'USER': 'user',
}

View File

@ -1846,6 +1846,29 @@ engines:
shortcut: tm
disabled: true
# Tavily requires an API key as well as other configurations. Before you
# activate these engines you should read the documentation.
# --> https://docs.searxng.org/dev/engines/online/tavily.html
#
# - name: tavily
# engine: tavily
# shortcut: tav
# categories: [general, ai]
# api_key: unset
# topic: general
# include_images: true
# timeout: 15
# disabled: true
#
# - name: tavily news
# engine: tavily
# shortcut: tavnews
# categories: [news, ai]
# api_key: unset
# topic: news
# timeout: 15
# disabled: true
# Requires Tor
- name: torch
engine: xpath