mirror of
https://github.com/searxng/searxng.git
synced 2025-02-20 12:20:04 +00:00
Merge e4e6f21494
into 738906358b
This commit is contained in:
commit
8e8eb0e650
@ -143,10 +143,10 @@ suppress_warnings = ['myst.domains']
|
||||
intersphinx_mapping = {
|
||||
"python": ("https://docs.python.org/3/", None),
|
||||
"babel" : ("https://babel.readthedocs.io/en/latest/", None),
|
||||
"flask": ("https://flask.palletsprojects.com/", None),
|
||||
"flask": ("https://flask.palletsprojects.com/en/stable/", None),
|
||||
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
|
||||
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
|
||||
"jinja": ("https://jinja.palletsprojects.com/", None),
|
||||
"jinja": ("https://jinja.palletsprojects.com/en/stable/", None),
|
||||
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
|
||||
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
|
||||
"redis": ('https://redis.readthedocs.io/en/stable/', None),
|
||||
|
8
docs/dev/engines/online/tavily.rst
Normal file
8
docs/dev/engines/online/tavily.rst
Normal file
@ -0,0 +1,8 @@
|
||||
.. _tavily engine:
|
||||
|
||||
======
|
||||
Tavily
|
||||
======
|
||||
|
||||
.. automodule:: searx.engines.tavily
|
||||
:members:
|
246
searx/engines/tavily.py
Normal file
246
searx/engines/tavily.py
Normal file
@ -0,0 +1,246 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
|
||||
.. sidebar:: info
|
||||
|
||||
Before reporting an issue with this engine,
|
||||
please consult `API error codes`_.
|
||||
|
||||
Tavily_ search API (AI engine). This engine implements the REST API
|
||||
(`POST /search`_) and does not make use of the `Tavily Python Wrapper`_.
|
||||
|
||||
From the API response, this engine generates *result items* (shown in the main
|
||||
result list) and an *answer result* (shown on top of the main result list).
|
||||
If the *answer* from Tavily contains an image, the *answer result* is turned
|
||||
into an *infobox result*.
|
||||
|
||||
.. attention::
|
||||
|
||||
AI queries take considerably longer to process than queries to conventional
|
||||
search engines. The ``timeout`` should therefore also be set considerably
|
||||
higher, but it is not recommended to activate AI queries by default
|
||||
(set ``disabled: true``), as otherwise all user searches will have to wait
|
||||
for the AI.
|
||||
|
||||
.. _Tavily: https://tavily.com/
|
||||
.. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/
|
||||
.. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search
|
||||
.. _Tavily API Credit Deduction:
|
||||
https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview
|
||||
.. _Getting started: https://docs.tavily.com/docs/welcome#getting-started
|
||||
.. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
||||
The engine has the following mandatory settings:
|
||||
|
||||
- :py:obj:`api_key`
|
||||
- :py:obj:`topic`
|
||||
|
||||
Optional settings are:
|
||||
|
||||
- :py:obj:`days`
|
||||
- :py:obj:`search_depth`
|
||||
- :py:obj:`max_results`
|
||||
- :py:obj:`include_answer`
|
||||
- :py:obj:`include_images`
|
||||
- :py:obj:`include_image_descriptions`
|
||||
- :py:obj:`include_domains`
|
||||
- :py:obj:`exclude_domains`
|
||||
|
||||
Example configuration for general search queries:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: tavily
|
||||
engine: tavily
|
||||
shortcut: tav
|
||||
categories: [general, ai]
|
||||
api_key: xxxxxxxx
|
||||
topic: general
|
||||
include_images: true
|
||||
timeout: 15
|
||||
disabled: true
|
||||
|
||||
Example configuration for news search:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: tavily news
|
||||
engine: tavily
|
||||
shortcut: tavnews
|
||||
categories: [news, ai]
|
||||
api_key: xxxxxxxx
|
||||
topic: news
|
||||
timeout: 15
|
||||
disabled: true
|
||||
|
||||
|
||||
Implementation
|
||||
==============
|
||||
|
||||
"""
|
||||
|
||||
from json import dumps
|
||||
from datetime import datetime
|
||||
from flask_babel import gettext
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": "https://tavily.com/",
|
||||
"wikidata_id": None,
|
||||
"official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference",
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
search_url = "https://api.tavily.com/search"
|
||||
paging = False
|
||||
time_range_support = True
|
||||
|
||||
api_key: str = "unset"
|
||||
"""Tavily API Key (`Getting started`_)."""
|
||||
|
||||
search_depth: str = "basic"
|
||||
"""The depth of the search. It can be ``basic`` or ``advanced``. Default is
|
||||
``basic`` unless specified otherwise in a given method.
|
||||
|
||||
- have an eye on your `Tavily API Credit Deduction`_!
|
||||
"""
|
||||
|
||||
topic: str = ""
|
||||
"""The category of the search. This will determine which of Tavily's agents
|
||||
will be used for the search. Currently, only ``general`` and ``news`` are
|
||||
supported."""
|
||||
|
||||
days: int = 3
|
||||
"""The number of days back from the current date to include in the search results.
|
||||
This specifies the time frame of data to be retrieved. Please note that this
|
||||
feature is only available when using the ``news`` search topic. Default is 3."""
|
||||
|
||||
max_results: int = 5
|
||||
"""The maximum number of search results to return. Default is 5."""
|
||||
|
||||
include_answer: bool = True
|
||||
"""Include a short answer to the original query, generated by an LLM based on Tavily's
|
||||
search results."""
|
||||
|
||||
include_images: bool = False
|
||||
"""Include a list of query-related images in the response. Creates an infobox
|
||||
with the first image (as far as there are any images in the response) and the answer,
|
||||
if ``include_answer`` is also enabled.
|
||||
"""
|
||||
|
||||
include_image_descriptions: bool = False
|
||||
"""When ``include_images`` is set to True, this option adds descriptive text for
|
||||
each image."""
|
||||
|
||||
include_domains: list[str] = []
|
||||
"""A list of domains to specifically include in the search results. Default
|
||||
is ``[]``, which includes all domains."""
|
||||
|
||||
exclude_domains: list[str] = []
|
||||
"""A list of domains to specifically exclude from the search results. Default
|
||||
is ``[]``, which doesn't exclude any domains.
|
||||
"""
|
||||
|
||||
|
||||
def request(query, params):
    """Build the ``POST /search`` request for the Tavily REST API.

    The JSON payload is assembled from the search ``query``, the time range
    found in ``params["time_range"]`` and the module-level engine settings.
    Some fields are only sent conditionally:

    - ``include_image_descriptions`` only when ``include_images`` is enabled
    - ``include_answer`` only when ``topic`` is ``general``
    - ``days`` only when ``topic`` is ``news``

    :param query: the search terms.
    :param params: request parameters; must contain ``time_range`` and a
        ``headers`` dict.
    :return: the same ``params`` object with ``url``, ``method``, ``data`` and
        the ``Content-type`` header set.
    """
    data = {
        "query": query,
        "api_key": api_key,
        "search_depth": search_depth,
        "topic": topic,
        "time_range": params["time_range"],
        "max_results": max_results,
        "include_images": include_images,
        "include_domains": include_domains,
        "exclude_domains": exclude_domains,
    }

    if include_images:
        data["include_image_descriptions"] = include_image_descriptions

    if topic == "general":
        data["include_answer"] = include_answer
    elif topic == "news":
        data["days"] = days

    params["url"] = search_url
    params["method"] = "POST"
    params["headers"]["Content-type"] = "application/json"
    # The body is sent as a JSON document, matching the Content-type header.
    params["data"] = dumps(data)

    return params
|
||||
|
||||
|
||||
def response(resp):
    """Convert a Tavily API response into a list of result dicts.

    Three kinds of results are generated from the JSON document:

    - one *result item* per entry in ``results``
    - an *infobox result* when ``images`` is non-empty; it carries the first
      image and, if available, the answer and the image caption
    - an *answer result* when there are no images but an ``answer`` is present

    :param resp: HTTP response object providing a ``json()`` method.
    :return: list of result dicts, possibly empty.
    """
    results = []
    data = resp.json()

    for result in data.get("results", []):
        results.append(
            {
                "title": result["title"],
                "url": result["url"],
                "content": "[" + gettext("ai") + "] " + result["content"],
                "publishedDate": _parse_date(result.get("published_date")),
            }
        )

    # "answer" is only requested for the "general" topic (see ``request``), so
    # the key may be absent from the response: never index it directly.
    answer = data.get("answer")
    img_list = data.get("images")

    if img_list:
        first_img = img_list[0]
        infobox = {
            "infobox": "Tavily [" + gettext("ai") + "]",
            "img_src": first_img,
        }

        content = answer
        # An image is either a plain URL string or a dict holding the URL and
        # a description.
        if isinstance(first_img, dict):
            infobox["img_src"] = first_img["url"]
            description = first_img.get("description")
            if description:
                img_caption = gettext("Image caption") + ": " + description
                content = content + "//" + img_caption if content else img_caption

        if content:
            infobox["content"] = content

        results.append(infobox)

    elif answer:
        results.append({"answer": answer})

    return results
|
||||
|
||||
|
||||
def _parse_date(pubDate):
    """Parse a ``published_date`` string from the API response.

    :param pubDate: date string in the format ``%a, %d %b %Y %H:%M:%S %Z``,
        or ``None``.
    :return: the parsed :py:obj:`datetime.datetime`, or ``None`` when
        ``pubDate`` is ``None`` or cannot be parsed.
    """
    if pubDate is None:
        return None

    try:
        return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z")
    except (ValueError, TypeError) as e:
        # Best effort: an unparsable date must not drop the whole result.
        # NOTE(review): ``logger`` is not defined in this module's visible
        # code; presumably it is provided by the engine loader -- confirm.
        logger.debug("ignore exception (publishedDate): %s", e)
    return None
|
||||
|
||||
|
||||
def init(engine_settings: dict):
    """Validate the engine's settings.

    For each checked option the value from ``engine_settings`` is used; if it
    is missing or empty, the module-level default is used instead.  All
    problems are collected and reported together.

    :param engine_settings: the engine's settings; must contain ``name``
        when a problem is reported.
    :raises ValueError: if ``api_key`` is missing or ``"unset"``, if ``topic``
        is not ``general`` or ``news``, or if ``search_depth`` is not
        ``basic`` or ``advanced``.
    """
    msg = []

    val = engine_settings.get("api_key") or api_key
    if not val or val == "unset":
        msg.append("missing api_key")

    val = engine_settings.get("topic") or topic
    if val not in ["general", "news"]:
        msg.append(f"invalid topic: '{val}'")

    val = engine_settings.get("search_depth") or search_depth
    if val not in ["basic", "advanced"]:
        msg.append(f"invalid search_depth: '{val}'")

    if msg:
        raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(msg)}")
|
@ -7,72 +7,77 @@ from searx import webutils
|
||||
from searx import engines
|
||||
|
||||
__all__ = [
|
||||
'CONSTANT_NAMES',
|
||||
'CATEGORY_NAMES',
|
||||
'CATEGORY_GROUPS',
|
||||
'STYLE_NAMES',
|
||||
'BRAND_CUSTOM_LINKS',
|
||||
'WEATHER_TERMS',
|
||||
'CATEGORY_GROUPS',
|
||||
'CATEGORY_NAMES',
|
||||
'CONSTANT_NAMES',
|
||||
'SOCIAL_MEDIA_TERMS',
|
||||
'STYLE_NAMES',
|
||||
'WEATHER_TERMS',
|
||||
]
|
||||
|
||||
CONSTANT_NAMES = {
|
||||
# Constants defined in other modules
|
||||
'NO_SUBGROUPING': webutils.NO_SUBGROUPING,
|
||||
'DEFAULT_CATEGORY': engines.DEFAULT_CATEGORY,
|
||||
'NO_SUBGROUPING': webutils.NO_SUBGROUPING,
|
||||
}
|
||||
|
||||
CATEGORY_NAMES = {
|
||||
'FILES': 'files',
|
||||
'GENERAL': 'general',
|
||||
'MUSIC': 'music',
|
||||
'SOCIAL_MEDIA': 'social media',
|
||||
'IMAGES': 'images',
|
||||
'VIDEOS': 'videos',
|
||||
'RADIO': 'radio',
|
||||
'TV': 'tv',
|
||||
'IT': 'it',
|
||||
'NEWS': 'news',
|
||||
'MAP': 'map',
|
||||
'MUSIC': 'music',
|
||||
'NEWS': 'news',
|
||||
'ONIONS': 'onions',
|
||||
'RADIO': 'radio',
|
||||
'SCIENCE': 'science',
|
||||
'SOCIAL_MEDIA': 'social media',
|
||||
'TV': 'tv',
|
||||
'VIDEOS': 'videos',
|
||||
}
|
||||
|
||||
CATEGORY_GROUPS = {
|
||||
# non-tab categories
|
||||
'AI': 'ai',
|
||||
'APPS': 'apps',
|
||||
'DICTIONARIES': 'dictionaries',
|
||||
'LYRICS': 'lyrics',
|
||||
'MOVIES': 'movies',
|
||||
'PACKAGES': 'packages',
|
||||
'Q_A': 'q&a',
|
||||
'REPOS': 'repos',
|
||||
'SCIENTIFIC_PUBLICATIONS': 'scientific publications',
|
||||
'SOFTWARE_WIKIS': 'software wikis',
|
||||
'TRANSLATE': 'translate',
|
||||
'WEATHER': 'weather',
|
||||
'WEB': 'web',
|
||||
'SCIENTIFIC PUBLICATIONS': 'scientific publications',
|
||||
'WIKIMEDIA': 'wikimedia',
|
||||
}
|
||||
|
||||
STYLE_NAMES = {
|
||||
'AUTO': 'auto',
|
||||
'LIGHT': 'light',
|
||||
'DARK': 'dark',
|
||||
'BLACK': 'black',
|
||||
'DARK': 'dark',
|
||||
'LIGHT': 'light',
|
||||
}
|
||||
|
||||
BRAND_CUSTOM_LINKS = {
|
||||
'UPTIME': 'Uptime',
|
||||
'ABOUT': 'About',
|
||||
'UPTIME': 'Uptime',
|
||||
}
|
||||
|
||||
WEATHER_TERMS = {
|
||||
'AVERAGE TEMP.': 'Average temp.',
|
||||
'CLOUD COVER': 'Cloud cover',
|
||||
'AVERAGE_TEMP.': 'Average temp.',
|
||||
'CLOUD_COVER': 'Cloud cover',
|
||||
'CONDITION': 'Condition',
|
||||
'CURRENT CONDITION': 'Current condition',
|
||||
'CURRENT_CONDITION': 'Current condition',
|
||||
'EVENING': 'Evening',
|
||||
'FEELS LIKE': 'Feels like',
|
||||
'FEELS_LIKE': 'Feels like',
|
||||
'HUMIDITY': 'Humidity',
|
||||
'MAX TEMP.': 'Max temp.',
|
||||
'MIN TEMP.': 'Min temp.',
|
||||
'MAX_TEMP.': 'Max temp.',
|
||||
'MIN_TEMP.': 'Min temp.',
|
||||
'MORNING': 'Morning',
|
||||
'NIGHT': 'Night',
|
||||
'NOON': 'Noon',
|
||||
@ -80,22 +85,22 @@ WEATHER_TERMS = {
|
||||
'SUNRISE': 'Sunrise',
|
||||
'SUNSET': 'Sunset',
|
||||
'TEMPERATURE': 'Temperature',
|
||||
'UV INDEX': 'UV index',
|
||||
'UV_INDEX': 'UV index',
|
||||
'VISIBILITY': 'Visibility',
|
||||
'WIND': 'Wind',
|
||||
}
|
||||
|
||||
SOCIAL_MEDIA_TERMS = {
|
||||
'SUBSCRIBERS': 'subscribers',
|
||||
'POSTS': 'posts',
|
||||
'ACTIVE USERS': 'active users',
|
||||
'ACTIVE_USERS': 'active users',
|
||||
'AUTHOR': 'author',
|
||||
'COMMENTS': 'comments',
|
||||
'USER': 'user',
|
||||
'COMMUNITY': 'community',
|
||||
'POINTS': 'points',
|
||||
'POSTS': 'posts',
|
||||
'SUBSCRIBERS': 'subscribers',
|
||||
'THREAD_ANSWERED': 'answered',
|
||||
'THREAD_CLOSED': 'closed',
|
||||
'THREAD_OPEN': 'open',
|
||||
'TITLE': 'title',
|
||||
'AUTHOR': 'author',
|
||||
'THREAD OPEN': 'open',
|
||||
'THREAD CLOSED': 'closed',
|
||||
'THREAD ANSWERED': 'answered',
|
||||
'USER': 'user',
|
||||
}
|
||||
|
@ -1846,6 +1846,29 @@ engines:
|
||||
shortcut: tm
|
||||
disabled: true
|
||||
|
||||
# Tavily requires an API key as well as other configurations. Before you
|
||||
# activate these engines you should read the documentation.
|
||||
# --> https://docs.searxng.org/dev/engines/online/tavily.html
|
||||
#
|
||||
# - name: tavily
|
||||
# engine: tavily
|
||||
# shortcut: tav
|
||||
# categories: [general, ai]
|
||||
# api_key: unset
|
||||
# topic: general
|
||||
# include_images: true
|
||||
# timeout: 15
|
||||
# disabled: true
|
||||
#
|
||||
# - name: tavily news
|
||||
# engine: tavily
|
||||
# shortcut: tavnews
|
||||
# categories: [news, ai]
|
||||
# api_key: unset
|
||||
# topic: news
|
||||
# timeout: 15
|
||||
# disabled: true
|
||||
|
||||
# Requires Tor
|
||||
- name: torch
|
||||
engine: xpath
|
||||
|
Loading…
Reference in New Issue
Block a user