mirror of
https://github.com/searxng/searxng.git
synced 2026-01-02 17:10:01 +00:00
[refactor] typification of SearXNG (MainResult) / result items (part 2)
The class ReslutContainer has been revised, it can now handle the typed Result items of classes: - MainResult - LegacyResult (a dict wrapper for backward compatibility) Due to the now complete typing of theses three clases, instead of the *getitem* accesses, the fields can now be accessed directly via attributes (which is also supported by the IDE). Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
committed by
Markus Heiser
parent
d6ce29f7f0
commit
8769b7c6d6
@@ -13,13 +13,13 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["Result", "EngineResults", "AnswerSet", "Answer", "Translations"]
|
||||
__all__ = ["Result", "MainResult", "EngineResults", "AnswerSet", "Answer", "Translations"]
|
||||
|
||||
import abc
|
||||
|
||||
from searx import enginelib
|
||||
|
||||
from ._base import Result, LegacyResult
|
||||
from ._base import Result, MainResult, LegacyResult
|
||||
from .answer import AnswerSet, Answer, Translations
|
||||
|
||||
|
||||
@@ -30,13 +30,18 @@ class ResultList(list, abc.ABC):
|
||||
"""The collection of result types (which have already been implemented)."""
|
||||
|
||||
Answer = Answer
|
||||
MainResult = MainResult
|
||||
Result = Result
|
||||
Translations = Translations
|
||||
|
||||
# for backward compatibility
|
||||
LegacyResult = LegacyResult
|
||||
|
||||
def __init__(self):
|
||||
# pylint: disable=useless-parent-delegation
|
||||
super().__init__()
|
||||
|
||||
def add(self, result: Result):
|
||||
def add(self, result: Result | LegacyResult):
|
||||
"""Add a :py:`Result` item to the result list."""
|
||||
self.append(result)
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
.. autoclass:: Result
|
||||
:members:
|
||||
|
||||
.. _LegacyResult:
|
||||
|
||||
.. autoclass:: LegacyResult
|
||||
:members:
|
||||
"""
|
||||
@@ -22,9 +24,88 @@ __all__ = ["Result"]
|
||||
import re
|
||||
import urllib.parse
|
||||
import warnings
|
||||
import typing
|
||||
|
||||
import msgspec
|
||||
|
||||
from searx import logger as log
|
||||
|
||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
|
||||
|
||||
def _normalize_url_fields(result: Result | LegacyResult):
|
||||
|
||||
# As soon we need LegacyResult not any longer, we can move this function to
|
||||
# method Result.normalize_result_fields
|
||||
|
||||
if result.url and not result.parsed_url:
|
||||
if not isinstance(result.url, str):
|
||||
log.debug('result: invalid URL: %s', str(result))
|
||||
result.url = ""
|
||||
result.parsed_url = None
|
||||
else:
|
||||
result.parsed_url = urllib.parse.urlparse(result.url)
|
||||
|
||||
if result.parsed_url:
|
||||
result.parsed_url = result.parsed_url._replace(
|
||||
# if the result has no scheme, use http as default
|
||||
scheme=result.parsed_url.scheme or "http",
|
||||
# normalize ``www.example.com`` to ``example.com``
|
||||
netloc=result.parsed_url.netloc.replace("www.", ""),
|
||||
# normalize ``example.com/path/`` to ``example.com/path``
|
||||
path=result.parsed_url.path.rstrip("/"),
|
||||
)
|
||||
result.url = result.parsed_url.geturl()
|
||||
|
||||
if isinstance(result, LegacyResult) and getattr(result, "infobox", None):
|
||||
# As soon we have InfoboxResult, we can move this function to method
|
||||
# InfoboxResult.normalize_result_fields
|
||||
|
||||
infobox_urls: list[dict[str, str]] = getattr(result, "urls", [])
|
||||
for item in infobox_urls:
|
||||
_url = item.get("url")
|
||||
if not _url:
|
||||
continue
|
||||
_url = urllib.parse.urlparse(_url)
|
||||
item["url"] = _url._replace(
|
||||
scheme=_url.scheme or "http",
|
||||
netloc=_url.netloc.replace("www.", ""),
|
||||
path=_url.path.rstrip("/"),
|
||||
).geturl()
|
||||
|
||||
infobox_id = getattr(result, "id", None)
|
||||
if infobox_id:
|
||||
_url = urllib.parse.urlparse(infobox_id)
|
||||
result.id = _url._replace(
|
||||
scheme=_url.scheme or "http",
|
||||
netloc=_url.netloc.replace("www.", ""),
|
||||
path=_url.path.rstrip("/"),
|
||||
).geturl()
|
||||
|
||||
|
||||
def _normalize_text_fields(result: MainResult | LegacyResult):
|
||||
|
||||
# As soon we need LegacyResult not any longer, we can move this function to
|
||||
# method MainResult.normalize_result_fields
|
||||
|
||||
# Actually, a type check should not be necessary if the engine is
|
||||
# implemented correctly. Historically, however, we have always had a type
|
||||
# check here.
|
||||
|
||||
if result.title and not isinstance(result.title, str):
|
||||
log.debug("result: invalid type of field 'title': %s", str(result))
|
||||
result.title = str(result)
|
||||
if result.content and not isinstance(result.content, str):
|
||||
log.debug("result: invalid type of field 'content': %s", str(result))
|
||||
result.content = str(result)
|
||||
|
||||
# normalize title and content
|
||||
result.title = WHITESPACE_REGEX.sub(" ", result.title).strip()
|
||||
result.content = WHITESPACE_REGEX.sub(" ", result.content).strip()
|
||||
if result.content == result.title:
|
||||
# avoid duplicate content between the content and title fields
|
||||
result.content = ""
|
||||
|
||||
|
||||
class Result(msgspec.Struct, kw_only=True):
|
||||
"""Base class of all result types :ref:`result types`."""
|
||||
@@ -54,21 +135,20 @@ class Result(msgspec.Struct, kw_only=True):
|
||||
"""
|
||||
|
||||
def normalize_result_fields(self):
|
||||
"""Normalize a result ..
|
||||
"""Normalize fields ``url`` and ``parse_sql``.
|
||||
|
||||
- if field ``url`` is set and field ``parse_url`` is unset, init
|
||||
``parse_url`` from field ``url``. This method can be extended in the
|
||||
inheritance.
|
||||
- If field ``url`` is set and field ``parse_url`` is unset, init
|
||||
``parse_url`` from field ``url``. The ``url`` field is initialized
|
||||
with the resulting value in ``parse_url``, if ``url`` and
|
||||
``parse_url`` are not equal.
|
||||
|
||||
- ``www.example.com`` and ``example.com`` are equivalent and are normalized
|
||||
to ``example.com``.
|
||||
|
||||
- ``example.com/path/`` and ``example.com/path`` are equivalent and are
|
||||
normalized to ``example.com/path``.
|
||||
"""
|
||||
|
||||
if not self.parsed_url and self.url:
|
||||
self.parsed_url = urllib.parse.urlparse(self.url)
|
||||
|
||||
# if the result has no scheme, use http as default
|
||||
if not self.parsed_url.scheme:
|
||||
self.parsed_url = self.parsed_url._replace(scheme="http")
|
||||
self.url = self.parsed_url.geturl()
|
||||
_normalize_url_fields(self)
|
||||
|
||||
def __post_init__(self):
|
||||
pass
|
||||
@@ -84,7 +164,6 @@ class Result(msgspec.Struct, kw_only=True):
|
||||
The hash value is used in contexts, e.g. when checking for equality to
|
||||
identify identical results from different sources (engines).
|
||||
"""
|
||||
|
||||
return id(self)
|
||||
|
||||
def __eq__(self, other):
|
||||
@@ -113,12 +192,19 @@ class Result(msgspec.Struct, kw_only=True):
|
||||
def as_dict(self):
|
||||
return {f: getattr(self, f) for f in self.__struct_fields__}
|
||||
|
||||
def defaults_from(self, other: Result):
|
||||
"""Fields not set in *self* will be updated from the field values of the
|
||||
*other*.
|
||||
"""
|
||||
for field_name in self.__struct_fields__:
|
||||
self_val = getattr(self, field_name, False)
|
||||
other_val = getattr(other, field_name, False)
|
||||
if self_val:
|
||||
setattr(self, field_name, other_val)
|
||||
|
||||
|
||||
class MainResult(Result): # pylint: disable=missing-class-docstring
|
||||
|
||||
# open_group and close_group should not manged in the Result class (we should rop it from here!)
|
||||
open_group: bool = False
|
||||
close_group: bool = False
|
||||
"""Base class of all result types displayed in :ref:`area main results`."""
|
||||
|
||||
title: str = ""
|
||||
"""Link title of the result item."""
|
||||
@@ -132,6 +218,43 @@ class MainResult(Result): # pylint: disable=missing-class-docstring
|
||||
thumbnail: str = ""
|
||||
"""URL of a thumbnail that is displayed in the result item."""
|
||||
|
||||
priority: typing.Literal["", "high", "low"] = ""
|
||||
"""The priority can be set via :ref:`hostnames plugin`, for example."""
|
||||
|
||||
engines: set[str] = set()
|
||||
"""In a merged results list, the names of the engines that found this result
|
||||
are listed in this field."""
|
||||
|
||||
# open_group and close_group should not manged in the Result
|
||||
# class (we should drop it from here!)
|
||||
open_group: bool = False
|
||||
close_group: bool = False
|
||||
positions: list[int] = []
|
||||
score: float = 0
|
||||
category: str = ""
|
||||
|
||||
def __hash__(self) -> int:
|
||||
"""Ordinary url-results are equal if their values for
|
||||
:py:obj:`Result.template`, :py:obj:`Result.parsed_url` (without scheme)
|
||||
and :py:obj:`MainResult.img_src` are equal.
|
||||
"""
|
||||
if not self.parsed_url:
|
||||
raise ValueError(f"missing a value in field 'parsed_url': {self}")
|
||||
|
||||
url = self.parsed_url
|
||||
return hash(
|
||||
f"{self.template}"
|
||||
+ f"|{url.netloc}|{url.path}|{url.params}|{url.query}|{url.fragment}"
|
||||
+ f"|{self.img_src}"
|
||||
)
|
||||
|
||||
def normalize_result_fields(self):
|
||||
super().normalize_result_fields()
|
||||
|
||||
_normalize_text_fields(self)
|
||||
if self.engine:
|
||||
self.engines.add(self.engine)
|
||||
|
||||
|
||||
class LegacyResult(dict):
|
||||
"""A wrapper around a legacy result item. The SearXNG core uses this class
|
||||
@@ -150,7 +273,27 @@ class LegacyResult(dict):
|
||||
"""
|
||||
|
||||
UNSET = object()
|
||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
|
||||
# emulate field types from type class Result
|
||||
url: str | None
|
||||
template: str
|
||||
engine: str
|
||||
parsed_url: urllib.parse.ParseResult | None
|
||||
|
||||
# emulate field types from type class MainResult
|
||||
title: str
|
||||
content: str
|
||||
img_src: str
|
||||
thumbnail: str
|
||||
priority: typing.Literal["", "high", "low"]
|
||||
engines: set[str]
|
||||
positions: list[int]
|
||||
score: float
|
||||
category: str
|
||||
|
||||
# infobox result
|
||||
urls: list[dict[str, str]]
|
||||
attributes: list[dict[str, str]]
|
||||
|
||||
def as_dict(self):
|
||||
return self
|
||||
@@ -159,14 +302,26 @@ class LegacyResult(dict):
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
# Init fields with defaults / compare with defaults of the fields in class Result
|
||||
self.engine = self.get("engine", "")
|
||||
self.template = self.get("template", "default.html")
|
||||
self.url = self.get("url", None)
|
||||
self.parsed_url = self.get("parsed_url", None)
|
||||
# emulate field types from type class Result
|
||||
self["url"] = self.get("url")
|
||||
self["template"] = self.get("template", "default.html")
|
||||
self["engine"] = self.get("engine", "")
|
||||
self["parsed_url"] = self.get("parsed_url")
|
||||
|
||||
self.content = self.get("content", "")
|
||||
self.title = self.get("title", "")
|
||||
# emulate field types from type class MainResult
|
||||
self["title"] = self.get("title", "")
|
||||
self["content"] = self.get("content", "")
|
||||
self["img_src"] = self.get("img_src", "")
|
||||
self["thumbnail"] = self.get("thumbnail", "")
|
||||
self["priority"] = self.get("priority", "")
|
||||
self["engines"] = self.get("engines", set())
|
||||
self["positions"] = self.get("positions", "")
|
||||
self["score"] = self.get("score", 0)
|
||||
self["category"] = self.get("category", "")
|
||||
|
||||
if "infobox" in self:
|
||||
self["urls"] = self.get("urls", [])
|
||||
self["attributes"] = self.get("attributes", [])
|
||||
|
||||
# Legacy types that have already been ported to a type ..
|
||||
|
||||
@@ -178,13 +333,47 @@ class LegacyResult(dict):
|
||||
)
|
||||
self.template = "answer/legacy.html"
|
||||
|
||||
if self.template == "keyvalue.html":
|
||||
warnings.warn(
|
||||
f"engine {self.engine} is using deprecated `dict` for key/value results"
|
||||
f" / use a class from searx.result_types",
|
||||
DeprecationWarning,
|
||||
)
|
||||
|
||||
def __getattr__(self, name: str, default=UNSET) -> typing.Any:
|
||||
if default == self.UNSET and name not in self:
|
||||
raise AttributeError(f"LegacyResult object has no field named: {name}")
|
||||
return self[name]
|
||||
|
||||
def __setattr__(self, name: str, val):
|
||||
self[name] = val
|
||||
|
||||
def __hash__(self) -> int: # type: ignore
|
||||
|
||||
if "answer" in self:
|
||||
# deprecated ..
|
||||
return hash(self["answer"])
|
||||
|
||||
if self.template == "images.html":
|
||||
# image results are equal if their values for template, the url and
|
||||
# the img_src are equal.
|
||||
return hash(f"{self.template}|{self.url}|{self.img_src}")
|
||||
|
||||
if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]):
|
||||
# it is a commun url-result ..
|
||||
return hash(self.url)
|
||||
# Ordinary url-results are equal if their values for template,
|
||||
# parsed_url (without schema) and img_src` are equal.
|
||||
|
||||
# Code copied from with MainResult.__hash__:
|
||||
if not self.parsed_url:
|
||||
raise ValueError(f"missing a value in field 'parsed_url': {self}")
|
||||
|
||||
url = self.parsed_url
|
||||
return hash(
|
||||
f"{self.template}"
|
||||
+ f"|{url.netloc}|{url.path}|{url.params}|{url.query}|{url.fragment}"
|
||||
+ f"|{self.img_src}"
|
||||
)
|
||||
|
||||
return id(self)
|
||||
|
||||
def __eq__(self, other):
|
||||
@@ -195,30 +384,13 @@ class LegacyResult(dict):
|
||||
|
||||
return f"LegacyResult: {super().__repr__()}"
|
||||
|
||||
def __getattr__(self, name: str, default=UNSET):
|
||||
|
||||
if default == self.UNSET and name not in self:
|
||||
raise AttributeError(f"LegacyResult object has no field named: {name}")
|
||||
return self[name]
|
||||
|
||||
def __setattr__(self, name: str, val):
|
||||
|
||||
self[name] = val
|
||||
|
||||
def normalize_result_fields(self):
|
||||
_normalize_url_fields(self)
|
||||
_normalize_text_fields(self)
|
||||
if self.engine:
|
||||
self.engines.add(self.engine)
|
||||
|
||||
self.title = self.WHITESPACE_REGEX.sub(" ", self.title)
|
||||
|
||||
if not self.parsed_url and self.url:
|
||||
self.parsed_url = urllib.parse.urlparse(self.url)
|
||||
|
||||
# if the result has no scheme, use http as default
|
||||
if not self.parsed_url.scheme:
|
||||
self.parsed_url = self.parsed_url._replace(scheme="http")
|
||||
self.url = self.parsed_url.geturl()
|
||||
|
||||
if self.content:
|
||||
self.content = self.WHITESPACE_REGEX.sub(" ", self.content)
|
||||
if self.content == self.title:
|
||||
# avoid duplicate content between the content and title fields
|
||||
self.content = ""
|
||||
def defaults_from(self, other: LegacyResult):
|
||||
for k, v in other.items():
|
||||
if not self.get(k):
|
||||
self[k] = v
|
||||
|
||||
Reference in New Issue
Block a user