[fix] recoll engine: remove HTML markup from result snippets (#5472)

Recoll inserts HTML markup tags into snippets to indicate the terms that
match the search query.  We remove them so that they are not shown to users.
This commit is contained in:
Hermógenes Oliveira
2025-11-24 02:54:45 -03:00
committed by GitHub
parent 2f0e52d6eb
commit 5fcee9bc30

View File

@@ -41,6 +41,7 @@ from datetime import date, timedelta
from urllib.parse import urlencode
from searx.result_types import EngineResults
from searx.utils import html_to_text
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
@@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults:
if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
thumbnail = url
# remove HTML from snippet
content = html_to_text(result.get("snippet", ""))
res.add(
res.types.File(
title=result.get("label", ""),
url=url,
content=result.get("snippet", ""),
content=content,
size=result.get("size", ""),
filename=result.get("filename", ""),
abstract=result.get("abstract", ""),