From 5fcee9bc307f6d3592ebcb1db4f4f8834df6f495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herm=C3=B3genes=20Oliveira?= Date: Mon, 24 Nov 2025 02:54:45 -0300 Subject: [PATCH] [fix] recoll engine: remove HTML markup from result snippets (#5472) Recoll inserts markup tags in snippets to indicate matching terms in a search query. We remove them so that they don't show to users. --- searx/engines/recoll.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index c9e85344c..d58f60b2c 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -41,6 +41,7 @@ from datetime import date, timedelta from urllib.parse import urlencode from searx.result_types import EngineResults +from searx.utils import html_to_text if t.TYPE_CHECKING: from searx.extended_types import SXNG_Response @@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults: if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]: thumbnail = url + # remove HTML from snippet + content = html_to_text(result.get("snippet", "")) + res.add( res.types.File( title=result.get("label", ""), url=url, - content=result.get("snippet", ""), + content=content, size=result.get("size", ""), filename=result.get("filename", ""), abstract=result.get("abstract", ""),