mirror of
https://github.com/searxng/searxng.git
synced 2025-12-22 11:40:01 +00:00
[fix] recoll engine: remove HTML markup from result snippets (#5472)
Recoll inserts markup tags into snippets to highlight the terms that match the search query. We remove these tags so that they are not shown to users.
This commit is contained in:
committed by
GitHub
parent
2f0e52d6eb
commit
5fcee9bc30
@@ -41,6 +41,7 @@ from datetime import date, timedelta
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.utils import html_to_text
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.extended_types import SXNG_Response
|
||||
@@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults:
|
||||
if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
|
||||
thumbnail = url
|
||||
|
||||
# remove HTML from snippet
|
||||
content = html_to_text(result.get("snippet", ""))
|
||||
|
||||
res.add(
|
||||
res.types.File(
|
||||
title=result.get("label", ""),
|
||||
url=url,
|
||||
content=result.get("snippet", ""),
|
||||
content=content,
|
||||
size=result.get("size", ""),
|
||||
filename=result.get("filename", ""),
|
||||
abstract=result.get("abstract", ""),
|
||||
|
||||
Reference in New Issue
Block a user