mirror of
https://github.com/searxng/searxng.git
synced 2025-12-22 19:50:00 +00:00
[fix] recoll engine: remove HTML markup from result snippets (#5472)
Recoll inserts markup tags into snippets to indicate the terms that match a search query. We remove these tags so that they are not shown to users.
This commit is contained in:
committed by
GitHub
parent
2f0e52d6eb
commit
5fcee9bc30
@@ -41,6 +41,7 @@ from datetime import date, timedelta
|
|||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
from searx.result_types import EngineResults
|
from searx.result_types import EngineResults
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
if t.TYPE_CHECKING:
|
if t.TYPE_CHECKING:
|
||||||
from searx.extended_types import SXNG_Response
|
from searx.extended_types import SXNG_Response
|
||||||
@@ -133,11 +134,14 @@ def response(resp: "SXNG_Response") -> EngineResults:
|
|||||||
if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
|
if mtype in ["image"] and subtype in ["bmp", "gif", "jpeg", "png"]:
|
||||||
thumbnail = url
|
thumbnail = url
|
||||||
|
|
||||||
|
# remove HTML from snippet
|
||||||
|
content = html_to_text(result.get("snippet", ""))
|
||||||
|
|
||||||
res.add(
|
res.add(
|
||||||
res.types.File(
|
res.types.File(
|
||||||
title=result.get("label", ""),
|
title=result.get("label", ""),
|
||||||
url=url,
|
url=url,
|
||||||
content=result.get("snippet", ""),
|
content=content,
|
||||||
size=result.get("size", ""),
|
size=result.get("size", ""),
|
||||||
filename=result.get("filename", ""),
|
filename=result.get("filename", ""),
|
||||||
abstract=result.get("abstract", ""),
|
abstract=result.get("abstract", ""),
|
||||||
|
|||||||
Reference in New Issue
Block a user