[mod] replace js_variable_to_python by js_obj_str_to_python (#2792) (#5477)

This patch is based on PR #2792 (old PR from 2023)

- js_obj_str_to_python handles more cases
- bring in the tests from chompjs
- comment out the tests that do not pass

The tests from chompjs give some overview of what is not implemented.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser
2025-11-25 12:51:08 +01:00
committed by GitHub
parent 0ee78c19dd
commit 54a97e1043
6 changed files with 410 additions and 61 deletions

View File

@@ -50,7 +50,7 @@ def response(resp):
pos = script.index(end_tag) + len(end_tag) - 1
script = script[:pos]
json_resp = utils.js_variable_to_python(script)
json_resp = utils.js_obj_str_to_python(script)
results = []

View File

@@ -134,7 +134,7 @@ from searx.utils import (
eval_xpath,
eval_xpath_list,
eval_xpath_getindex,
js_variable_to_python,
js_obj_str_to_python,
get_embeded_stream_url,
)
from searx.enginelib.traits import EngineTraits
@@ -262,7 +262,7 @@ def response(resp: SXNG_Response) -> EngineResults:
# data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
js_object = "[{" + extr(resp.text, "data: [{", "}}],") + "}}]"
json_data = js_variable_to_python(js_object)
json_data = js_obj_str_to_python(js_object)
# json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
json_resp = json_data[1]['data']['body']['response']
@@ -439,9 +439,9 @@ def fetch_traits(engine_traits: EngineTraits):
resp = get('https://search.brave.com/settings')
if not resp.ok: # type: ignore
if not resp.ok:
print("ERROR: response from Brave is not OK.")
dom = html.fromstring(resp.text) # type: ignore
dom = html.fromstring(resp.text)
for option in dom.xpath('//section//option[@value="en-us"]/../option'):
@@ -468,12 +468,12 @@ def fetch_traits(engine_traits: EngineTraits):
resp = get('https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js')
if not resp.ok: # type: ignore
if not resp.ok:
print("ERROR: response from Brave is not OK.")
country_js = resp.text[resp.text.index("options:{all") + len('options:') :] # type: ignore
country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
country_js = country_js[: country_js.index("},k={default")]
country_tags = js_variable_to_python(country_js)
country_tags = js_obj_str_to_python(country_js)
for k, v in country_tags.items():
if k == 'all':

View File

@@ -407,7 +407,7 @@ def fetch_traits(engine_traits: EngineTraits):
"""
# pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel
from searx.utils import js_variable_to_python
from searx.utils import js_obj_str_to_python
# fetch regions
@@ -455,7 +455,7 @@ def fetch_traits(engine_traits: EngineTraits):
js_code = extr(resp.text, 'languages:', ',regions') # type: ignore
languages = js_variable_to_python(js_code)
languages: dict[str, str] = js_obj_str_to_python(js_code)
for eng_lang, name in languages.items():
if eng_lang == 'wt_WT':

View File

@@ -15,7 +15,7 @@ from searx.utils import (
extr,
html_to_text,
parse_duration_string,
js_variable_to_python,
js_obj_str_to_python,
get_embeded_stream_url,
)
@@ -125,7 +125,7 @@ def parse_images(data):
match = extr(data, '<script>var imageSearchTabData=', '</script>')
if match:
json = js_variable_to_python(match.strip())
json = js_obj_str_to_python(match.strip())
items = json.get('content', {}).get('items', [])
for item in items: