mirror of
https://github.com/searxng/searxng.git
synced 2025-12-23 12:10:00 +00:00
This patch is based on PR #2792 (an old PR from 2023): - js_obj_str_to_python handles more cases - bring in the tests from chompjs - comment out the tests that do not pass. The tests from chompjs give an overview of what is not yet implemented. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
@@ -50,7 +50,7 @@ def response(resp):
|
|||||||
pos = script.index(end_tag) + len(end_tag) - 1
|
pos = script.index(end_tag) + len(end_tag) - 1
|
||||||
script = script[:pos]
|
script = script[:pos]
|
||||||
|
|
||||||
json_resp = utils.js_variable_to_python(script)
|
json_resp = utils.js_obj_str_to_python(script)
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
|||||||
@@ -134,7 +134,7 @@ from searx.utils import (
|
|||||||
eval_xpath,
|
eval_xpath,
|
||||||
eval_xpath_list,
|
eval_xpath_list,
|
||||||
eval_xpath_getindex,
|
eval_xpath_getindex,
|
||||||
js_variable_to_python,
|
js_obj_str_to_python,
|
||||||
get_embeded_stream_url,
|
get_embeded_stream_url,
|
||||||
)
|
)
|
||||||
from searx.enginelib.traits import EngineTraits
|
from searx.enginelib.traits import EngineTraits
|
||||||
@@ -262,7 +262,7 @@ def response(resp: SXNG_Response) -> EngineResults:
|
|||||||
# data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
|
# data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
|
||||||
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
js_object = "[{" + extr(resp.text, "data: [{", "}}],") + "}}]"
|
js_object = "[{" + extr(resp.text, "data: [{", "}}],") + "}}]"
|
||||||
json_data = js_variable_to_python(js_object)
|
json_data = js_obj_str_to_python(js_object)
|
||||||
|
|
||||||
# json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
|
# json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
|
||||||
json_resp = json_data[1]['data']['body']['response']
|
json_resp = json_data[1]['data']['body']['response']
|
||||||
@@ -439,9 +439,9 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||||||
|
|
||||||
resp = get('https://search.brave.com/settings')
|
resp = get('https://search.brave.com/settings')
|
||||||
|
|
||||||
if not resp.ok: # type: ignore
|
if not resp.ok:
|
||||||
print("ERROR: response from Brave is not OK.")
|
print("ERROR: response from Brave is not OK.")
|
||||||
dom = html.fromstring(resp.text) # type: ignore
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
for option in dom.xpath('//section//option[@value="en-us"]/../option'):
|
for option in dom.xpath('//section//option[@value="en-us"]/../option'):
|
||||||
|
|
||||||
@@ -468,12 +468,12 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||||||
|
|
||||||
resp = get('https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js')
|
resp = get('https://cdn.search.brave.com/serp/v2/_app/immutable/chunks/parameters.734c106a.js')
|
||||||
|
|
||||||
if not resp.ok: # type: ignore
|
if not resp.ok:
|
||||||
print("ERROR: response from Brave is not OK.")
|
print("ERROR: response from Brave is not OK.")
|
||||||
|
|
||||||
country_js = resp.text[resp.text.index("options:{all") + len('options:') :] # type: ignore
|
country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
|
||||||
country_js = country_js[: country_js.index("},k={default")]
|
country_js = country_js[: country_js.index("},k={default")]
|
||||||
country_tags = js_variable_to_python(country_js)
|
country_tags = js_obj_str_to_python(country_js)
|
||||||
|
|
||||||
for k, v in country_tags.items():
|
for k, v in country_tags.items():
|
||||||
if k == 'all':
|
if k == 'all':
|
||||||
|
|||||||
@@ -407,7 +407,7 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
# pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel
|
# pylint: disable=too-many-branches, too-many-statements, disable=import-outside-toplevel
|
||||||
from searx.utils import js_variable_to_python
|
from searx.utils import js_obj_str_to_python
|
||||||
|
|
||||||
# fetch regions
|
# fetch regions
|
||||||
|
|
||||||
@@ -455,7 +455,7 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||||||
|
|
||||||
js_code = extr(resp.text, 'languages:', ',regions') # type: ignore
|
js_code = extr(resp.text, 'languages:', ',regions') # type: ignore
|
||||||
|
|
||||||
languages = js_variable_to_python(js_code)
|
languages: dict[str, str] = js_obj_str_to_python(js_code)
|
||||||
for eng_lang, name in languages.items():
|
for eng_lang, name in languages.items():
|
||||||
|
|
||||||
if eng_lang == 'wt_WT':
|
if eng_lang == 'wt_WT':
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from searx.utils import (
|
|||||||
extr,
|
extr,
|
||||||
html_to_text,
|
html_to_text,
|
||||||
parse_duration_string,
|
parse_duration_string,
|
||||||
js_variable_to_python,
|
js_obj_str_to_python,
|
||||||
get_embeded_stream_url,
|
get_embeded_stream_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -125,7 +125,7 @@ def parse_images(data):
|
|||||||
|
|
||||||
match = extr(data, '<script>var imageSearchTabData=', '</script>')
|
match = extr(data, '<script>var imageSearchTabData=', '</script>')
|
||||||
if match:
|
if match:
|
||||||
json = js_variable_to_python(match.strip())
|
json = js_obj_str_to_python(match.strip())
|
||||||
items = json.get('content', {}).get('items', [])
|
items = json.get('content', {}).get('items', [])
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
|
|||||||
147
searx/utils.py
147
searx/utils.py
@@ -49,9 +49,14 @@ _BLOCKED_TAGS = ('script', 'style')
|
|||||||
_ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
|
_ECMA_UNESCAPE4_RE = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
|
||||||
_ECMA_UNESCAPE2_RE = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
|
_ECMA_UNESCAPE2_RE = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
|
||||||
|
|
||||||
_JS_QUOTE_KEYS_RE = re.compile(r'([\{\s,])(\w+)(:)')
|
_JS_STRING_DELIMITERS = re.compile(r'(["\'`])')
|
||||||
_JS_VOID_RE = re.compile(r'void\s+[0-9]+|void\s*\([0-9]+\)')
|
_JS_QUOTE_KEYS_RE = re.compile(r'([\{\s,])([\$_\w][\$_\w0-9]*)(:)')
|
||||||
_JS_DECIMAL_RE = re.compile(r":\s*\.")
|
# Matches the JS expressions that are replaced by JSON ``null``: ``void 0``,
# ``void(0)`` and the bare identifier ``undefined``.  The look-arounds stop
# the pattern from firing inside a longer identifier (``undefinedValue``,
# ``is_undefined``, ``$undefined``), which would otherwise be corrupted.
_JS_VOID_OR_UNDEFINED_RE = re.compile(r'(?<![\w$])(?:void\s+[0-9]+|void\s*\([0-9]+\)|undefined)(?![\w$])')
|
||||||
|
_JS_DECIMAL_RE = re.compile(r"([\[\,:])\s*(\-?)\s*([0-9_]*)\.([0-9_]*)")
|
||||||
|
_JS_DECIMAL2_RE = re.compile(r"([\[\,:])\s*(\-?)\s*([0-9_]+)")
|
||||||
|
_JS_EXTRA_COMA_RE = re.compile(r"\s*,\s*([\]\}])")
|
||||||
|
# Matches a backslash followed by any single character inside a JS string.
# re.DOTALL lets "." match a newline as well, so a JS line continuation
# (backslash + newline) is handed to the replacement callback instead of
# being left untouched in the output.
_JS_STRING_ESCAPE_RE = re.compile(r'\\(.)', re.DOTALL)
|
||||||
|
_JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
|
||||||
|
|
||||||
_XPATH_CACHE: dict[str, XPath] = {}
|
_XPATH_CACHE: dict[str, XPath] = {}
|
||||||
_LANG_TO_LC_CACHE: dict[str, dict[str, str]] = {}
|
_LANG_TO_LC_CACHE: dict[str, dict[str, str]] = {}
|
||||||
@@ -741,12 +746,53 @@ def detect_language(text: str, threshold: float = 0.3, only_search_languages: bo
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def js_variable_to_python(js_variable: str) -> t.Any:
|
def _j2p_process_escape(match: re.Match[str]) -> str:
    """Translate one JS string escape sequence into its JSON counterpart.

    ``match`` is a match of a "backslash + one character" pattern whose first
    group is the character that follows the backslash.  The return value is
    the text that replaces the whole escape sequence:

    * characters listed in ``_JSON_PASSTHROUGH_ESCAPES`` keep their backslash,
    * ``\\x`` becomes the prefix ``\\u00`` (the two hex digits that follow in
      the input complete a JSON ``\\u00XX`` escape),
    * ``\\v`` becomes ``\\u000b`` (vertical tab has no short JSON escape),
    * backslash + newline (JS line continuation) is removed,
    * any other character is emitted without the backslash.
    """
    # The escape pattern has exactly one group; asking for a second one would
    # raise IndexError, so only group 1 is read.
    _escape = match.group(1)

    if _escape in _JSON_PASSTHROUGH_ESCAPES:
        return Rf'\{_escape}'
    if _escape == 'x':
        return R'\u00'
    if _escape == 'v':
        return R'\u000b'
    if _escape == '\n':
        # Only reachable when the pattern that produced ``match`` lets "."
        # match a newline.
        return ''
    return _escape
|
||||||
|
|
||||||
|
|
||||||
|
def _j2p_decimal(match: re.Match[str]) -> str:
    """Rebuild a JS float literal so that both sides of the dot have digits.

    ``match`` provides four groups: the character preceding the number, an
    optional minus sign, the integer digits and the fractional digits.
    Numeric separators (``_``) are dropped and a missing integer or
    fractional part is filled with ``0``.
    """
    prefix, sign, int_digits, frac_digits = match.group(1, 2, 3, 4)
    int_part = int_digits.replace("_", "") or "0"
    frac_part = frac_digits.replace("_", "") or "0"
    return f"{prefix}{sign}{int_part}.{frac_part}"
|
||||||
|
|
||||||
|
|
||||||
|
def _j2p_decimal2(match: re.Match[str]) -> str:
    """Rebuild a JS integer literal without numeric separators.

    ``match`` provides three groups: the character preceding the number, an
    optional minus sign and the digits (which may contain ``_``).
    """
    prefix, sign, digits = match.group(1, 2, 3)
    return "".join((prefix, sign, digits.replace("_", "")))
|
||||||
|
|
||||||
|
|
||||||
|
def js_obj_str_to_python(js_obj_str: str) -> t.Any:
    """Parse a JavaScript object / array literal given as a string.

    The literal is first rewritten into JSON by
    :py:obj:`js_obj_str_to_json_str` and the JSON is then loaded.  Not every
    JavaScript construct is covered, but it is good enough for now; chompjs
    has a better implementation.

    :raises ValueError: if the conversion yields an empty string or a string
        that is not valid JSON.
    """
    json_str = js_obj_str_to_json_str(js_obj_str)
    if not json_str:
        raise ValueError("js_obj_str can't be an empty string")

    try:
        result = json.loads(json_str)
    except json.JSONDecodeError as exc:
        # keep the generated JSON in the debug log, it is needed to analyse
        # what the conversion got wrong
        logger.debug("Internal error: js_obj_str_to_python creates invalid JSON:\n%s", json_str)
        raise ValueError("js_obj_str_to_python creates invalid JSON") from exc
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def js_obj_str_to_json_str(js_obj_str: str) -> str:
|
||||||
|
if not isinstance(js_obj_str, str):
|
||||||
|
raise ValueError("js_obj_str must be of type str")
|
||||||
|
if js_obj_str == "":
|
||||||
|
raise ValueError("js_obj_str can't be an empty string")
|
||||||
|
|
||||||
# when in_string is not None, it contains the character that has opened the string
|
# when in_string is not None, it contains the character that has opened the string
|
||||||
# either simple quote or double quote
|
# either simple quote or double quote
|
||||||
in_string = None
|
in_string = None
|
||||||
@@ -754,61 +800,78 @@ def js_variable_to_python(js_variable: str) -> t.Any:
|
|||||||
# r"""{ a:"f\"irst", c:'sec"ond'}"""
|
# r"""{ a:"f\"irst", c:'sec"ond'}"""
|
||||||
# becomes
|
# becomes
|
||||||
# ['{ a:', '"', 'f\\', '"', 'irst', '"', ', c:', "'", 'sec', '"', 'ond', "'", '}']
|
# ['{ a:', '"', 'f\\', '"', 'irst', '"', ', c:', "'", 'sec', '"', 'ond', "'", '}']
|
||||||
parts = re.split(r'(["\'])', js_variable)
|
parts = _JS_STRING_DELIMITERS.split(js_obj_str)
|
||||||
# previous part (to check the escape character antislash)
|
# does the previous part end with a backslash?
|
||||||
previous_p = ""
|
blackslash_just_before = False
|
||||||
for i, p in enumerate(parts):
|
for i, p in enumerate(parts):
|
||||||
# parse characters inside a ECMA string
|
if p == in_string and not blackslash_just_before:
|
||||||
if in_string:
|
# * the current part matches the character which has opened the string
|
||||||
# we are in a JS string: replace the colon by a temporary character
|
# * there is no antislash just before
|
||||||
# so quote_keys_regex doesn't have to deal with colon inside the JS strings
|
# --> the current part close the current string
|
||||||
parts[i] = parts[i].replace(':', chr(1))
|
in_string = None
|
||||||
if in_string == "'":
|
# replace simple quote and ` by double quote
|
||||||
|
# since JSON supports only double quote for string
|
||||||
|
parts[i] = '"'
|
||||||
|
|
||||||
|
elif in_string:
|
||||||
|
# --> we are in a JS string
|
||||||
|
# replace the colon by a temporary character
|
||||||
|
# so _JS_QUOTE_KEYS_RE doesn't have to deal with colon inside the JS strings
|
||||||
|
p = p.replace(':', chr(1))
|
||||||
|
# replace JS escape sequences by JSON escape sequences
|
||||||
|
p = _JS_STRING_ESCAPE_RE.sub(_j2p_process_escape, p)
|
||||||
# the JS string is delimited by simple quote.
|
# the JS string is delimited by simple quote.
|
||||||
# This is not supported by JSON.
|
# This is not supported by JSON.
|
||||||
# simple quote delimited string are converted to double quote delimited string
|
# simple quote delimited string are converted to double quote delimited string
|
||||||
# here, inside a JS string, we escape the double quote
|
# here, inside a JS string, we escape the double quote
|
||||||
parts[i] = parts[i].replace('"', r'\"')
|
if in_string == "'":
|
||||||
|
p = p.replace('"', r'\"')
|
||||||
|
parts[i] = p
|
||||||
|
# deal with the sequence backslash then quote
|
||||||
|
# since js_obj_str splits on quote, we detect this case:
|
||||||
|
# * the previous part ends with a backslash
|
||||||
|
# * the current part is a single quote
|
||||||
|
# when detected, the backslash is removed from the previous part
|
||||||
|
if blackslash_just_before and p[:1] == "'":
|
||||||
|
parts[i - 1] = parts[i - 1][:-1]
|
||||||
|
|
||||||
# deal with delimiters and escape character
|
elif in_string is None and p in ('"', "'", "`"):
|
||||||
if not in_string and p in ('"', "'"):
|
# we are not in string but p is string delimiter
|
||||||
# we are not in string
|
# --> that's the start of a new string
|
||||||
# but p is double or simple quote
|
|
||||||
# that's the start of a new string
|
|
||||||
# replace simple quote by double quote
|
|
||||||
# (JSON doesn't support simple quote)
|
|
||||||
parts[i] = '"'
|
|
||||||
in_string = p
|
in_string = p
|
||||||
continue
|
|
||||||
if p == in_string:
|
|
||||||
# we are in a string and the current part MAY close the string
|
|
||||||
if len(previous_p) > 0 and previous_p[-1] == '\\':
|
|
||||||
# there is an antislash just before: the ECMA string continue
|
|
||||||
continue
|
|
||||||
# the current p close the string
|
|
||||||
# replace simple quote by double quote
|
# replace simple quote by double quote
|
||||||
|
# since JSON supports only double quote for string
|
||||||
parts[i] = '"'
|
parts[i] = '"'
|
||||||
in_string = None
|
|
||||||
|
|
||||||
if not in_string:
|
elif in_string is None:
|
||||||
# replace void 0 by null
|
# we are not in a string
|
||||||
|
# replace by null these values:
|
||||||
|
# * void 0
|
||||||
|
# * void(0)
|
||||||
|
# * undefined
|
||||||
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/void
|
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/void
|
||||||
# we are sure there is no string in p
|
p = _JS_VOID_OR_UNDEFINED_RE.sub("null", p)
|
||||||
parts[i] = _JS_VOID_RE.sub("null", p)
|
# make sure there is a leading zero in front of float
|
||||||
# update previous_p
|
p = _JS_DECIMAL_RE.sub(_j2p_decimal, p)
|
||||||
previous_p = p
|
p = _JS_DECIMAL2_RE.sub(_j2p_decimal2, p)
|
||||||
|
# remove the extra comma in a list or an object
|
||||||
|
# for example [1,2,3,] becomes [1,2,3]
|
||||||
|
p = _JS_EXTRA_COMA_RE.sub(lambda match: match.group(1), p)
|
||||||
|
parts[i] = p
|
||||||
|
|
||||||
|
# update for the next iteration
|
||||||
|
blackslash_just_before = len(p) > 0 and p[-1] == '\\'
|
||||||
|
|
||||||
# join the string
|
# join the string
|
||||||
s = ''.join(parts)
|
s = ''.join(parts)
|
||||||
# add quote around the key
|
# add quotes around the key
|
||||||
# { a: 12 }
|
# { a: 12 }
|
||||||
# becomes
|
# becomes
|
||||||
# { "a": 12 }
|
# { "a": 12 }
|
||||||
s = _JS_QUOTE_KEYS_RE.sub(r'\1"\2"\3', s)
|
s = _JS_QUOTE_KEYS_RE.sub(r'\1"\2"\3', s)
|
||||||
s = _JS_DECIMAL_RE.sub(":0.", s)
|
# replace the surrogate character by a colon and strip whitespace
|
||||||
# replace the surogate character by colon
|
s = s.replace(chr(1), ':').strip()
|
||||||
s = s.replace(chr(1), ':')
|
return s
|
||||||
# load the JSON and return the result
|
|
||||||
return json.loads(s)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_duration_string(duration_str: str) -> timedelta | None:
|
def parse_duration_string(duration_str: str) -> timedelta | None:
|
||||||
|
|||||||
286
tests/unit/test_js_variable_to_python.py
Normal file
286
tests/unit/test_js_variable_to_python.py
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Tests for the function ``searx.utils.js_obj_str_to_python``
|
||||||
|
|
||||||
|
The tests are copied from:
|
||||||
|
|
||||||
|
https://github.com/Nykakin/chompjs/blob/c1501b5cd82c0044539875331745b820e7bfd067/chompjs/test_parser.py
|
||||||
|
|
||||||
|
The commented-out tests are not yet supported by the current implementation.
|
||||||
|
"""
|
||||||
|
# pylint: disable=missing-class-docstring, invalid-name
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
from parameterized import parameterized
|
||||||
|
|
||||||
|
from searx.utils import js_obj_str_to_python
|
||||||
|
|
||||||
|
from tests import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestParser(SearxTestCase):
    """Inputs that ``js_obj_str_to_python`` is expected to parse.

    Each case is a pair ``(js, expected_py)``: the JavaScript source string
    and the Python value the parser has to return for it.  Commented-out
    cases are not yet supported by the current implementation.
    """

    @parameterized.expand(
        [
            ("{'hello': 'world'}", {'hello': 'world'}),
            ("{'hello': 'world', 'my': 'master'}", {'hello': 'world', 'my': 'master'}),
            (
                "{'hello': 'world', 'my': {'master': 'of Orion'}, 'test': 'xx'}",
                {'hello': 'world', 'my': {'master': 'of Orion'}, 'test': 'xx'},
            ),
            ("{}", {}),
        ]
    )
    def test_parse_object(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            ("[]", []),
            ("[[[]]]", [[[]]]),
            ("[[[1]]]", [[[1]]]),
            ("[1]", [1]),
            ("[1, 2, 3, 4]", [1, 2, 3, 4]),
            ("['h', 'e', 'l', 'l', 'o']", ['h', 'e', 'l', 'l', 'o']),
            ("[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]", [[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]),
        ]
    )
    def test_parse_list(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            ("{'hello': [], 'world': [0]}", {'hello': [], 'world': [0]}),
            ("{'hello': [1, 2, 3, 4]}", {'hello': [1, 2, 3, 4]}),
            ("[{'a':12}, {'b':33}]", [{'a': 12}, {'b': 33}]),
            (
                "[false, {'true': true, `pies`: \"kot\"}, false,]",
                [False, {"true": True, 'pies': 'kot'}, False],
            ),
            (
                "{a:1,b:1,c:1,d:1,e:1,f:1,g:1,h:1,i:1,j:1}",
                {k: 1 for k in 'abcdefghij'},
            ),
            (
                "{'a':[{'b':1},{'c':[{'d':{'f':{'g':[1,2]}}},{'e':1}]}]}",
                {'a': [{'b': 1}, {'c': [{'d': {'f': {'g': [1, 2]}}}, {'e': 1}]}]},
            ),
        ]
    )
    def test_parse_mixed(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            ("{'hello': 12, 'world': 10002.21}", {'hello': 12, 'world': 10002.21}),
            ("[12, -323, 0.32, -32.22, .2, - 4]", [12, -323, 0.32, -32.22, 0.2, -4]),
            ('{"a": -12, "b": - 5}', {'a': -12, 'b': -5}),
            ("{'a': true, 'b': false, 'c': null}", {'a': True, 'b': False, 'c': None}),
            ("[\"\\uD834\\uDD1E\"]", ['𝄞']),
            ("{'a': '123\\'456\\n'}", {'a': "123'456\n"}),
            ("['\u00e9']", ['é']),
            ('{"cache":{"\u002ftest\u002f": 0}}', {'cache': {'/test/': 0}}),
            ('{"a": 3.125e7}', {'a': 3.125e7}),
            ('''{"a": "b\\'"}''', {'a': "b'"}),
            ('{"a": .99, "b": -.1}', {"a": 0.99, "b": -0.1}),
            ('["/* ... */", "// ..."]', ["/* ... */", "// ..."]),
            ('{"inclusions":["/*","/"]}', {'inclusions': ['/*', '/']}),
        ]
    )
    def test_parse_standard_values(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    def test_parse_nan(self):
        # NaN never compares equal to itself, so it can't be part of a
        # parameterized assertEqual table
        js = '{"A": NaN}'
        py = js_obj_str_to_python(js)
        self.assertTrue(math.isnan(py["A"]))

    @parameterized.expand(
        [
            ("{abc: 100, dev: 200}", {'abc': 100, 'dev': 200}),
            ("{abcdefghijklmnopqrstuvwxyz: 12}", {"abcdefghijklmnopqrstuvwxyz": 12}),
            # (
            #     "{age: function(yearBorn,thisYear) {return thisYear - yearBorn;}}",
            #     {"age": "function(yearBorn,thisYear) {return thisYear - yearBorn;}"}
            # ),
            # (
            #     "{\"abc\": function() {return '])))))))))))))))';}}",
            #     {"abc": "function() {return '])))))))))))))))';}"},
            # ),
            ('{"a": undefined}', {"a": None}),  # chompjs returns {"a": "undefined"}
            ('[undefined, undefined]', [None, None]),  # chompjs returns ["undefined", "undefined"]
            ("{_a: 1, $b: 2}", {"_a": 1, "$b": 2}),
            # ("{regex: /a[^d]{1,12}/i}", {'regex': '/a[^d]{1,12}/i'}),
            # ("{'a': function(){return '\"'}}", {'a': 'function(){return \'"\'}'}),
            ("{1: 1, 2: 2, 3: 3, 4: 4}", {'1': 1, '2': 2, '3': 3, '4': 4}),
            ("{'a': 121.}", {'a': 121.0}),
        ]
    )
    def test_parse_strange_values(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            # ('{"a": {"b": [12, 13, 14]}}text text', {"a": {"b": [12, 13, 14]}}),
            # ('var test = {"a": {"b": [12, 13, 14]}}', {"a": {"b": [12, 13, 14]}}),
            ('{"a":\r\n10}', {'a': 10}),
            ("{'foo': 0,\r\n}", {'foo': 0}),
            ("{truefalse: 0, falsefalse: 1, nullnull: 2}", {'truefalse': 0, 'falsefalse': 1, 'nullnull': 2}),
        ]
    )
    def test_strange_input(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            ("[0]", [0]),
            ("[1]", [1]),
            ("[12]", [12]),
            ("[12_12]", [1212]),
            # ("[0x12]", [18]),
            # ("[0xab]", [171]),
            # ("[0xAB]", [171]),
            # ("[0X12]", [18]),
            # ("[0Xab]", [171]),
            # ("[0XAB]", [171]),
            # ("[01234]", [668]),
            # ("[0o1234]", [668]),
            # ("[0O1234]", [668]),
            # ("[0b1111]", [15]),
            # ("[0B1111]", [15]),
            ("[-0]", [-0]),
            ("[-1]", [-1]),
            ("[-12]", [-12]),
            ("[-12_12]", [-1212]),
            # ("[-0x12]", [-18]),
            # ("[-0xab]", [-171]),
            # ("[-0xAB]", [-171]),
            # ("[-0X12]", [-18]),
            # ("[-0Xab]", [-171]),
            # ("[-0XAB]", [-171]),
            # ("[-01234]", [-668]),
            # ("[-0o1234]", [-668]),
            # ("[-0O1234]", [-668]),
            # ("[-0b1111]", [-15]),
            # ("[-0B1111]", [-15]),
        ]
    )
    def test_integer_numeric_values(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    @parameterized.expand(
        [
            ("[0.32]", [0.32]),
            ("[-0.32]", [-0.32]),
            ("[.32]", [0.32]),
            ("[-.32]", [-0.32]),
            ("[12.]", [12.0]),
            ("[-12.]", [-12.0]),
            ("[12.32]", [12.32]),
            ("[-12.12]", [-12.12]),
            ("[3.1415926]", [3.1415926]),
            ("[.123456789]", [0.123456789]),
            ("[.0123]", [0.0123]),
            ("[0.0123]", [0.0123]),
            ("[-.0123]", [-0.0123]),
            ("[-0.0123]", [-0.0123]),
            # exponent marker in both spellings, upper and lower case
            ("[3.1E+12]", [3.1e12]),
            ("[3.1e+12]", [3.1e12]),
            ("[.1E-23]", [0.1e-23]),
            ("[.1e-23]", [0.1e-23]),
        ]
    )
    def test_float_numeric_values(self, js, expected_py):
        py = js_obj_str_to_python(js)
        self.assertEqual(py, expected_py)

    # @parameterized.expand([
    #     ('["Test\\nDrive"]\n{"Test": "Drive"}', [['Test\nDrive'], {'Test': 'Drive'}]),
    # ])
    # def test_jsonlines(self, js, expected_py):
    #     py = js_obj_str_to_python(js)
    #     self.assertEqual(py, expected_py)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParserExceptions(SearxTestCase):
    """Inputs that ``js_obj_str_to_python`` has to reject with an exception."""

    @parameterized.expand(
        [
            ('}{', ValueError),
            ('', ValueError),
            (None, ValueError),
        ]
    )
    def test_exceptions(self, js_src, exc_type):
        # callable form of assertRaises: the parser is invoked with js_src
        self.assertRaises(exc_type, js_obj_str_to_python, js_src)

    @parameterized.expand(
        [
            ("{whose: 's's', category_name: '>'}", ValueError),
        ]
    )
    def test_malformed_input(self, js_src, exc_type):
        self.assertRaises(exc_type, js_obj_str_to_python, js_src)

    @parameterized.expand(
        [
            (
                '{"test": """}',
                ValueError,
                'js_obj_str_to_python creates invalid JSON',
            ),
        ]
    )
    def test_error_messages(self, js_src, exc_type, exc_text):
        # besides the exception type, the message is checked as well
        self.assertRaisesRegex(exc_type, exc_text, js_obj_str_to_python, js_src)
|
||||||
|
|
||||||
|
|
||||||
|
# class TestOptions(SearxTestCase):
|
||||||
|
# @parameterized.expand(
|
||||||
|
# [
|
||||||
|
# ('{\\\"a\\\": 12}', {'a': 12}),
|
||||||
|
# ]
|
||||||
|
# )
|
||||||
|
# def test_unicode_escape(self, js, expected_py):
|
||||||
|
# py = js_obj_str_to_python(js)
|
||||||
|
# self.assertEqual(py, expected_py)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseJsonObjects(SearxTestCase):
    """Cases taken from the chompjs tests for parsing several objects.

    ``js_obj_str_to_python`` returns a single value, so every expected result
    is a one-element list and the parsed value is wrapped in a list before
    the comparison.  Commented-out cases are not yet supported.
    """

    @parameterized.expand(
        [
            # ("", []),
            # ("aaaaaaaaaaaaaaaa", []),
            # (" ", []),
            (" {'a': 12}", [{'a': 12}]),
            # ("[1, 2, 3, 4]xxxxxxxxxxxxxxxxxxxxxxxx", [[1, 2, 3, 4]]),
            # ("[12] [13] [14]", [[12], [13], [14]]),
            # ("[10] {'a': [1, 1, 1,]}", [[10], {'a': [1, 1, 1]}]),
            # ("[1][1][1]", [[1], [1], [1]]),
            # ("[1] [2] {'a': ", [[1], [2]]),
            # ("[]", [[]]),
            # ("[][][][]", [[], [], [], []]),
            ("{}", [{}]),
            # ("{}{}{}{}", [{}, {}, {}, {}]),
            # ("{{}}{{}}", []),
            # ("[[]][[]]", [[[]], [[]]]),
            # ("{am: 'ab'}\n{'ab': 'xx'}", [{'am': 'ab'}, {'ab': 'xx'}]),
            # (
            #     'function(a, b, c){ /* ... */ }({"a": 12}, Null, [1, 2, 3])',
            #     [{}, {'a': 12}, [1, 2, 3]],
            # ),
            # ('{"a": 12, broken}{"c": 100}', [{'c': 100}]),
            # ('[12,,,,21][211,,,][12,12][12,,,21]', [[12, 12]]),
        ]
    )
    def test_parse_json_objects(self, js_src, expected):
        self.assertEqual([js_obj_str_to_python(js_src)], expected)
|
||||||
Reference in New Issue
Block a user