[mod] comprehensive revision of the searxng_extra/update/ scripts

- pylint all scripts
- fix some errors reported by pyright
- from searx.data import data_dir (Path.open)
- fix import from pygments.formatters.html

NOTE: no functional changes!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2024-03-10 15:33:23 +01:00 committed by Markus Heiser
parent 0ffec440b2
commit ce4aaf6cad
10 changed files with 51 additions and 61 deletions

View File

@ -11,11 +11,10 @@ Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
""" """
# pylint: disable=use-dict-literal # pylint: disable=use-dict-literal
from os.path import join
import requests import requests
from searx import searx_dir from searx.data import data_dir
DATA_FILE = data_dir / 'ahmia_blacklist.txt'
URL = 'https://ahmia.fi/blacklist/banned/' URL = 'https://ahmia.fi/blacklist/banned/'
@ -23,15 +22,12 @@ def fetch_ahmia_blacklist():
resp = requests.get(URL, timeout=3.0) resp = requests.get(URL, timeout=3.0)
if resp.status_code != 200: if resp.status_code != 200:
# pylint: disable=broad-exception-raised # pylint: disable=broad-exception-raised
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) # type: ignore
return resp.text.split() return resp.text.split()
def get_ahmia_blacklist_filename():
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
if __name__ == '__main__': if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist() blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: blacklist.sort()
with DATA_FILE.open("w", encoding='utf-8') as f:
f.write('\n'.join(blacklist)) f.write('\n'.join(blacklist))

View File

@ -15,12 +15,11 @@ import re
import unicodedata import unicodedata
import json import json
# set path
from os.path import join
from searx import searx_dir
from searx.locales import LOCALE_NAMES, locales_initialize from searx.locales import LOCALE_NAMES, locales_initialize
from searx.engines import wikidata, set_loggers from searx.engines import wikidata, set_loggers
from searx.data import data_dir
DATA_FILE = data_dir / 'currencies.json'
set_loggers(wikidata, 'wikidata') set_loggers(wikidata, 'wikidata')
locales_initialize() locales_initialize()
@ -133,10 +132,6 @@ def fetch_db():
return db return db
def get_filename():
return join(join(searx_dir, "data"), "currencies.json")
def main(): def main():
db = fetch_db() db = fetch_db()
@ -156,8 +151,8 @@ def main():
if len(db['names'][name]) == 1: if len(db['names'][name]) == 1:
db['names'][name] = db['names'][name][0] db['names'][name] = db['names'][name][0]
with open(get_filename(), 'w', encoding='utf8') as f: with DATA_FILE.open('w', encoding='utf8') as f:
json.dump(db, f, ensure_ascii=False, indent=4) json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -24,6 +24,9 @@ from searx import searx_dir
from searx.utils import gen_useragent, detect_language from searx.utils import gen_useragent, detect_language
import searx.search import searx.search
import searx.network import searx.network
from searx.data import data_dir
DATA_FILE = data_dir / 'engine_descriptions.json'
set_loggers(wikidata, 'wikidata') set_loggers(wikidata, 'wikidata')
locales_initialize() locales_initialize()
@ -362,8 +365,8 @@ def main():
fetch_website_descriptions() fetch_website_descriptions()
output = get_output() output = get_output()
with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f: with DATA_FILE.open('w', encoding='utf8') as f:
f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False)) f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -144,9 +144,9 @@ def write_languages_file(sxng_tag_list):
item = ( item = (
sxng_tag, sxng_tag,
sxng_locale.get_language_name().title(), sxng_locale.get_language_name().title(), # type: ignore
sxng_locale.get_territory_name() or '', sxng_locale.get_territory_name() or '',
sxng_locale.english_name.split(' (')[0], sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
UnicodeEscape(flag), UnicodeEscape(flag),
) )
@ -154,7 +154,7 @@ def write_languages_file(sxng_tag_list):
language_codes = tuple(language_codes) language_codes = tuple(language_codes)
with open(languages_file, 'w', encoding='utf-8') as new_file: with languages_file.open('w', encoding='utf-8') as new_file:
file_content = "{header} {language_codes}{footer}".format( file_content = "{header} {language_codes}{footer}".format(
header=languages_file_header, header=languages_file_header,
language_codes=pformat(language_codes, width=120, indent=4)[1:-1], language_codes=pformat(language_codes, width=120, indent=4)[1:-1],

View File

@ -8,20 +8,17 @@ from :py:obj:`BANGS_URL`.
""" """
from pathlib import Path
import json import json
import httpx import httpx
from searx import searx_dir
from searx.external_bang import LEAF_KEY from searx.external_bang import LEAF_KEY
from searx.data import data_dir
DATA_FILE = data_dir / 'external_bangs.json'
BANGS_URL = 'https://duckduckgo.com/bang.js' BANGS_URL = 'https://duckduckgo.com/bang.js'
"""JSON file which contains the bangs.""" """JSON file which contains the bangs."""
BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json'
HTTPS_COLON = 'https:' HTTPS_COLON = 'https:'
HTTP_COLON = 'http:' HTTP_COLON = 'http:'
@ -36,8 +33,8 @@ def main():
'version': 0, 'version': 0,
'trie': trie, 'trie': trie,
} }
with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f: with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4) json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)
def merge_when_no_leaf(node): def merge_when_no_leaf(node):

View File

@ -11,13 +11,14 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
import json import json
import re import re
from os.path import join
from urllib.parse import urlparse, urljoin from urllib.parse import urlparse, urljoin
from packaging.version import parse from packaging.version import parse
import requests import requests
from lxml import html from lxml import html
from searx import searx_dir from searx.data import data_dir
DATA_FILE = data_dir / 'useragents.json'
URL = 'https://ftp.mozilla.org/pub/firefox/releases/' URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/'
@ -41,7 +42,7 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0) resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200: if resp.status_code != 200:
# pylint: disable=broad-exception-raised # pylint: disable=broad-exception-raised
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) # type: ignore
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
versions = [] versions = []
@ -74,11 +75,7 @@ def fetch_firefox_last_versions():
return result return result
def get_useragents_filename():
return join(join(searx_dir, "data"), "useragents.json")
if __name__ == '__main__': if __name__ == '__main__':
useragents["versions"] = fetch_firefox_last_versions() useragents["versions"] = fetch_firefox_last_versions()
with open(get_useragents_filename(), "w", encoding='utf-8') as f: with DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(useragents, f, indent=4, ensure_ascii=False) json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Update locale names in :origin:`searx/data/locales.json` used by """Update locale names in :origin:`searx/data/locales.json` used by
:ref:`searx.locales` :ref:`searx.locales`
@ -6,12 +7,12 @@
- :py:obj:`searx.locales.RTL_LOCALES` - :py:obj:`searx.locales.RTL_LOCALES`
- :py:obj:`searx.locales.LOCALE_NAMES` - :py:obj:`searx.locales.LOCALE_NAMES`
""" """
# pylint: disable=invalid-name
from __future__ import annotations from __future__ import annotations
from typing import Set from typing import Set
import json import json
from pathlib import Path from pathlib import Path
import os
import babel import babel
import babel.languages import babel.languages
@ -61,7 +62,7 @@ def main():
"RTL_LOCALES": sorted(RTL_LOCALES), "RTL_LOCALES": sorted(RTL_LOCALES),
} }
with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f: with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False) json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
@ -84,7 +85,6 @@ def get_locale_descr(locale: babel.Locale, tr_locale):
return native_language return native_language
return native_language + ' (' + english_language + ')' return native_language + ' (' + english_language + ')'
else:
result = native_language + ', ' + native_territory + ' (' + english_language result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory: if english_territory:
return result + ', ' + english_territory + ')' return result + ', ' + english_territory + ')'

View File

@ -45,13 +45,14 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
import json import json
import collections import collections
from pathlib import Path
from searx import searx_dir
from searx.network import set_timeout_for_thread from searx.network import set_timeout_for_thread
from searx.engines import wikidata, set_loggers from searx.engines import wikidata, set_loggers
from searx.sxng_locales import sxng_locales from searx.sxng_locales import sxng_locales
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
from searx.data import data_dir
DATA_FILE = data_dir / 'osm_keys_tags.json'
set_loggers(wikidata, 'wikidata') set_loggers(wikidata, 'wikidata')
@ -203,10 +204,6 @@ def optimize_keys(data):
return data return data
def get_osm_tags_filename():
return Path(searx_dir) / "data" / "osm_keys_tags.json"
if __name__ == '__main__': if __name__ == '__main__':
set_timeout_for_thread(60) set_timeout_for_thread(60)
@ -214,5 +211,5 @@ if __name__ == '__main__':
'keys': optimize_keys(get_keys()), 'keys': optimize_keys(get_keys()),
'tags': optimize_tags(get_tags()), 'tags': optimize_tags(get_tags()),
} }
with open(get_osm_tags_filename(), 'w', encoding="utf8") as f: with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True) json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)

View File

@ -1,14 +1,16 @@
#!/usr/bin/env python #!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
"""Update pygments style """Update pygments style
Call this script after each upgrade of pygments Call this script after each upgrade of pygments
""" """
# pylint: disable=too-few-public-methods
from pathlib import Path from pathlib import Path
import pygments import pygments
from pygments.formatters import HtmlFormatter from pygments.formatters.html import HtmlFormatter
from searx import searx_dir from searx import searx_dir
@ -41,7 +43,7 @@ END_DARK_THEME = """
""" """
class Formatter(HtmlFormatter): class Formatter(HtmlFormatter): # pylint: disable=missing-class-docstring
@property @property
def _pre_style(self): def _pre_style(self):
return 'line-height: 100%;' return 'line-height: 100%;'
@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str:
if __name__ == '__main__': if __name__ == '__main__':
print("update: %s" % LESS_FILE) print("update: %s" % LESS_FILE)
with open(LESS_FILE, 'w') as f: with LESS_FILE.open('w', encoding='utf8') as f:
f.write(generat_css('default', 'lightbulb')) f.write(generat_css('default', 'lightbulb'))

View File

@ -18,6 +18,9 @@ from os.path import join
from searx import searx_dir from searx import searx_dir
from searx.engines import wikidata, set_loggers from searx.engines import wikidata, set_loggers
from searx.data import data_dir
DATA_FILE = data_dir / 'wikidata_units.json'
set_loggers(wikidata, 'wikidata') set_loggers(wikidata, 'wikidata')
@ -58,9 +61,9 @@ def get_data():
def get_wikidata_units_filename(): def get_wikidata_units_filename():
return join(join(searx_dir, "data"), "wikidata_units.json") return join(join(searx_dir, "data"), "")
if __name__ == '__main__': if __name__ == '__main__':
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, ensure_ascii=False) json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)