From ce4aaf6cad9c0a57f5f12ef80ec4e1b45bcf87d4 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 10 Mar 2024 15:33:23 +0100 Subject: [PATCH] [mod] comprehensive revision of the searxng_extra/update/ scripts - pylint all scripts - fix some errors reported by pyright - from searx.data import data_dir (Path.open) - fix import from pygments.formatters.html NOTE: none functional changes! Signed-off-by: Markus Heiser --- searxng_extra/update/update_ahmia_blacklist.py | 14 +++++--------- searxng_extra/update/update_currencies.py | 15 +++++---------- .../update/update_engine_descriptions.py | 7 +++++-- searxng_extra/update/update_engine_traits.py | 6 +++--- searxng_extra/update/update_external_bangs.py | 11 ++++------- searxng_extra/update/update_firefox_version.py | 15 ++++++--------- searxng_extra/update/update_locales.py | 14 +++++++------- searxng_extra/update/update_osm_keys_tags.py | 13 +++++-------- searxng_extra/update/update_pygments.py | 8 +++++--- searxng_extra/update/update_wikidata_units.py | 9 ++++++--- 10 files changed, 51 insertions(+), 61 deletions(-) diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py index a11413f14..8bee4a808 100755 --- a/searxng_extra/update/update_ahmia_blacklist.py +++ b/searxng_extra/update/update_ahmia_blacklist.py @@ -11,11 +11,10 @@ Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data """ # pylint: disable=use-dict-literal -from os.path import join - import requests -from searx import searx_dir +from searx.data import data_dir +DATA_FILE = data_dir / 'ahmia_blacklist.txt' URL = 'https://ahmia.fi/blacklist/banned/' @@ -23,15 +22,12 @@ def fetch_ahmia_blacklist(): resp = requests.get(URL, timeout=3.0) if resp.status_code != 200: # pylint: disable=broad-exception-raised - raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) + raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) # type: 
ignore return resp.text.split() -def get_ahmia_blacklist_filename(): - return join(join(searx_dir, "data"), "ahmia_blacklist.txt") - - if __name__ == '__main__': blacklist = fetch_ahmia_blacklist() - with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: + blacklist.sort() + with DATA_FILE.open("w", encoding='utf-8') as f: f.write('\n'.join(blacklist)) diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index c604e5420..a949c4696 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -15,12 +15,11 @@ import re import unicodedata import json -# set path -from os.path import join - -from searx import searx_dir from searx.locales import LOCALE_NAMES, locales_initialize from searx.engines import wikidata, set_loggers +from searx.data import data_dir + +DATA_FILE = data_dir / 'currencies.json' set_loggers(wikidata, 'wikidata') locales_initialize() @@ -133,10 +132,6 @@ def fetch_db(): return db -def get_filename(): - return join(join(searx_dir, "data"), "currencies.json") - - def main(): db = fetch_db() @@ -156,8 +151,8 @@ def main(): if len(db['names'][name]) == 1: db['names'][name] = db['names'][name][0] - with open(get_filename(), 'w', encoding='utf8') as f: - json.dump(db, f, ensure_ascii=False, indent=4) + with DATA_FILE.open('w', encoding='utf8') as f: + json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False) if __name__ == '__main__': diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py index 301ce798d..fc793ea2f 100755 --- a/searxng_extra/update/update_engine_descriptions.py +++ b/searxng_extra/update/update_engine_descriptions.py @@ -24,6 +24,9 @@ from searx import searx_dir from searx.utils import gen_useragent, detect_language import searx.search import searx.network +from searx.data import data_dir + +DATA_FILE = data_dir / 'engine_descriptions.json' set_loggers(wikidata, 
'wikidata') locales_initialize() @@ -362,8 +365,8 @@ def main(): fetch_website_descriptions() output = get_output() - with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f: - f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False)) + with DATA_FILE.open('w', encoding='utf8') as f: + f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False)) if __name__ == "__main__": diff --git a/searxng_extra/update/update_engine_traits.py b/searxng_extra/update/update_engine_traits.py index faab198d2..eb4484f62 100755 --- a/searxng_extra/update/update_engine_traits.py +++ b/searxng_extra/update/update_engine_traits.py @@ -144,9 +144,9 @@ def write_languages_file(sxng_tag_list): item = ( sxng_tag, - sxng_locale.get_language_name().title(), + sxng_locale.get_language_name().title(), # type: ignore sxng_locale.get_territory_name() or '', - sxng_locale.english_name.split(' (')[0], + sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '', UnicodeEscape(flag), ) @@ -154,7 +154,7 @@ def write_languages_file(sxng_tag_list): language_codes = tuple(language_codes) - with open(languages_file, 'w', encoding='utf-8') as new_file: + with languages_file.open('w', encoding='utf-8') as new_file: file_content = "{header} {language_codes}{footer}".format( header=languages_file_header, language_codes=pformat(language_codes, width=120, indent=4)[1:-1], diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py index a12d5b2f8..9896d1d7d 100755 --- a/searxng_extra/update/update_external_bangs.py +++ b/searxng_extra/update/update_external_bangs.py @@ -8,20 +8,17 @@ from :py:obj:`BANGS_URL`. 
""" -from pathlib import Path import json - import httpx -from searx import searx_dir from searx.external_bang import LEAF_KEY +from searx.data import data_dir +DATA_FILE = data_dir / 'external_bangs.json' BANGS_URL = 'https://duckduckgo.com/bang.js' """JSON file which contains the bangs.""" -BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json' - HTTPS_COLON = 'https:' HTTP_COLON = 'http:' @@ -36,8 +33,8 @@ def main(): 'version': 0, 'trie': trie, } - with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f: - json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4) + with DATA_FILE.open('w', encoding="utf8") as f: + json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False) def merge_when_no_leaf(node): diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py index 2e730764a..ad6d9b842 100755 --- a/searxng_extra/update/update_firefox_version.py +++ b/searxng_extra/update/update_firefox_version.py @@ -11,13 +11,14 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ... 
import json import re -from os.path import join from urllib.parse import urlparse, urljoin from packaging.version import parse import requests from lxml import html -from searx import searx_dir +from searx.data import data_dir + +DATA_FILE = data_dir / 'useragents.json' URL = 'https://ftp.mozilla.org/pub/firefox/releases/' RELEASE_PATH = '/pub/firefox/releases/' @@ -41,7 +42,7 @@ def fetch_firefox_versions(): resp = requests.get(URL, timeout=2.0) if resp.status_code != 200: # pylint: disable=broad-exception-raised - raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) + raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) # type: ignore dom = html.fromstring(resp.text) versions = [] @@ -74,11 +75,7 @@ def fetch_firefox_last_versions(): return result -def get_useragents_filename(): - return join(join(searx_dir, "data"), "useragents.json") - - if __name__ == '__main__': useragents["versions"] = fetch_firefox_last_versions() - with open(get_useragents_filename(), "w", encoding='utf-8') as f: - json.dump(useragents, f, indent=4, ensure_ascii=False) + with DATA_FILE.open('w', encoding='utf-8') as f: + json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False) diff --git a/searxng_extra/update/update_locales.py b/searxng_extra/update/update_locales.py index d97a5dd88..4b8f6222c 100755 --- a/searxng_extra/update/update_locales.py +++ b/searxng_extra/update/update_locales.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """Update locale names in :origin:`searx/data/locales.json` used by :ref:`searx.locales` @@ -6,12 +7,12 @@ - :py:obj:`searx.locales.RTL_LOCALES` - :py:obj:`searx.locales.LOCALE_NAMES` """ +# pylint: disable=invalid-name from __future__ import annotations from typing import Set import json from pathlib import Path -import os import babel import babel.languages @@ -61,7 +62,7 @@ def main(): "RTL_LOCALES": sorted(RTL_LOCALES), } - with 
open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f: + with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f: json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False) @@ -84,11 +85,10 @@ def get_locale_descr(locale: babel.Locale, tr_locale): return native_language return native_language + ' (' + english_language + ')' - else: - result = native_language + ', ' + native_territory + ' (' + english_language - if english_territory: - return result + ', ' + english_territory + ')' - return result + ')' + result = native_language + ', ' + native_territory + ' (' + english_language + if english_territory: + return result + ', ' + english_territory + ')' + return result + ')' def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]: diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py index 75a55145d..d350756ec 100755 --- a/searxng_extra/update/update_osm_keys_tags.py +++ b/searxng_extra/update/update_osm_keys_tags.py @@ -45,13 +45,14 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ... 
import json import collections -from pathlib import Path -from searx import searx_dir from searx.network import set_timeout_for_thread from searx.engines import wikidata, set_loggers from searx.sxng_locales import sxng_locales from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK +from searx.data import data_dir + +DATA_FILE = data_dir / 'osm_keys_tags.json' set_loggers(wikidata, 'wikidata') @@ -203,10 +204,6 @@ def optimize_keys(data): return data -def get_osm_tags_filename(): - return Path(searx_dir) / "data" / "osm_keys_tags.json" - - if __name__ == '__main__': set_timeout_for_thread(60) @@ -214,5 +211,5 @@ if __name__ == '__main__': 'keys': optimize_keys(get_keys()), 'tags': optimize_tags(get_tags()), } - with open(get_osm_tags_filename(), 'w', encoding="utf8") as f: - json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True) + with DATA_FILE.open('w', encoding="utf8") as f: + json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False) diff --git a/searxng_extra/update/update_pygments.py b/searxng_extra/update/update_pygments.py index 69a8ee2db..5d11b28dc 100755 --- a/searxng_extra/update/update_pygments.py +++ b/searxng_extra/update/update_pygments.py @@ -1,14 +1,16 @@ #!/usr/bin/env python +# lint: pylint # SPDX-License-Identifier: AGPL-3.0-or-later """Update pygments style Call this script after each upgrade of pygments """ +# pylint: disable=too-few-public-methods from pathlib import Path import pygments -from pygments.formatters import HtmlFormatter +from pygments.formatters.html import HtmlFormatter from searx import searx_dir @@ -41,7 +43,7 @@ END_DARK_THEME = """ """ -class Formatter(HtmlFormatter): +class Formatter(HtmlFormatter): # pylint: disable=missing-class-docstring @property def _pre_style(self): return 'line-height: 100%;' @@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str: if __name__ == '__main__': print("update: %s" % LESS_FILE) - with open(LESS_FILE, 'w') as f: + with LESS_FILE.open('w', 
encoding='utf8') as f: f.write(generat_css('default', 'lightbulb')) diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py index e999b6cfd..6a7ceb1b8 100755 --- a/searxng_extra/update/update_wikidata_units.py +++ b/searxng_extra/update/update_wikidata_units.py @@ -18,6 +18,9 @@ from os.path import join from searx import searx_dir from searx.engines import wikidata, set_loggers +from searx.data import data_dir + +DATA_FILE = data_dir / 'wikidata_units.json' set_loggers(wikidata, 'wikidata') @@ -58,9 +61,9 @@ def get_data(): def get_wikidata_units_filename(): return join(join(searx_dir, "data"), "wikidata_units.json") if __name__ == '__main__': - with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: - json.dump(get_data(), f, indent=4, ensure_ascii=False) + with DATA_FILE.open('w', encoding="utf8") as f: + json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)