mirror of
https://github.com/searxng/searxng.git
synced 2025-02-20 12:20:04 +00:00
Compare commits
14 Commits
010a631fa6
...
2acf7dcbf3
Author | SHA1 | Date | |
---|---|---|---|
|
2acf7dcbf3 | ||
|
738906358b | ||
|
fc8938c968 | ||
|
3b9e06fbd2 | ||
|
4934922156 | ||
|
bee39e4ec0 | ||
|
3f4e0b0859 | ||
|
a235c54f8c | ||
|
df3344e5d5 | ||
|
6697cb6950 | ||
|
2a421825be | ||
|
44f5c299be | ||
|
319b88c786 | ||
|
df97a9a4e0 |
@ -175,3 +175,4 @@ features or generally made searx better:
|
||||
- Daniel Kukula `<https://github.com/dkuku>`_
|
||||
- Patrick Evans `<https://github.com/holysoles>`_
|
||||
- Daniel Mowitz `<https://daniel.mowitz.rocks>`_
|
||||
- SentientTapeDrive `<https://github.com/SentientTapeDrive>`_ `<https://thefubar.company>`_
|
||||
|
105
docs/dev/engines/online/kagi.rst
Normal file
105
docs/dev/engines/online/kagi.rst
Normal file
@ -0,0 +1,105 @@
|
||||
.. _kagi engine:
|
||||
|
||||
Kagi
|
||||
====
|
||||
|
||||
The Kagi engine scrapes search results from Kagi's HTML search interface.
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
Configuration
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: kagi
|
||||
engine: kagi
|
||||
shortcut: kg
|
||||
categories: [general, web]
|
||||
timeout: 4.0
|
||||
api_key: "YOUR-KAGI-TOKEN" # required
|
||||
about:
|
||||
website: https://kagi.com
|
||||
use_official_api: false
|
||||
require_api_key: true
|
||||
results: HTML
|
||||
|
||||
|
||||
Parameters
|
||||
~~~~~~~~~~
|
||||
|
||||
``api_key`` : required
|
||||
The Kagi API token used for authentication. Can be obtained from your Kagi account settings.
|
||||
|
||||
``pageno`` : optional
|
||||
The page number for paginated results. Defaults to 1.
|
||||
|
||||
Example Request
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
.. code:: python
|
||||
|
||||
params = {
|
||||
'api_key': 'YOUR-KAGI-TOKEN',
|
||||
'pageno': 1,
|
||||
'headers': {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'DNT': '1'
|
||||
}
|
||||
}
|
||||
query = 'test query'
|
||||
request_params = kagi.request(query, params)
|
||||
|
||||
Example Response
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code:: python
|
||||
|
||||
[
|
||||
# Search result
|
||||
{
|
||||
'url': 'https://example.com/',
|
||||
'title': 'Example Title',
|
||||
'content': 'Example content snippet...',
|
||||
'domain': 'example.com'
|
||||
}
|
||||
]
|
||||
|
||||
Implementation
|
||||
--------------
|
||||
|
||||
The engine performs the following steps:
|
||||
|
||||
1. Constructs a GET request to ``https://kagi.com/html/search`` with:
|
||||
- ``q`` parameter for the search query
|
||||
- ``token`` parameter for authentication
|
||||
- ``batch`` parameter for pagination
|
||||
|
||||
2. Parses the HTML response using XPath to extract:
|
||||
- Result titles
|
||||
- URLs
|
||||
- Content snippets
|
||||
- Domain information
|
||||
|
||||
3. Handles various error cases:
|
||||
- 401: Invalid API token
|
||||
- 429: Rate limit exceeded
|
||||
- Other non-200 status codes
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
||||
- lxml: For HTML parsing and XPath evaluation
|
||||
- urllib.parse: For URL handling and encoding
|
||||
- searx.utils: For text extraction and XPath helpers
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
- The engine requires a valid Kagi API token to function
|
||||
- Results are scraped from Kagi's HTML interface rather than using an official API
|
||||
- Rate limiting may apply based on your Kagi subscription level
|
||||
- The engine sets specific browser-like headers to ensure reliable scraping
|
@ -33,7 +33,7 @@ class SXNGAnswerer(Answerer):
|
||||
|
||||
return AnswererInfo(
|
||||
name=gettext(self.__doc__),
|
||||
description=gettext(f"Compute {'/'.join(self.keywords)} of the arguments"),
|
||||
description=gettext("Compute {func} of the arguments".format(func='/'.join(self.keywords))),
|
||||
keywords=self.keywords,
|
||||
examples=["avg 123 548 2.04 24.2"],
|
||||
)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -5359,6 +5359,7 @@
|
||||
"pt": "pa'anga",
|
||||
"ru": "тонганская паанга",
|
||||
"sk": "Tonžská paʻanga",
|
||||
"sl": "tongovska paanga",
|
||||
"sr": "тонганска панга",
|
||||
"sv": "Tongansk pa'anga",
|
||||
"th": "ปาอางา",
|
||||
@ -5990,6 +5991,23 @@
|
||||
"uk": "Східно-карибський долар",
|
||||
"vi": "Đô la Đông Caribe"
|
||||
},
|
||||
"XCG": {
|
||||
"ar": "الجلدر الكاريبي",
|
||||
"ca": "florí caribeny",
|
||||
"de": "Karibischer Gulden",
|
||||
"en": "Caribbean guilder",
|
||||
"eo": "Karibia guldeno",
|
||||
"es": "florín caribeño",
|
||||
"fr": "Florin caribéen",
|
||||
"hr": "Karipski gulden",
|
||||
"hu": "karibi forint",
|
||||
"it": "fiorino caraibico",
|
||||
"nl": "Caribische gulden",
|
||||
"pap": "Florin karibense",
|
||||
"pt": "Florim do Caribe",
|
||||
"ru": "Карибский гульден",
|
||||
"sl": "karibski goldinar"
|
||||
},
|
||||
"XDR": {
|
||||
"ar": "حقوق السحب الخاصة",
|
||||
"bg": "Специални права на тираж",
|
||||
@ -6109,6 +6127,7 @@
|
||||
"vi": "Franc CFP"
|
||||
},
|
||||
"XPT": {
|
||||
"ar": "استثمار البلاتين",
|
||||
"de": "Platinpreis",
|
||||
"en": "platinum as an investment"
|
||||
},
|
||||
@ -6375,6 +6394,7 @@
|
||||
"CLP$": "CLP",
|
||||
"COL$": "COP",
|
||||
"COU$": "COU",
|
||||
"Cg": "XCG",
|
||||
"D": "GMD",
|
||||
"DA": "DZD",
|
||||
"DEN": "MKD",
|
||||
@ -6439,6 +6459,7 @@
|
||||
"NT$": "TWD",
|
||||
"NZ$": "NZD",
|
||||
"Nfk": "ERN",
|
||||
"Noha heinen krasss": "EUR",
|
||||
"Nu": "BTN",
|
||||
"N₨": "NPR",
|
||||
"P": "BWP",
|
||||
@ -6469,6 +6490,7 @@
|
||||
"Ush": "UGX",
|
||||
"VT": "VUV",
|
||||
"WS$": "WST",
|
||||
"XCG": "XCG",
|
||||
"XDR": "XDR",
|
||||
"Z$": "ZWL",
|
||||
"ZK": "ZMW",
|
||||
@ -7041,6 +7063,8 @@
|
||||
"canadiske dollar": "CAD",
|
||||
"cape verde escudo": "CVE",
|
||||
"cape verdean escudo": "CVE",
|
||||
"caribbean guilder": "XCG",
|
||||
"caribische gulden": "XCG",
|
||||
"cayman adaları doları": "KYD",
|
||||
"cayman islands dollar": "KYD",
|
||||
"caymaneilandse dollar": "KYD",
|
||||
@ -8382,6 +8406,7 @@
|
||||
"filler": "HUF",
|
||||
"fillér": "HUF",
|
||||
"fiorino arubano": "AWG",
|
||||
"fiorino caraibico": "XCG",
|
||||
"fiorino delle antille olandesi": "ANG",
|
||||
"fiorino di aruba": "AWG",
|
||||
"fiorino ungherese": "HUF",
|
||||
@ -8393,6 +8418,7 @@
|
||||
"florim das antilhas holandesas": "ANG",
|
||||
"florim das antilhas neerlandesas": "ANG",
|
||||
"florim de aruba": "AWG",
|
||||
"florim do caribe": "XCG",
|
||||
"florim húngaro": "HUF",
|
||||
"florin": "AWG",
|
||||
"florin antiano": "ANG",
|
||||
@ -8406,14 +8432,17 @@
|
||||
"florin arubeño": "AWG",
|
||||
"florin arubez": "AWG",
|
||||
"florin arubiano": "AWG",
|
||||
"florin caribéen": "XCG",
|
||||
"florin d'aruba": "AWG",
|
||||
"florin de las antilhas neerlandesas": "ANG",
|
||||
"florin des antilles néerlandaises": "ANG",
|
||||
"florin d’aruba": "AWG",
|
||||
"florin hungaro": "HUF",
|
||||
"florin húngaro": "HUF",
|
||||
"florin karibense": "XCG",
|
||||
"florint": "HUF",
|
||||
"florint húngaro": "HUF",
|
||||
"florí caribeny": "XCG",
|
||||
"florí d'aruba": "AWG",
|
||||
"florí de les antilles neerlandeses": "ANG",
|
||||
"florí d’aruba": "AWG",
|
||||
@ -8421,6 +8450,7 @@
|
||||
"florín antillano neerlandés": "ANG",
|
||||
"florín arubeno": "AWG",
|
||||
"florín arubeño": "AWG",
|
||||
"florín caribeño": "XCG",
|
||||
"florín das antillas neerlandesas": "ANG",
|
||||
"florín de aruba": "AWG",
|
||||
"florín hungaro": "HUF",
|
||||
@ -9162,6 +9192,11 @@
|
||||
"kapverdské escudo": "CVE",
|
||||
"kapverdski eskudo": "CVE",
|
||||
"karbovanet": "UAH",
|
||||
"karibi forint": "XCG",
|
||||
"karibia guldeno": "XCG",
|
||||
"karibischer gulden": "XCG",
|
||||
"karibski goldinar": "XCG",
|
||||
"karipski gulden": "XCG",
|
||||
"karod": "NPR",
|
||||
"kartvela lario": "GEL",
|
||||
"katar riyal": "QAR",
|
||||
@ -11966,6 +12001,7 @@
|
||||
"tongaška pa’anga": "TOP",
|
||||
"tongos pa'anga": "TOP",
|
||||
"tongos paanga": "TOP",
|
||||
"tongovska paanga": "TOP",
|
||||
"tonška pa’anga": "TOP",
|
||||
"tonžská pa'anga": "TOP",
|
||||
"tonžská paanga": "TOP",
|
||||
@ -13202,6 +13238,7 @@
|
||||
"канадски долар": "CAD",
|
||||
"канадский доллар": "CAD",
|
||||
"канадський долар": "CAD",
|
||||
"карибский гульден": "XCG",
|
||||
"катар риалы": "QAR",
|
||||
"катарски риал": "QAR",
|
||||
"катарски ријал": "QAR",
|
||||
@ -14234,6 +14271,7 @@
|
||||
"שקל ישראלי חדש": "ILS",
|
||||
"؋": "AFN",
|
||||
"ارياري": "MGA",
|
||||
"استثمار البلاتين": "XPT",
|
||||
"استثمار الذهب": "XAU",
|
||||
"استثمار الفضة": "XAG",
|
||||
"الاستثمار في الذهب": "XAU",
|
||||
@ -14243,6 +14281,7 @@
|
||||
"البوليفيانو": "BOB",
|
||||
"البيزو الكوبي": "CUP",
|
||||
"البيزو المكسيكي": "MXN",
|
||||
"الجلدر الكاريبي": "XCG",
|
||||
"الجنية البريطاني": "GBP",
|
||||
"الجنية المصري": "EGP",
|
||||
"الجنيه الاسترليني": "GBP",
|
||||
|
File diff suppressed because one or more lines are too long
@ -19,11 +19,11 @@
|
||||
"cbz",
|
||||
"djvu",
|
||||
"doc",
|
||||
"docx",
|
||||
"epub",
|
||||
"fb2",
|
||||
"htm",
|
||||
"html",
|
||||
"jpg",
|
||||
"lit",
|
||||
"lrf",
|
||||
"mht",
|
||||
@ -42,6 +42,7 @@
|
||||
"newest_added",
|
||||
"oldest",
|
||||
"oldest_added",
|
||||
"random",
|
||||
"smallest"
|
||||
]
|
||||
},
|
||||
@ -195,7 +196,7 @@
|
||||
"es": "es-es",
|
||||
"et": "et-et",
|
||||
"eu": "eu-eu",
|
||||
"fa": "fa-fa",
|
||||
"fa": "prs-prs",
|
||||
"fi": "fi-fi",
|
||||
"fil": "fil-fil",
|
||||
"fr": "fr-fr",
|
||||
@ -203,12 +204,14 @@
|
||||
"gd": "gd-gd",
|
||||
"gl": "gl-gl",
|
||||
"gu": "gu-gu",
|
||||
"ha": "ha-latn",
|
||||
"he": "he-he",
|
||||
"hi": "hi-hi",
|
||||
"hr": "hr-hr",
|
||||
"hu": "hu-hu",
|
||||
"hy": "hy-hy",
|
||||
"id": "id-id",
|
||||
"ig": "ig-ig",
|
||||
"is": "is-is",
|
||||
"it": "it-it",
|
||||
"ja": "ja-ja",
|
||||
@ -218,6 +221,8 @@
|
||||
"kn": "kn-kn",
|
||||
"ko": "ko-ko",
|
||||
"kok": "kok-kok",
|
||||
"ku": "ku-arab",
|
||||
"ky": "ky-ky",
|
||||
"lb": "lb-lb",
|
||||
"lo": "lo-lo",
|
||||
"lt": "lt-lt",
|
||||
@ -225,6 +230,7 @@
|
||||
"mi": "mi-mi",
|
||||
"mk": "mk-mk",
|
||||
"ml": "ml-ml",
|
||||
"mn": "mn-cyrl-mn",
|
||||
"mr": "mr-mr",
|
||||
"ms": "ms-ms",
|
||||
"mt": "mt-mt",
|
||||
@ -232,22 +238,33 @@
|
||||
"ne": "ne-ne",
|
||||
"nl": "nl-nl",
|
||||
"nn": "nn-nn",
|
||||
"nso": "nso-nso",
|
||||
"or": "or-or",
|
||||
"pa_Arab": "pa-arab",
|
||||
"pa_Guru": "pa-guru",
|
||||
"pl": "pl-pl",
|
||||
"pt": "pt-br",
|
||||
"qu": "quz-quz",
|
||||
"quc": "quc-quc",
|
||||
"ro": "ro-ro",
|
||||
"ru": "ru-ru",
|
||||
"rw": "rw-rw",
|
||||
"sd_Arab": "sd-arab",
|
||||
"si": "si-si",
|
||||
"sk": "sk-sk",
|
||||
"sl": "sl-sl",
|
||||
"sq": "sq-sq",
|
||||
"sr_Cyrl": "sr-cyrl",
|
||||
"sr_Latn": "sr-latn",
|
||||
"sv": "sv-sv",
|
||||
"sw": "sw-sw",
|
||||
"ta": "ta-ta",
|
||||
"te": "te-te",
|
||||
"tg": "tg-cyrl",
|
||||
"th": "th-th",
|
||||
"ti": "ti-ti",
|
||||
"tk": "tk-tk",
|
||||
"tn": "tn-tn",
|
||||
"tr": "tr-tr",
|
||||
"tt": "tt-tt",
|
||||
"ug": "ug-ug",
|
||||
@ -255,9 +272,13 @@
|
||||
"ur": "ur-ur",
|
||||
"uz_Latn": "uz-latn",
|
||||
"vi": "vi-vi",
|
||||
"wo": "wo-wo",
|
||||
"xh": "xh-xh",
|
||||
"yo": "yo-yo",
|
||||
"zh": "zh-hans",
|
||||
"zh_Hans": "zh-hans",
|
||||
"zh_Hant": "zh-hant"
|
||||
"zh_Hant": "zh-hant",
|
||||
"zu": "zu-zu"
|
||||
},
|
||||
"regions": {
|
||||
"am-ET": "am-et",
|
||||
@ -473,12 +494,14 @@
|
||||
"kk-KZ": "kk-kz",
|
||||
"km-KH": "km-kh",
|
||||
"ko-KR": "ko-kr",
|
||||
"ky-KG": "ky-kg",
|
||||
"lb-LU": "lb-lu",
|
||||
"lo-LA": "lo-la",
|
||||
"lt-LT": "lt-lt",
|
||||
"lv-LV": "lv-lv",
|
||||
"mi-NZ": "mi-nz",
|
||||
"mk-MK": "mk-mk",
|
||||
"mn-MN": "mn-mn",
|
||||
"ms-BN": "ms-bn",
|
||||
"ms-MY": "ms-my",
|
||||
"ms-SG": "ms-sg",
|
||||
@ -512,6 +535,8 @@
|
||||
"ru-KZ": "ru-kz",
|
||||
"ru-RU": "ru-ru",
|
||||
"ru-UA": "ru-ua",
|
||||
"rw-RW": "rw-rw",
|
||||
"si-LK": "si-lk",
|
||||
"sk-SK": "sk-sk",
|
||||
"sl-SI": "sl-si",
|
||||
"sq-AL": "sq-al",
|
||||
@ -520,14 +545,23 @@
|
||||
"sr-RS": "sr-rs",
|
||||
"sv-FI": "sv-fi",
|
||||
"sv-SE": "sv-se",
|
||||
"sw-KE": "sw-ke",
|
||||
"sw-TZ": "sw-tz",
|
||||
"sw-UG": "sw-ug",
|
||||
"ta-LK": "ta-lk",
|
||||
"ta-SG": "ta-sg",
|
||||
"tg-TJ": "tg-tj",
|
||||
"th-TH": "th-th",
|
||||
"ti-ER": "ti-er",
|
||||
"tk-TM": "tk-tm",
|
||||
"tn-BW": "tn-bw",
|
||||
"tr-CY": "tr-cy",
|
||||
"tr-TR": "tr-tr",
|
||||
"uk-UA": "uk-ua",
|
||||
"ur-PK": "ur-pk",
|
||||
"vi-VN": "vi-vn",
|
||||
"wo-SN": "wo-sn",
|
||||
"yo-NG": "yo-ng",
|
||||
"zh-CN": "zh-cn",
|
||||
"zh-HK": "en-hk",
|
||||
"zh-MO": "zh-mo",
|
||||
@ -560,7 +594,7 @@
|
||||
"es": "es-es",
|
||||
"et": "et-et",
|
||||
"eu": "eu-eu",
|
||||
"fa": "fa-fa",
|
||||
"fa": "prs-prs",
|
||||
"fi": "fi-fi",
|
||||
"fil": "fil-fil",
|
||||
"fr": "fr-fr",
|
||||
@ -568,12 +602,14 @@
|
||||
"gd": "gd-gd",
|
||||
"gl": "gl-gl",
|
||||
"gu": "gu-gu",
|
||||
"ha": "ha-latn",
|
||||
"he": "he-he",
|
||||
"hi": "hi-hi",
|
||||
"hr": "hr-hr",
|
||||
"hu": "hu-hu",
|
||||
"hy": "hy-hy",
|
||||
"id": "id-id",
|
||||
"ig": "ig-ig",
|
||||
"is": "is-is",
|
||||
"it": "it-it",
|
||||
"ja": "ja-ja",
|
||||
@ -583,6 +619,8 @@
|
||||
"kn": "kn-kn",
|
||||
"ko": "ko-ko",
|
||||
"kok": "kok-kok",
|
||||
"ku": "ku-arab",
|
||||
"ky": "ky-ky",
|
||||
"lb": "lb-lb",
|
||||
"lo": "lo-lo",
|
||||
"lt": "lt-lt",
|
||||
@ -590,6 +628,7 @@
|
||||
"mi": "mi-mi",
|
||||
"mk": "mk-mk",
|
||||
"ml": "ml-ml",
|
||||
"mn": "mn-cyrl-mn",
|
||||
"mr": "mr-mr",
|
||||
"ms": "ms-ms",
|
||||
"mt": "mt-mt",
|
||||
@ -597,22 +636,33 @@
|
||||
"ne": "ne-ne",
|
||||
"nl": "nl-nl",
|
||||
"nn": "nn-nn",
|
||||
"nso": "nso-nso",
|
||||
"or": "or-or",
|
||||
"pa_Arab": "pa-arab",
|
||||
"pa_Guru": "pa-guru",
|
||||
"pl": "pl-pl",
|
||||
"pt": "pt-br",
|
||||
"qu": "quz-quz",
|
||||
"quc": "quc-quc",
|
||||
"ro": "ro-ro",
|
||||
"ru": "ru-ru",
|
||||
"rw": "rw-rw",
|
||||
"sd_Arab": "sd-arab",
|
||||
"si": "si-si",
|
||||
"sk": "sk-sk",
|
||||
"sl": "sl-sl",
|
||||
"sq": "sq-sq",
|
||||
"sr_Cyrl": "sr-cyrl",
|
||||
"sr_Latn": "sr-latn",
|
||||
"sv": "sv-sv",
|
||||
"sw": "sw-sw",
|
||||
"ta": "ta-ta",
|
||||
"te": "te-te",
|
||||
"tg": "tg-cyrl",
|
||||
"th": "th-th",
|
||||
"ti": "ti-ti",
|
||||
"tk": "tk-tk",
|
||||
"tn": "tn-tn",
|
||||
"tr": "tr-tr",
|
||||
"tt": "tt-tt",
|
||||
"ug": "ug-ug",
|
||||
@ -620,9 +670,13 @@
|
||||
"ur": "ur-ur",
|
||||
"uz_Latn": "uz-latn",
|
||||
"vi": "vi-vi",
|
||||
"wo": "wo-wo",
|
||||
"xh": "xh-xh",
|
||||
"yo": "yo-yo",
|
||||
"zh": "zh-hans",
|
||||
"zh_Hans": "zh-hans",
|
||||
"zh_Hant": "zh-hant"
|
||||
"zh_Hant": "zh-hant",
|
||||
"zu": "zu-zu"
|
||||
},
|
||||
"regions": {
|
||||
"am-ET": "am-et",
|
||||
@ -838,12 +892,14 @@
|
||||
"kk-KZ": "kk-kz",
|
||||
"km-KH": "km-kh",
|
||||
"ko-KR": "ko-kr",
|
||||
"ky-KG": "ky-kg",
|
||||
"lb-LU": "lb-lu",
|
||||
"lo-LA": "lo-la",
|
||||
"lt-LT": "lt-lt",
|
||||
"lv-LV": "lv-lv",
|
||||
"mi-NZ": "mi-nz",
|
||||
"mk-MK": "mk-mk",
|
||||
"mn-MN": "mn-mn",
|
||||
"ms-BN": "ms-bn",
|
||||
"ms-MY": "ms-my",
|
||||
"ms-SG": "ms-sg",
|
||||
@ -877,6 +933,8 @@
|
||||
"ru-KZ": "ru-kz",
|
||||
"ru-RU": "ru-ru",
|
||||
"ru-UA": "ru-ua",
|
||||
"rw-RW": "rw-rw",
|
||||
"si-LK": "si-lk",
|
||||
"sk-SK": "sk-sk",
|
||||
"sl-SI": "sl-si",
|
||||
"sq-AL": "sq-al",
|
||||
@ -885,14 +943,23 @@
|
||||
"sr-RS": "sr-rs",
|
||||
"sv-FI": "sv-fi",
|
||||
"sv-SE": "sv-se",
|
||||
"sw-KE": "sw-ke",
|
||||
"sw-TZ": "sw-tz",
|
||||
"sw-UG": "sw-ug",
|
||||
"ta-LK": "ta-lk",
|
||||
"ta-SG": "ta-sg",
|
||||
"tg-TJ": "tg-tj",
|
||||
"th-TH": "th-th",
|
||||
"ti-ER": "ti-er",
|
||||
"tk-TM": "tk-tm",
|
||||
"tn-BW": "tn-bw",
|
||||
"tr-CY": "tr-cy",
|
||||
"tr-TR": "tr-tr",
|
||||
"uk-UA": "uk-ua",
|
||||
"ur-PK": "ur-pk",
|
||||
"vi-VN": "vi-vn",
|
||||
"wo-SN": "wo-sn",
|
||||
"yo-NG": "yo-ng",
|
||||
"zh-CN": "zh-cn",
|
||||
"zh-HK": "en-hk",
|
||||
"zh-MO": "zh-mo",
|
||||
@ -925,7 +992,7 @@
|
||||
"es": "es-es",
|
||||
"et": "et-et",
|
||||
"eu": "eu-eu",
|
||||
"fa": "fa-fa",
|
||||
"fa": "prs-prs",
|
||||
"fi": "fi-fi",
|
||||
"fil": "fil-fil",
|
||||
"fr": "fr-fr",
|
||||
@ -933,12 +1000,14 @@
|
||||
"gd": "gd-gd",
|
||||
"gl": "gl-gl",
|
||||
"gu": "gu-gu",
|
||||
"ha": "ha-latn",
|
||||
"he": "he-he",
|
||||
"hi": "hi-hi",
|
||||
"hr": "hr-hr",
|
||||
"hu": "hu-hu",
|
||||
"hy": "hy-hy",
|
||||
"id": "id-id",
|
||||
"ig": "ig-ig",
|
||||
"is": "is-is",
|
||||
"it": "it-it",
|
||||
"ja": "ja-ja",
|
||||
@ -948,6 +1017,8 @@
|
||||
"kn": "kn-kn",
|
||||
"ko": "ko-ko",
|
||||
"kok": "kok-kok",
|
||||
"ku": "ku-arab",
|
||||
"ky": "ky-ky",
|
||||
"lb": "lb-lb",
|
||||
"lo": "lo-lo",
|
||||
"lt": "lt-lt",
|
||||
@ -955,6 +1026,7 @@
|
||||
"mi": "mi-mi",
|
||||
"mk": "mk-mk",
|
||||
"ml": "ml-ml",
|
||||
"mn": "mn-cyrl-mn",
|
||||
"mr": "mr-mr",
|
||||
"ms": "ms-ms",
|
||||
"mt": "mt-mt",
|
||||
@ -962,22 +1034,33 @@
|
||||
"ne": "ne-ne",
|
||||
"nl": "nl-nl",
|
||||
"nn": "nn-nn",
|
||||
"nso": "nso-nso",
|
||||
"or": "or-or",
|
||||
"pa_Arab": "pa-arab",
|
||||
"pa_Guru": "pa-guru",
|
||||
"pl": "pl-pl",
|
||||
"pt": "pt-br",
|
||||
"qu": "quz-quz",
|
||||
"quc": "quc-quc",
|
||||
"ro": "ro-ro",
|
||||
"ru": "ru-ru",
|
||||
"rw": "rw-rw",
|
||||
"sd_Arab": "sd-arab",
|
||||
"si": "si-si",
|
||||
"sk": "sk-sk",
|
||||
"sl": "sl-sl",
|
||||
"sq": "sq-sq",
|
||||
"sr_Cyrl": "sr-cyrl",
|
||||
"sr_Latn": "sr-latn",
|
||||
"sv": "sv-sv",
|
||||
"sw": "sw-sw",
|
||||
"ta": "ta-ta",
|
||||
"te": "te-te",
|
||||
"tg": "tg-cyrl",
|
||||
"th": "th-th",
|
||||
"ti": "ti-ti",
|
||||
"tk": "tk-tk",
|
||||
"tn": "tn-tn",
|
||||
"tr": "tr-tr",
|
||||
"tt": "tt-tt",
|
||||
"ug": "ug-ug",
|
||||
@ -985,9 +1068,13 @@
|
||||
"ur": "ur-ur",
|
||||
"uz_Latn": "uz-latn",
|
||||
"vi": "vi-vi",
|
||||
"wo": "wo-wo",
|
||||
"xh": "xh-xh",
|
||||
"yo": "yo-yo",
|
||||
"zh": "zh-hans",
|
||||
"zh_Hans": "zh-hans",
|
||||
"zh_Hant": "zh-hant"
|
||||
"zh_Hant": "zh-hant",
|
||||
"zu": "zu-zu"
|
||||
},
|
||||
"regions": {
|
||||
"am-ET": "am-et",
|
||||
@ -1203,12 +1290,14 @@
|
||||
"kk-KZ": "kk-kz",
|
||||
"km-KH": "km-kh",
|
||||
"ko-KR": "ko-kr",
|
||||
"ky-KG": "ky-kg",
|
||||
"lb-LU": "lb-lu",
|
||||
"lo-LA": "lo-la",
|
||||
"lt-LT": "lt-lt",
|
||||
"lv-LV": "lv-lv",
|
||||
"mi-NZ": "mi-nz",
|
||||
"mk-MK": "mk-mk",
|
||||
"mn-MN": "mn-mn",
|
||||
"ms-BN": "ms-bn",
|
||||
"ms-MY": "ms-my",
|
||||
"ms-SG": "ms-sg",
|
||||
@ -1242,6 +1331,8 @@
|
||||
"ru-KZ": "ru-kz",
|
||||
"ru-RU": "ru-ru",
|
||||
"ru-UA": "ru-ua",
|
||||
"rw-RW": "rw-rw",
|
||||
"si-LK": "si-lk",
|
||||
"sk-SK": "sk-sk",
|
||||
"sl-SI": "sl-si",
|
||||
"sq-AL": "sq-al",
|
||||
@ -1250,14 +1341,23 @@
|
||||
"sr-RS": "sr-rs",
|
||||
"sv-FI": "sv-fi",
|
||||
"sv-SE": "sv-se",
|
||||
"sw-KE": "sw-ke",
|
||||
"sw-TZ": "sw-tz",
|
||||
"sw-UG": "sw-ug",
|
||||
"ta-LK": "ta-lk",
|
||||
"ta-SG": "ta-sg",
|
||||
"tg-TJ": "tg-tj",
|
||||
"th-TH": "th-th",
|
||||
"ti-ER": "ti-er",
|
||||
"tk-TM": "tk-tm",
|
||||
"tn-BW": "tn-bw",
|
||||
"tr-CY": "tr-cy",
|
||||
"tr-TR": "tr-tr",
|
||||
"uk-UA": "uk-ua",
|
||||
"ur-PK": "ur-pk",
|
||||
"vi-VN": "vi-vn",
|
||||
"wo-SN": "wo-sn",
|
||||
"yo-NG": "yo-ng",
|
||||
"zh-CN": "en-hk",
|
||||
"zh-HK": "en-hk",
|
||||
"zh-MO": "zh-mo",
|
||||
@ -1290,7 +1390,7 @@
|
||||
"es": "es-es",
|
||||
"et": "et-et",
|
||||
"eu": "eu-eu",
|
||||
"fa": "fa-fa",
|
||||
"fa": "prs-prs",
|
||||
"fi": "fi-fi",
|
||||
"fil": "fil-fil",
|
||||
"fr": "fr-fr",
|
||||
@ -1298,12 +1398,14 @@
|
||||
"gd": "gd-gd",
|
||||
"gl": "gl-gl",
|
||||
"gu": "gu-gu",
|
||||
"ha": "ha-latn",
|
||||
"he": "he-he",
|
||||
"hi": "hi-hi",
|
||||
"hr": "hr-hr",
|
||||
"hu": "hu-hu",
|
||||
"hy": "hy-hy",
|
||||
"id": "id-id",
|
||||
"ig": "ig-ig",
|
||||
"is": "is-is",
|
||||
"it": "it-it",
|
||||
"ja": "ja-ja",
|
||||
@ -1313,6 +1415,8 @@
|
||||
"kn": "kn-kn",
|
||||
"ko": "ko-ko",
|
||||
"kok": "kok-kok",
|
||||
"ku": "ku-arab",
|
||||
"ky": "ky-ky",
|
||||
"lb": "lb-lb",
|
||||
"lo": "lo-lo",
|
||||
"lt": "lt-lt",
|
||||
@ -1320,6 +1424,7 @@
|
||||
"mi": "mi-mi",
|
||||
"mk": "mk-mk",
|
||||
"ml": "ml-ml",
|
||||
"mn": "mn-cyrl-mn",
|
||||
"mr": "mr-mr",
|
||||
"ms": "ms-ms",
|
||||
"mt": "mt-mt",
|
||||
@ -1327,22 +1432,33 @@
|
||||
"ne": "ne-ne",
|
||||
"nl": "nl-nl",
|
||||
"nn": "nn-nn",
|
||||
"nso": "nso-nso",
|
||||
"or": "or-or",
|
||||
"pa_Arab": "pa-arab",
|
||||
"pa_Guru": "pa-guru",
|
||||
"pl": "pl-pl",
|
||||
"pt": "pt-br",
|
||||
"qu": "quz-quz",
|
||||
"quc": "quc-quc",
|
||||
"ro": "ro-ro",
|
||||
"ru": "ru-ru",
|
||||
"rw": "rw-rw",
|
||||
"sd_Arab": "sd-arab",
|
||||
"si": "si-si",
|
||||
"sk": "sk-sk",
|
||||
"sl": "sl-sl",
|
||||
"sq": "sq-sq",
|
||||
"sr_Cyrl": "sr-cyrl",
|
||||
"sr_Latn": "sr-latn",
|
||||
"sv": "sv-sv",
|
||||
"sw": "sw-sw",
|
||||
"ta": "ta-ta",
|
||||
"te": "te-te",
|
||||
"tg": "tg-cyrl",
|
||||
"th": "th-th",
|
||||
"ti": "ti-ti",
|
||||
"tk": "tk-tk",
|
||||
"tn": "tn-tn",
|
||||
"tr": "tr-tr",
|
||||
"tt": "tt-tt",
|
||||
"ug": "ug-ug",
|
||||
@ -1350,9 +1466,13 @@
|
||||
"ur": "ur-ur",
|
||||
"uz_Latn": "uz-latn",
|
||||
"vi": "vi-vi",
|
||||
"wo": "wo-wo",
|
||||
"xh": "xh-xh",
|
||||
"yo": "yo-yo",
|
||||
"zh": "zh-hans",
|
||||
"zh_Hans": "zh-hans",
|
||||
"zh_Hant": "zh-hant"
|
||||
"zh_Hant": "zh-hant",
|
||||
"zu": "zu-zu"
|
||||
},
|
||||
"regions": {
|
||||
"am-ET": "am-et",
|
||||
@ -1568,12 +1688,14 @@
|
||||
"kk-KZ": "kk-kz",
|
||||
"km-KH": "km-kh",
|
||||
"ko-KR": "ko-kr",
|
||||
"ky-KG": "ky-kg",
|
||||
"lb-LU": "lb-lu",
|
||||
"lo-LA": "lo-la",
|
||||
"lt-LT": "lt-lt",
|
||||
"lv-LV": "lv-lv",
|
||||
"mi-NZ": "mi-nz",
|
||||
"mk-MK": "mk-mk",
|
||||
"mn-MN": "mn-mn",
|
||||
"ms-BN": "ms-bn",
|
||||
"ms-MY": "ms-my",
|
||||
"ms-SG": "ms-sg",
|
||||
@ -1607,6 +1729,8 @@
|
||||
"ru-KZ": "ru-kz",
|
||||
"ru-RU": "ru-ru",
|
||||
"ru-UA": "ru-ua",
|
||||
"rw-RW": "rw-rw",
|
||||
"si-LK": "si-lk",
|
||||
"sk-SK": "sk-sk",
|
||||
"sl-SI": "sl-si",
|
||||
"sq-AL": "sq-al",
|
||||
@ -1615,14 +1739,23 @@
|
||||
"sr-RS": "sr-rs",
|
||||
"sv-FI": "sv-fi",
|
||||
"sv-SE": "sv-se",
|
||||
"sw-KE": "sw-ke",
|
||||
"sw-TZ": "sw-tz",
|
||||
"sw-UG": "sw-ug",
|
||||
"ta-LK": "ta-lk",
|
||||
"ta-SG": "ta-sg",
|
||||
"tg-TJ": "tg-tj",
|
||||
"th-TH": "th-th",
|
||||
"ti-ER": "ti-er",
|
||||
"tk-TM": "tk-tm",
|
||||
"tn-BW": "tn-bw",
|
||||
"tr-CY": "tr-cy",
|
||||
"tr-TR": "tr-tr",
|
||||
"uk-UA": "uk-ua",
|
||||
"ur-PK": "ur-pk",
|
||||
"vi-VN": "vi-vn",
|
||||
"wo-SN": "wo-sn",
|
||||
"yo-NG": "yo-ng",
|
||||
"zh-CN": "zh-cn",
|
||||
"zh-HK": "en-hk",
|
||||
"zh-MO": "zh-mo",
|
||||
|
@ -2574,11 +2574,6 @@
|
||||
"symbol": "ʰ",
|
||||
"to_si_factor": 0.2617993878
|
||||
},
|
||||
"Q11644875": {
|
||||
"si_name": "Q12438",
|
||||
"symbol": "Tf",
|
||||
"to_si_factor": 9806.65
|
||||
},
|
||||
"Q1165639": {
|
||||
"si_name": "Q89992008",
|
||||
"symbol": "daraf",
|
||||
@ -3179,6 +3174,21 @@
|
||||
"symbol": "₿",
|
||||
"to_si_factor": null
|
||||
},
|
||||
"Q131824443": {
|
||||
"si_name": "Q12438",
|
||||
"symbol": "tf",
|
||||
"to_si_factor": 9806.65
|
||||
},
|
||||
"Q131824444": {
|
||||
"si_name": "Q12438",
|
||||
"symbol": "LTf",
|
||||
"to_si_factor": 9964.01641818352
|
||||
},
|
||||
"Q131824445": {
|
||||
"si_name": "Q12438",
|
||||
"symbol": "STf",
|
||||
"to_si_factor": 8896.443230521
|
||||
},
|
||||
"Q1322380": {
|
||||
"si_name": "Q11574",
|
||||
"symbol": "Ts",
|
||||
@ -3420,7 +3430,7 @@
|
||||
"to_si_factor": null
|
||||
},
|
||||
"Q1628990": {
|
||||
"si_name": "Q12874593",
|
||||
"si_name": "Q12831618",
|
||||
"symbol": "hph",
|
||||
"to_si_factor": 745.7
|
||||
},
|
||||
|
148
searx/engines/kagi.py
Normal file
148
searx/engines/kagi.py
Normal file
@ -0,0 +1,148 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Kagi Search
|
||||
Scrapes Kagi's HTML search results.
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import extract_text, eval_xpath, eval_xpath_list
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from searx import logger
|
||||
|
||||
logger = logger.getChild('kagi')
|
||||
|
||||
about = {
|
||||
"website": 'https://kagi.com',
|
||||
"wikidata_id": None,
|
||||
"official_api_documentation": None,
|
||||
"use_official_api": False,
|
||||
"require_api_key": True,
|
||||
"results": 'HTML',
|
||||
}
|
||||
|
||||
categories = ['general', 'web']
|
||||
paging = True
|
||||
time_range_support = False
|
||||
safesearch = False
|
||||
|
||||
base_url = 'https://kagi.com/html/search'
|
||||
|
||||
api_key = None # Set in settings.yml
|
||||
|
||||
# Global cookie storage for Kagi authentication
|
||||
kagi_cookies = {'kagi_session': None, '_kagi_search_': None}
|
||||
|
||||
|
||||
def request(query, params):
    """Build the outgoing HTTP request for a Kagi HTML search.

    When both session cookies cached in ``kagi_cookies`` are known they are
    sent with the request and the URL carries only the query and the page
    number.  Otherwise the configured ``api_key`` is added to the URL as the
    ``token`` argument so that Kagi can authenticate the request.

    :param query: search terms, sent as the ``q`` URL argument.
    :param params: request parameters.  ``pageno`` and ``headers`` are read;
        ``url``, ``cookies``, ``headers``, ``allow_redirects``, ``verify`` and
        ``max_redirects`` are set or updated in place.
    :returns: the same ``params`` dict.
    :raises SearxEngineAPIException: if no ``api_key`` is configured.
    """
    if not api_key:
        raise SearxEngineAPIException('missing Kagi API key')

    page = params['pageno']

    # Forward only the cookies that actually hold a value: copying the ``None``
    # placeholders into the cookie jar would send meaningless cookies.
    cached = {name: value for name, value in kagi_cookies.items() if value}
    params.setdefault('cookies', {}).update(cached)

    missing = [name for name, value in kagi_cookies.items() if not value]
    if missing:
        logger.debug("Missing cookies %s, using API key for initial authentication", missing)
        url_args = {'q': query, 'token': api_key, 'batch': page}
    else:
        # The cookie values are credentials, so they are deliberately not
        # written to the log.
        logger.debug("Using cached Kagi session cookies for authentication")
        url_args = {'q': query, 'batch': page}

    params['url'] = base_url + '?' + urlencode(url_args)
    params['headers'].update(
        {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'DNT': '1',
        }
    )
    params['allow_redirects'] = True
    params['verify'] = True
    params['max_redirects'] = 1

    return params
|
||||
|
||||
|
||||
def response(resp):
    """Parse a Kagi HTML result page into a list of result dicts.

    Session cookies named in ``kagi_cookies`` that arrive in ``Set-Cookie``
    headers are cached in ``kagi_cookies`` and mirrored into
    ``resp.search_params['cookies']``.  On a 401 response the cached cookies
    are cleared before the exception is raised.

    :param resp: HTTP response object; ``headers``, ``status_code``, ``text``
        and ``search_params`` are read.
    :returns: list of dicts with the keys ``url``, ``title``, ``content`` and
        ``domain`` (``domain`` is an empty string when no host is found).
    :raises SearxEngineAPIException: on HTTP 401, 429 or any other non-200
        status code.
    """
    results = []

    if 'set-cookie' in resp.headers:
        for header in resp.headers.get_list('set-cookie'):
            name, sep, rest = header.partition('=')
            if not sep:
                # not a "name=value" pair
                continue

            name = name.strip()
            if name not in kagi_cookies:
                # only the exact cookie names tracked in kagi_cookies matter
                continue

            # drop cookie attributes (Path, Expires, ...) after the first ';'
            value = rest.split(';')[0].strip()
            if value != kagi_cookies[name]:
                kagi_cookies[name] = value
                resp.search_params['cookies'][name] = value
                # The value is a credential: log the cookie name only.
                logger.debug("Updated %s cookie", name)

    if resp.status_code == 401:
        for name in kagi_cookies:
            kagi_cookies[name] = None
        resp.search_params['cookies'].clear()
        logger.debug("Cleared invalid Kagi cookies")

        raise SearxEngineAPIException('Invalid Kagi authentication')
    if resp.status_code == 429:
        raise SearxEngineAPIException('Kagi rate limit exceeded')
    if resp.status_code != 200:
        raise SearxEngineAPIException(f'Unexpected HTTP status code: {resp.status_code}')

    dom = html.fromstring(resp.text)

    for item in eval_xpath_list(dom, '//div[contains(@class, "_0_SRI")]'):
        title_links = eval_xpath(item, './/a[contains(@class, "__sri_title_link")]')
        if not title_links:
            # result block without a title link
            continue

        title_link = title_links[0]
        url = title_link.get('href')
        if not url:
            # a result without a target URL is of no use
            continue

        snippets = eval_xpath(item, './/div[contains(@class, "__sri-desc")]')
        hosts = eval_xpath(item, './/span[contains(@class, "host")]/text()')

        results.append(
            {
                'url': url,
                'title': extract_text(title_link),
                'content': extract_text(snippets[0]) if snippets else '',
                # always a string, never the empty XPath result list
                'domain': hosts[0] if hosts else '',
            }
        )

    return results
|
@ -110,6 +110,28 @@ class Result(msgspec.Struct, kw_only=True):
|
||||
|
||||
return iter(self.__struct_fields__)
|
||||
|
||||
def as_dict(self):
|
||||
return {f: getattr(self, f) for f in self.__struct_fields__}
|
||||
|
||||
|
||||
class MainResult(Result): # pylint: disable=missing-class-docstring
|
||||
|
||||
# open_group and close_group should not be managed in the Result class (we should drop them from here!)
|
||||
open_group: bool = False
|
||||
close_group: bool = False
|
||||
|
||||
title: str = ""
|
||||
"""Link title of the result item."""
|
||||
|
||||
content: str = ""
|
||||
"""Extract or description of the result item"""
|
||||
|
||||
img_src: str = ""
|
||||
"""URL of a image that is displayed in the result item."""
|
||||
|
||||
thumbnail: str = ""
|
||||
"""URL of a thumbnail that is displayed in the result item."""
|
||||
|
||||
|
||||
class LegacyResult(dict):
|
||||
"""A wrapper around a legacy result item. The SearXNG core uses this class
|
||||
@ -130,10 +152,12 @@ class LegacyResult(dict):
|
||||
UNSET = object()
|
||||
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
|
||||
|
||||
def as_dict(self):
|
||||
return self
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.__dict__ = self
|
||||
|
||||
# Init fields with defaults / compare with defaults of the fields in class Result
|
||||
self.engine = self.get("engine", "")
|
||||
|
@ -2484,6 +2484,19 @@ engines:
|
||||
shortcut: pgo
|
||||
disabled: true
|
||||
|
||||
- name: kagi
|
||||
engine: kagi
|
||||
shortcut: kg
|
||||
categories: [general, web]
|
||||
disabled: true
|
||||
timeout: 4.0
|
||||
api_key: ""
|
||||
about:
|
||||
website: https://kagi.com
|
||||
use_official_api: false
|
||||
require_api_key: true
|
||||
results: HTML
|
||||
|
||||
# Doku engine lets you access any Doku wiki instance:
|
||||
# A public one or a private/corporate one.
|
||||
# - name: ubuntuwiki
|
||||
|
@ -1,6 +1,6 @@
|
||||
<table>
|
||||
{% for key, value in result.items() %}
|
||||
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions', 'pretty_url', 'parsed_url'] %}
|
||||
{% if key in ['engine', 'engines', 'template', 'score', 'category', 'positions', 'parsed_url'] %}
|
||||
{% continue %}
|
||||
{% endif %}
|
||||
<tr>
|
||||
|
@ -694,9 +694,7 @@ def search():
|
||||
if 'title' in result and result['title']:
|
||||
result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
|
||||
|
||||
if 'url' in result:
|
||||
result['pretty_url'] = webutils.prettify_url(result['url'])
|
||||
if result.get('publishedDate'): # do not try to get a date from an empty string or a None type
|
||||
if getattr(result, 'publishedDate', None): # do not try to get a date from an empty string or a None type
|
||||
try: # test if publishedDate >= 1900 (datetime module bug)
|
||||
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
|
||||
except ValueError:
|
||||
@ -706,15 +704,15 @@ def search():
|
||||
|
||||
# set result['open_group'] = True when the template changes from the previous result
|
||||
# set result['close_group'] = True when the template changes on the next result
|
||||
if current_template != result.get('template'):
|
||||
result['open_group'] = True
|
||||
if current_template != result.template:
|
||||
result.open_group = True
|
||||
if previous_result:
|
||||
previous_result['close_group'] = True # pylint: disable=unsupported-assignment-operation
|
||||
current_template = result.get('template')
|
||||
previous_result.close_group = True # pylint: disable=unsupported-assignment-operation
|
||||
current_template = result.template
|
||||
previous_result = result
|
||||
|
||||
if previous_result:
|
||||
previous_result['close_group'] = True
|
||||
previous_result.close_group = True
|
||||
|
||||
# 4.a RSS
|
||||
|
||||
|
@ -123,17 +123,18 @@ def write_csv_response(csv: CSVWriter, rc: ResultContainer) -> None: # pylint:
|
||||
|
||||
"""
|
||||
|
||||
results = rc.get_ordered_results()
|
||||
keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
|
||||
csv.writerow(keys)
|
||||
|
||||
for row in results:
|
||||
for res in rc.get_ordered_results():
|
||||
row = res.as_dict()
|
||||
row['host'] = row['parsed_url'].netloc
|
||||
row['type'] = 'result'
|
||||
csv.writerow([row.get(key, '') for key in keys])
|
||||
|
||||
for a in rc.answers:
|
||||
row = {'title': a, 'type': 'answer'}
|
||||
row = a.as_dict()
|
||||
row['host'] = row['parsed_url'].netloc
|
||||
csv.writerow([row.get(key, '') for key in keys])
|
||||
|
||||
for a in rc.suggestions:
|
||||
@ -158,18 +159,17 @@ class JSONEncoder(json.JSONEncoder): # pylint: disable=missing-class-docstring
|
||||
|
||||
def get_json_response(sq: SearchQuery, rc: ResultContainer) -> str:
|
||||
"""Returns the JSON string of the results to a query (``application/json``)"""
|
||||
results = rc.number_of_results
|
||||
x = {
|
||||
data = {
|
||||
'query': sq.query,
|
||||
'number_of_results': results,
|
||||
'results': rc.get_ordered_results(),
|
||||
'answers': list(rc.answers),
|
||||
'number_of_results': rc.number_of_results,
|
||||
'results': [_.as_dict() for _ in rc.get_ordered_results()],
|
||||
'answers': [_.as_dict() for _ in rc.answers],
|
||||
'corrections': list(rc.corrections),
|
||||
'infoboxes': rc.infoboxes,
|
||||
'suggestions': list(rc.suggestions),
|
||||
'unresponsive_engines': get_translated_errors(rc.unresponsive_engines),
|
||||
}
|
||||
response = json.dumps(x, cls=JSONEncoder)
|
||||
response = json.dumps(data, cls=JSONEncoder)
|
||||
return response
|
||||
|
||||
|
||||
|
152
tests/unit/test_engine_kagi.py
Normal file
152
tests/unit/test_engine_kagi.py
Normal file
@ -0,0 +1,152 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
import mock
|
||||
from lxml import html
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
from searx.engines import kagi
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
from tests import SearxTestCase
|
||||
|
||||
|
||||
class TestKagiEngine(SearxTestCase):
    """Unit tests for the request building and response parsing of the ``kagi`` engine."""

    def setUp(self):
        """Prepare a minimal Kagi result page containing two search results."""
        self.test_html = """
        <div class="_0_main-search-results">
            <div class="_0_SRI search-result">
                <div class="_0_TITLE __sri-title">
                    <h3 class="__sri-title-box">
                        <a class="__sri_title_link _ext_t" href="https://example1.com">Result 1</a>
                    </h3>
                </div>
                <div class="__sri-url-box">
                    <span class="host">example1.com</span>
                </div>
                <div class="__sri-body">
                    <div class="__sri-desc">Content 1</div>
                </div>
            </div>
            <div class="_0_SRI search-result">
                <div class="_0_TITLE __sri-title">
                    <h3 class="__sri-title-box">
                        <a class="__sri_title_link _ext_t" href="https://example2.com">Result 2</a>
                    </h3>
                </div>
                <div class="__sri-url-box">
                    <span class="host">example2.com</span>
                </div>
                <div class="__sri-body">
                    <div class="__sri-desc">Content 2</div>
                </div>
            </div>
        </div>
        """

    def test_request(self):
        """Check the request URL for token auth, cookie auth and pagination."""
        # ``kagi.token`` is module-level state.  Patch it (starting from the
        # "missing token" case) so that the values assigned below are rolled
        # back when the test ends and cannot leak into other tests.
        token_patch = mock.patch.object(kagi, 'token', None, create=True)
        token_patch.start()
        self.addCleanup(token_patch.stop)

        # Test with missing API token
        params = {'pageno': 1, 'headers': {}}
        self.assertRaises(SearxEngineAPIException, kagi.request, 'test query', params)

        # Test with valid API token but no cookie
        kagi.token = 'test_token'
        params = {'pageno': 1, 'headers': {}, 'cookies': {}}
        query = 'test query'
        request_params = kagi.request(query, params)

        self.assertIn('url', request_params)
        self.assertIn('token=test_token', request_params['url'])
        self.assertIn('q=test+query', request_params['url'])
        self.assertEqual(request_params['max_redirects'], 1)
        self.assertTrue(request_params['allow_redirects'])

        # Test with both required cookies
        params['cookies']['kagi_session'] = 'test_session'
        params['cookies']['_kagi_search_'] = 'test_search'
        request_params = kagi.request(query, params)
        self.assertNotIn('token=', request_params['url'])
        self.assertIn('q=test+query', request_params['url'])
        self.assertEqual(request_params['max_redirects'], 1)
        self.assertTrue(request_params['allow_redirects'])

        # Test with missing search cookie
        params['cookies'] = {'kagi_session': 'test_session'}
        request_params = kagi.request(query, params)
        self.assertIn('token=', request_params['url'])

        # Test with missing session cookie
        params['cookies'] = {'_kagi_search_': 'test_search'}
        request_params = kagi.request(query, params)
        self.assertIn('token=', request_params['url'])

        # Test pagination
        params['pageno'] = 2
        request_params = kagi.request(query, params)
        self.assertIn('batch=2', request_params['url'])
        self.assertEqual(request_params['max_redirects'], 1)

    def test_response(self):
        """Check cookie capture and result extraction from a successful response."""

        def verify_cookie_capture(cookie_headers, expected_session, expected_search):
            """Feed ``cookie_headers`` to ``kagi.response`` through a mocked
            200 reply and assert which cookie values ended up in
            ``search_params['cookies']``.  Returns the parsed results."""
            mock_headers = mock.Mock()
            mock_headers.get_list = mock.Mock(return_value=cookie_headers)
            # make any ``<name> in headers`` membership test succeed
            mock_headers.__contains__ = mock.Mock(return_value=True)

            response = mock.Mock(
                text=self.test_html, status_code=200, headers=mock_headers, search_params={'cookies': {}}
            )
            results = kagi.response(response)

            self.assertEqual(response.search_params['cookies'].get('kagi_session'), expected_session)
            self.assertEqual(response.search_params['cookies'].get('_kagi_search_'), expected_search)
            return results

        # Test cookie capture with standard attributes (only the cookie
        # assertions inside the helper matter here, the results are not needed)
        verify_cookie_capture(
            ['kagi_session=test_session; Path=/; HttpOnly', '_kagi_search_=test_search; Path=/; HttpOnly'],
            'test_session',
            'test_search',
        )

        # Test cookie capture with additional attributes
        results = verify_cookie_capture(
            [
                'kagi_session=test_session2; Path=/; HttpOnly; SameSite=Lax',
                '_kagi_search_=test_search2; Domain=.kagi.com; Path=/; SameSite=Lax',
            ],
            'test_session2',
            'test_search2',
        )

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)  # 2 search results

        # Check first result
        self.assertEqual(results[0]['title'], 'Result 1')
        self.assertEqual(results[0]['url'], 'https://example1.com')
        self.assertEqual(results[0]['content'], 'Content 1')
        self.assertEqual(results[0]['domain'], 'example1.com')

        # Check second result
        self.assertEqual(results[1]['title'], 'Result 2')
        self.assertEqual(results[1]['url'], 'https://example2.com')
        self.assertEqual(results[1]['content'], 'Content 2')
        self.assertEqual(results[1]['domain'], 'example2.com')

    def test_response_error_handling(self):
        """Check that 401, 429 and 500 responses raise ``SearxEngineAPIException``."""
        # Test invalid token/cookie response
        response = mock.Mock(
            text='', status_code=401, search_params={'cookies': {'kagi_session': 'invalid_session'}}, headers={}
        )
        self.assertRaises(SearxEngineAPIException, kagi.response, response)
        # Verify invalid cookie was cleared
        self.assertNotIn('kagi_session', response.search_params['cookies'])

        # Test rate limit response
        response = mock.Mock(text='', status_code=429, search_params={'cookies': {}}, headers={})
        self.assertRaises(SearxEngineAPIException, kagi.response, response)

        # Test other error response
        response = mock.Mock(text='', status_code=500, search_params={'cookies': {}}, headers={})
        self.assertRaises(SearxEngineAPIException, kagi.response, response)
|
@ -2,13 +2,13 @@
|
||||
# pylint: disable=missing-module-docstring,disable=missing-class-docstring,invalid-name
|
||||
|
||||
import json
|
||||
from urllib.parse import ParseResult
|
||||
import babel
|
||||
from mock import Mock
|
||||
|
||||
import searx.webapp
|
||||
import searx.search
|
||||
import searx.search.processors
|
||||
from searx.result_types._base import MainResult
|
||||
|
||||
from searx.results import Timing
|
||||
from searx.preferences import Preferences
|
||||
@ -31,30 +31,21 @@ class ViewsTestCase(SearxTestCase): # pylint: disable=too-many-public-methods
|
||||
|
||||
# set some defaults
|
||||
test_results = [
|
||||
{
|
||||
'content': 'first test content',
|
||||
'title': 'First Test',
|
||||
'url': 'http://first.test.xyz',
|
||||
'engines': ['youtube', 'startpage'],
|
||||
'engine': 'startpage',
|
||||
'parsed_url': ParseResult(
|
||||
scheme='http', netloc='first.test.xyz', path='/', params='', query='', fragment=''
|
||||
),
|
||||
'template': 'default.html',
|
||||
},
|
||||
{
|
||||
'content': 'second test content',
|
||||
'title': 'Second Test',
|
||||
'url': 'http://second.test.xyz',
|
||||
'engines': ['youtube', 'startpage'],
|
||||
'engine': 'youtube',
|
||||
'parsed_url': ParseResult(
|
||||
scheme='http', netloc='second.test.xyz', path='/', params='', query='', fragment=''
|
||||
),
|
||||
'template': 'default.html',
|
||||
},
|
||||
MainResult(
|
||||
title="First Test",
|
||||
url="http://first.test.xyz",
|
||||
content="first test content",
|
||||
engine="startpage",
|
||||
),
|
||||
MainResult(
|
||||
title="Second Test",
|
||||
url="http://second.test.xyz",
|
||||
content="second test content",
|
||||
engine="youtube",
|
||||
),
|
||||
]
|
||||
|
||||
for r in test_results:
|
||||
r.normalize_result_fields()
|
||||
timings = [
|
||||
Timing(engine='startpage', total=0.8, load=0.7),
|
||||
Timing(engine='youtube', total=0.9, load=0.6),
|
||||
|
Loading…
Reference in New Issue
Block a user