Compare commits

...

6 Commits

Author SHA1 Message Date
Alexandre Flament 7f1c0c6a7c
Merge 4398ce059f into dfaf5868e2 2024-11-11 16:27:32 +08:00
Leo Liu dfaf5868e2 [fix] settings.yml - enabled_plugins: document to reflect default settings
Remove 'Autodetect search language', which is no longer valid, from settings,
and add 'Unit converter plugin', which is now default enabled, to settings.
2024-11-10 16:09:41 +01:00
Leo Liu b173f3a8b9 Fix scheduler.lua 2024-11-10 15:53:58 +01:00
dependabot[bot] 2fbf15eccb [upd] pypi: Bump typer-slim from 0.12.5 to 0.13.0
Bumps [typer-slim](https://github.com/fastapi/typer) from 0.12.5 to 0.13.0.
- [Release notes](https://github.com/fastapi/typer/releases)
- [Changelog](https://github.com/fastapi/typer/blob/master/docs/release-notes.md)
- [Commits](https://github.com/fastapi/typer/compare/0.12.5...0.13.0)

---
updated-dependencies:
- dependency-name: typer-slim
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-11-08 08:51:43 +01:00
searxng-bot 08c5f258d8 [l10n] update translations from Weblate
8d90a7e6d - 2024-11-06 - sahussawud <sahussawud@users.noreply.translate.codeberg.org>
41ee8bb0d - 2024-11-02 - laaknor <laaknor@users.noreply.translate.codeberg.org>
c1a30afab - 2024-11-02 - return42 <return42@users.noreply.translate.codeberg.org>
627ab7a8e - 2024-11-01 - zarlin <zarlin@users.noreply.translate.codeberg.org>
2024-11-08 08:45:07 +01:00
Alexandre Flament 4398ce059f Add baidu engine (experimental) 2022-07-26 10:52:35 +02:00
12 changed files with 209 additions and 36 deletions

View File

@ -18,4 +18,4 @@ fasttext-predict==0.9.2.2
tomli==2.0.2; python_version < '3.11'
msgspec==0.18.6
eval_type_backport; python_version < '3.9'
typer-slim==0.12.5
typer-slim==0.13.0

169
searx/engines/baidu.py Normal file
View File

@ -0,0 +1,169 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Baidu (Web)
- https://github.com/searx/searx/issues/2019#issuecomment-648227442
"""
import re
from urllib.parse import urlencode
from lxml import html
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.network import raise_for_httperror, multi_requests, get, Request
from searx.exceptions import SearxEngineCaptchaException
# Engine metadata: the upstream site, how the results are obtained and the
# language of the results.
about = {
    "website": 'https://www.baidu.com',
    "wikidata_id": 'Q14772',
    "official_api_documentation": 'https://apis.baidu.com/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    # ISO 639-1 code for Chinese is 'zh' ('zn' is not a valid language code)
    "language": 'zh',
}
# Engine configuration
categories = ['general', 'web']

# Optional search features: none of them is enabled for this engine.
paging = False
time_range_support = False
safesearch = False

# The search URL is built as ``base_url + search_string`` where ``{query}`` is
# replaced by the URL-encoded query parameters.
base_url = 'https://www.baidu.com/'
search_string = 's?{query}'

# Results whose ``tpl`` attribute is one of these values are ignored by
# response().
skip_tpls = (
    'img_normal',
    'short_video',
    'yl_music_song',
    'dict3',
    'recommend_list',
)

# Maps the ``tpl`` attribute of a result to the XPath (relative to the result
# element) that selects its description.
desc_xpath_per_tpl = {
    'se_com_default': './/span[contains(@class, "content-right_8Zs40")]',
    'kaifa_pc_open_source_software': './/p[contains(@class, "c-color-text")]',
    'bk_polysemy': './/div/@aria-label',
    'se_st_single_video_zhanzhang': './/span[contains(@class, "c-span-last")]//p[2]',
}
def get_initial_parameters(params):
    """Fetch the Baidu start page and collect the fields of its search form.

    :param params: request parameters of the engine; only ``params['headers']``
        is read, it is sent along with the start page request.
    :returns: a tuple ``(query_params, cookies)``: ``query_params`` maps the
        name of every named ``<input>`` inside the form with ``id="form"`` to
        its value, ``cookies`` are the cookies of the start page response.
    """
    resp_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(resp_index.text)

    # An <input> without a ``value`` attribute is submitted as an empty string
    # by browsers.  Without the '' default the value would be None, which
    # urlencode() serializes as the literal text "None".
    query_params = {
        ielement.attrib.get('name'): ielement.attrib.get('value', '')
        for ielement in eval_xpath_list(dom, '//form[@id="form"]//input[@name]')
    }
    return query_params, resp_index.cookies
def request(query, params):
    """Prepare the search request sent to Baidu.

    Browser-like headers are merged into ``params['headers']``, then the form
    fields and cookies returned by ``get_initial_parameters()`` are used to
    build the search URL (the user query goes into the ``wd`` field).

    :param query: the search terms
    :param params: request parameters of the engine, modified in place
    :returns: ``params`` with ``url``, ``cookies`` and ``raise_for_httperror``
        set
    """
    browser_headers = {
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Sec-GPC': '1',
        'Upgrade-Insecure-Requests': '1',
        'TE': 'trailers',
    }
    params['headers'].update(browser_headers)

    form_fields, index_cookies = get_initial_parameters(params)
    form_fields['wd'] = query
    encoded_query = urlencode(form_fields)

    params['url'] = base_url + search_string.format(query=encoded_query)
    params['cookies'] = index_cookies
    # HTTP errors are raised by response(), after it has looked for the
    # CAPTCHA page.
    params['raise_for_httperror'] = False
    return params
def response(resp):
    """Parse a Baidu result page into a list of result dicts.

    Every result is a dict with the keys ``url``, ``title`` and ``content``.
    After parsing, one request per result is sent (without following
    redirects) and the result URL is replaced by the ``location`` header of
    the answer when there is one.

    :param resp: the HTTP response of the search request; ``resp.url``,
        ``resp.text`` and ``resp.search_params`` are read.
    :returns: list of result dicts
    :raises SearxEngineCaptchaException: when the response comes from
        ``wappass.baidu.com`` or its path starts with ``/static/captcha``
    """
    results = []

    if resp.url.host == 'wappass.baidu.com' or resp.url.path.startswith('/static/captcha'):
        raise SearxEngineCaptchaException()
    # request() disabled the automatic HTTP error check so that the CAPTCHA
    # test above runs first
    raise_for_httperror(resp)

    dom = html.fromstring(resp.text)

    # follow redirect but don't use the result page to reduce the CAPTCHA issue
    redirect_element = eval_xpath_getindex(dom, '//noscript/meta[@http-equiv="refresh"]/@content', 0, default=None)
    if redirect_element and redirect_element.startswith('0; url='):
        # NOTE(review): '0; url=' is 7 characters long, [8:] drops one more
        # character -- presumably the leading '/' of the target, since base_url
        # already ends with a '/'; confirm against a real response.
        get(
            base_url + redirect_element[8:],
            headers=resp.search_params['headers'],
            cookies=resp.search_params['cookies'],
        )

    for result in eval_xpath_list(dom, '//div[contains(@id,"content_left")]/div[contains(@class, "c-container")]'):
        tpl = result.attrib.get('tpl')
        if tpl in skip_tpls:
            continue

        if tpl == 'kaifa_pc_blog_weak':
            # skip the result to kaifa.baidu.com (search engine for IT)
            # but includes results from kaifa
            for r2 in eval_xpath_list(result, './/div[contains(@class, "c-gap-bottom-small")]'):
                title = extract_text(eval_xpath(r2, './/div[@class="c-row"]//a'))
                url = extract_text(eval_xpath(r2, './/div[@class="c-row"]//a/@href'))
                # same guard as for the normal results: a result without
                # title or URL is useless and its URL can't be resolved below
                if not title or not url:
                    continue
                # relative XPath ('.//'): an absolute '//span' would search the
                # whole document instead of this sub-result
                content = extract_text(eval_xpath(r2, './/span[@class="c-line-clamp2"]'))
                results.append(
                    {
                        'url': url,
                        'title': title,
                        'content': content,
                    }
                )
            continue

        # normal results
        title = extract_text(eval_xpath(result, './/h3/a'))
        url = extract_text(eval_xpath(result, './/h3/a/@href'))
        if not title or not url:
            continue

        content = None
        if tpl in desc_xpath_per_tpl:
            # try the XPath for the Baidu template
            content = extract_text(eval_xpath(result, desc_xpath_per_tpl[tpl]))
        if not content:
            # no content was found: try all the XPath from the Baidu templates
            for xp in desc_xpath_per_tpl.values():
                content = extract_text(eval_xpath(result, xp))
                if content:
                    break

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
            }
        )

    # resolve the Baidu redirections
    # note: Baidu does not support HTTP/2
    request_list = [
        Request.get(
            u['url'].replace('http://www.baidu.com/link?url=', 'https://www.baidu.com/link?url='),
            allow_redirects=False,
            headers=resp.search_params['headers'],
        )
        for u in results
    ]
    response_list = multi_requests(request_list)
    for item, redirect_response in zip(results, response_list):
        if isinstance(redirect_response, Exception):
            # the request failed: keep the URL found in the result page
            continue
        # a response that is not a redirect has no 'location' header: keep the
        # original URL instead of failing with a KeyError
        location = redirect_response.headers.get('location')
        if location:
            item['url'] = location

    return results
def debug_write_content_to_file(text):
    """Debugging aid: write a condensed copy of *text* to ``baidu.html``.

    ``<style>`` elements, ``<script>`` elements and HTML comments matched by
    the patterns below are removed, blank lines are dropped and trailing
    whitespace is stripped from the remaining lines.  The file is created in
    the current working directory (an existing file is overwritten).

    :param text: the HTML source to dump
    """
    # applied in this order: style elements, script elements, comments
    noise_patterns = (
        re.compile(r'<style[^>]*>[^<]+</style>'),
        re.compile(r'<script[^>]*>[^<]+</script>'),
        re.compile(r'\<\!\-\-[^-]+\-\-\>'),
    )
    with open('baidu.html', 'wt', encoding='utf-8') as dump_file:
        for pattern in noise_patterns:
            text = pattern.sub("", text)
        kept_lines = [line.rstrip() for line in text.splitlines() if line.strip()]
        dump_file.write("\n".join(kept_lines))

View File

@ -20,7 +20,7 @@ if (next_call_ts == false or next_call_ts == nil) then
-- 2/ the next call is a random time between start_after_from and start_after_to
local initial_delay = math.random(start_after_from, start_after_to)
redis.call('SET', redis_key, now + initial_delay)
return { false, delay }
return { false, initial_delay }
end
-- next_call_ts is defined

View File

@ -226,15 +226,12 @@ outgoing:
# - 'Hash plugin'
# - 'Self Information'
# - 'Tracker URL remover'
# - 'Unit converter plugin'
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
# # these plugins are disabled if nothing is configured ..
# - 'Hostnames plugin' # see 'hostnames' configuration below
# - 'Open Access DOI rewrite'
# - 'Tor check plugin'
# # Read the docs before activate: auto-detection of the language could be
# # detrimental to users expectations / users can activate the plugin in the
# # preferences if they want.
# - 'Autodetect search language'
# Configuration of the "Hostnames plugin":
#
@ -425,6 +422,12 @@ engines:
shortcut: bi
disabled: true
- name: baidu
engine: baidu
shortcut: ba
timeout: 15
disabled: true
- name: bing images
engine: bing_images
shortcut: bii

View File

@ -39,9 +39,8 @@ msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-26 21:13+0000\n"
"Last-Translator: Atul_Eterno <Atul_Eterno@users.noreply.translate.codeberg."
"org>\n"
"PO-Revision-Date: 2024-11-03 09:08+0000\n"
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
"Language-Team: Spanish <https://translate.codeberg.org/projects/searxng/"
"searxng/es/>\n"
"Language: es\n"
@ -1246,7 +1245,7 @@ msgstr "Tiempo máximo"
#: searx/templates/simple/preferences/favicon.html:2
msgid "Favicon Resolver"
msgstr ""
msgstr "Buscador de favicon"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"

View File

@ -10,21 +10,22 @@
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
# omfj <omfj@users.noreply.translate.codeberg.org>, 2024.
# combwizard <combwizard@users.noreply.translate.codeberg.org>, 2024.
# laaknor <laaknor@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-04 21:35+0000\n"
"Last-Translator: combwizard "
"<combwizard@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-11-03 09:08+0000\n"
"Last-Translator: laaknor <laaknor@users.noreply.translate.codeberg.org>\n"
"Language-Team: Norwegian Bokmål <https://translate.codeberg.org/projects/"
"searxng/searxng/nb_NO/>\n"
"Language: nb_NO\n"
"Language-Team: Norwegian Bokmål "
"<https://translate.codeberg.org/projects/searxng/searxng/nb_NO/>\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n != 1;\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -1051,7 +1052,7 @@ msgstr "Det er ingen flere resultater. Du kan prøve å:"
#: searx/templates/simple/messages/no_results.html:19
msgid "Refresh the page."
msgstr "oppfrisk siden"
msgstr "oppfrisk siden."
#: searx/templates/simple/messages/no_results.html:20
msgid "Search for another query or select another category (above)."
@ -1882,4 +1883,3 @@ msgstr "skjul video"
#~ msgid "Engines cannot retrieve results"
#~ msgstr "Søkemotorer kan ikke motta resultater"

View File

@ -23,13 +23,14 @@
# notlmutsaers <notlmutsaers@users.noreply.translate.codeberg.org>, 2024.
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
# ljansen <ljansen@users.noreply.translate.codeberg.org>, 2024.
# zarlin <zarlin@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: searx\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-28 21:07+0000\n"
"Last-Translator: ljansen <ljansen@users.noreply.translate.codeberg.org>\n"
"PO-Revision-Date: 2024-11-02 04:00+0000\n"
"Last-Translator: zarlin <zarlin@users.noreply.translate.codeberg.org>\n"
"Language-Team: Dutch <https://translate.codeberg.org/projects/searxng/"
"searxng/nl/>\n"
"Language: nl\n"
@ -493,7 +494,7 @@ msgstr "stemmen"
#: searx/engines/radio_browser.py:107
msgid "clicks"
msgstr "clicks"
msgstr "klikken"
#: searx/engines/seekr.py:193 searx/engines/yummly.py:71
#: searx/engines/zlibrary.py:137
@ -662,7 +663,7 @@ msgstr "Voorkeuren"
#: searx/templates/simple/base.html:68
msgid "Powered by"
msgstr "Zoekmachine"
msgstr "Verzorgd door"
#: searx/templates/simple/base.html:68
msgid "a privacy-respecting, open metasearch engine"
@ -1069,7 +1070,7 @@ msgstr "Er zijn geen resultaten meer. U kunt proberen om:"
#: searx/templates/simple/messages/no_results.html:19
msgid "Refresh the page."
msgstr "Ververs de pagina"
msgstr "Ververs de pagina."
#: searx/templates/simple/messages/no_results.html:20
msgid "Search for another query or select another category (above)."
@ -1235,9 +1236,8 @@ msgid "Max time"
msgstr "Max. duur"
#: searx/templates/simple/preferences/favicon.html:2
#, fuzzy
msgid "Favicon Resolver"
msgstr "favicon-resolver"
msgstr "Favicon Oplosser"
#: searx/templates/simple/preferences/favicon.html:15
msgid "Display favicons near search results"

View File

@ -8,21 +8,23 @@
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
# abhabongse <abhabongse@users.noreply.translate.codeberg.org>, 2024.
# tutakrab <tutakrab@users.noreply.translate.codeberg.org>, 2024.
# sahussawud <sahussawud@users.noreply.translate.codeberg.org>, 2024.
msgid ""
msgstr ""
"Project-Id-Version: PROJECT VERSION\n"
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
"PO-Revision-Date: 2024-10-02 16:10+0000\n"
"Last-Translator: tutakrab <tutakrab@users.noreply.translate.codeberg.org>"
"PO-Revision-Date: 2024-11-06 07:26+0000\n"
"Last-Translator: sahussawud <sahussawud@users.noreply.translate.codeberg.org>"
"\n"
"Language-Team: Thai <https://translate.codeberg.org/projects/searxng/searxng/"
"th/>\n"
"Language: th\n"
"Language-Team: Thai "
"<https://translate.codeberg.org/projects/searxng/searxng/th/>\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=1; plural=0;\n"
"X-Generator: Weblate 5.8.1\n"
"Generated-By: Babel 2.16.0\n"
#. CONSTANT_NAMES['NO_SUBGROUPING']
@ -163,7 +165,7 @@ msgstr "มืด"
#. STYLE_NAMES['BLACK']
#: searx/searxng.msg
msgid "black"
msgstr ""
msgstr "สีดำ"
#. BRAND_CUSTOM_LINKS['UPTIME']
#: searx/searxng.msg
@ -331,12 +333,12 @@ msgstr "ผู้เขียน"
#. SOCIAL_MEDIA_TERMS['THREAD OPEN']
#: searx/engines/discourse.py:149 searx/searxng.msg
msgid "open"
msgstr ""
msgstr "สร้าง"
#. SOCIAL_MEDIA_TERMS['THREAD CLOSED']
#: searx/engines/discourse.py:149 searx/searxng.msg
msgid "closed"
msgstr ""
msgstr "ลบ"
#. SOCIAL_MEDIA_TERMS['THREAD ANSWERED']
#: searx/engines/discourse.py:160 searx/searxng.msg
@ -450,7 +452,7 @@ msgstr "คำนวณ {functions} จากอาร์กิวเมนต
#: searx/engines/mozhi.py:57
msgid "Synonyms"
msgstr ""
msgstr "คำเหมือน"
#: searx/engines/openstreetmap.py:159
msgid "Get directions"
@ -538,8 +540,9 @@ msgid "hash digest"
msgstr "แฮชย่อย"
#: searx/plugins/hostnames.py:103
#, fuzzy
msgid "Hostnames plugin"
msgstr ""
msgstr "ชื่อโฮส ปลั๊กอิน"
#: searx/plugins/hostnames.py:104
msgid "Rewrite hostnames, remove results or prioritize them based on the hostname"
@ -1698,4 +1701,3 @@ msgstr "ซ่อนวิดีโอ"
#~ msgid "Engines cannot retrieve results"
#~ msgstr "เครื่องมือไม่สามารถดึงผลลัพธ์ได้"