mirror of https://github.com/searxng/searxng.git
Compare commits
6 Commits
3085474818
...
7f1c0c6a7c
Author | SHA1 | Date |
---|---|---|
Alexandre Flament | 7f1c0c6a7c | |
Leo Liu | dfaf5868e2 | |
Leo Liu | b173f3a8b9 | |
dependabot[bot] | 2fbf15eccb | |
searxng-bot | 08c5f258d8 | |
Alexandre Flament | 4398ce059f |
|
@ -18,4 +18,4 @@ fasttext-predict==0.9.2.2
|
|||
tomli==2.0.2; python_version < '3.11'
|
||||
msgspec==0.18.6
|
||||
eval_type_backport; python_version < '3.9'
|
||||
typer-slim==0.12.5
|
||||
typer-slim==0.13.0
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Baidu (Web)
|
||||
|
||||
- https://github.com/searx/searx/issues/2019#issuecomment-648227442
|
||||
"""
|
||||
|
||||
import re
|
||||
from urllib.parse import urlencode
|
||||
from lxml import html
|
||||
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
|
||||
from searx.network import raise_for_httperror, multi_requests, get, Request
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
# Engine metadata.
about = {
    "website": 'https://www.baidu.com',
    "wikidata_id": 'Q14772',
    "official_api_documentation": 'https://apis.baidu.com/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    # "zh" is the ISO 639-1 code for Chinese ("zn" is not a valid code)
    "language": 'zh',
}

# engine dependent config
categories = ['general', 'web']
paging = False
time_range_support = False
safesearch = False

# base_url ends with a slash: the search path is appended to it as-is
base_url = 'https://www.baidu.com/'
# ``{query}`` is replaced by the URL-encoded query string
search_string = 's?{query}'

# values of a result container's ``tpl`` attribute for which response()
# drops the result
skip_tpls = ('img_normal', 'short_video', 'yl_music_song', 'dict3', 'recommend_list')

# XPath of the result description, keyed by the value of the ``tpl``
# attribute of the result container
desc_xpath_per_tpl = {
    'se_com_default': './/span[contains(@class, "content-right_8Zs40")]',
    'kaifa_pc_open_source_software': './/p[contains(@class, "c-color-text")]',
    'bk_polysemy': './/div/@aria-label',
    'se_st_single_video_zhanzhang': './/span[contains(@class, "c-span-last")]//p[2]',
}
|
||||
|
||||
|
||||
def get_initial_parameters(params):
    """Fetch Baidu's start page and collect the fields of its search form.

    :param params: request parameters; only ``params['headers']`` is read.
    :returns: a tuple ``(query_params, cookies)`` where ``query_params`` maps
        the ``name`` of every ``<input>`` inside ``<form id="form">`` to its
        ``value`` and ``cookies`` are the cookies of the start page response.

    An HTTP error on the start page is raised by ``get`` itself
    (``raise_for_httperror=True``).
    """
    resp_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(resp_index.text)
    query_params = {
        # An input without a ``value`` attribute is sent as an empty string;
        # a None value would be serialised as the literal text "None" once
        # the dict goes through urlencode().
        ielement.attrib.get('name'): ielement.attrib.get('value', '')
        for ielement in eval_xpath_list(dom, '//form[@id="form"]//input[@name]')
    }
    return query_params, resp_index.cookies
|
||||
|
||||
|
||||
def request(query, params):
    """Prepare the search request sent to Baidu.

    Browser-like headers are merged into ``params['headers']``, the hidden
    form fields and cookies of the start page are fetched with
    ``get_initial_parameters`` and the search URL is built from those fields
    plus the user's query (form field ``wd``).

    ``params['raise_for_httperror']`` is switched off: response() inspects
    the reply first and calls ``raise_for_httperror`` on its own.
    """
    browser_headers = {
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Sec-GPC': '1',
        'Upgrade-Insecure-Requests': '1',
        'TE': 'trailers',
    }
    params['headers'].update(browser_headers)

    form_fields, session_cookies = get_initial_parameters(params)
    form_fields['wd'] = query
    encoded_query = urlencode(form_fields)

    params['url'] = base_url + search_string.format(query=encoded_query)
    params['cookies'] = session_cookies
    params['raise_for_httperror'] = False
    return params
|
||||
|
||||
|
||||
def response(resp):
    """Build the result list from Baidu's HTML result page.

    :param resp: the response of the search request; ``resp.search_params``
        provides the headers and cookies reused for the follow-up requests.
    :returns: a list of dicts with the keys ``url``, ``title`` and
        ``content``.
    :raises SearxEngineCaptchaException: when the response comes from
        ``wappass.baidu.com`` or from a ``/static/captcha`` path.

    HTTP errors are raised by ``raise_for_httperror`` after the CAPTCHA check.
    """
    results = []

    if resp.url.host == 'wappass.baidu.com' or resp.url.path.startswith('/static/captcha'):
        raise SearxEngineCaptchaException()
    raise_for_httperror(resp)

    dom = html.fromstring(resp.text)

    # follow redirect but don't use the result page to reduce the CAPTCHA issue
    redirect_element = eval_xpath_getindex(dom, '//noscript/meta[@http-equiv="refresh"]/@content', 0, default=None)
    if redirect_element and redirect_element.startswith('0; url='):
        get(
            # NOTE(review): '0; url=' is 7 characters, so [8:] also drops the
            # first character of the target -- presumably its leading '/',
            # since base_url already ends with one; confirm.
            base_url + redirect_element[8:],
            headers=resp.search_params['headers'],
            cookies=resp.search_params['cookies'],
        )

    for result in eval_xpath_list(dom, '//div[contains(@id,"content_left")]/div[contains(@class, "c-container")]'):
        tpl = result.attrib.get('tpl')
        if tpl in skip_tpls:
            continue

        if tpl == 'kaifa_pc_blog_weak':
            # skip the result to kaifa.baidu.com (search engine for IT)
            # but includes results from kaifa
            for r2 in eval_xpath_list(result, './/div[contains(@class, "c-gap-bottom-small")]'):
                title = extract_text(eval_xpath(r2, './/div[@class="c-row"]//a'))
                url = extract_text(eval_xpath(r2, './/div[@class="c-row"]//a/@href'))
                # same guard as for the normal results below
                if not title or not url:
                    continue
                # relative XPath: an absolute '//span' would search the whole
                # document instead of this sub-result only
                content = extract_text(eval_xpath(r2, './/span[@class="c-line-clamp2"]'))
                results.append(
                    {
                        'url': url,
                        'title': title,
                        'content': content,
                    }
                )
            continue

        # normal results
        title = extract_text(eval_xpath(result, './/h3/a'))
        url = extract_text(eval_xpath(result, './/h3/a/@href'))

        if not title or not url:
            continue

        content = None
        if tpl in desc_xpath_per_tpl:
            # try the XPath for the Baidu template
            content = extract_text(eval_xpath(result, desc_xpath_per_tpl[tpl]))
        if not content:
            # no content was found: try all the XPath from the Baidu templates
            for xp in desc_xpath_per_tpl.values():
                content = extract_text(eval_xpath(result, xp))
                if content:
                    break
        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
            }
        )

    # resolve the Baidu redirections
    # note: Baidu does not support HTTP/2
    request_list = [
        Request.get(
            u['url'].replace('http://www.baidu.com/link?url=', 'https://www.baidu.com/link?url='),
            allow_redirects=False,
            headers=resp.search_params['headers'],
        )
        for u in results
    ]
    response_list = multi_requests(request_list)
    for i, redirect_response in enumerate(response_list):
        if isinstance(redirect_response, Exception):
            # the request failed: keep the URL found in the result page
            continue
        # a response without a Location header is not a redirect: keep the
        # original URL instead of failing with a KeyError
        location = redirect_response.headers.get('location')
        if location:
            results[i]['url'] = location

    return results
|
||||
|
||||
|
||||
def debug_write_content_to_file(text):
    """Debug helper: write a stripped-down copy of *text* to ``baidu.html``.

    ``<style>`` elements, ``<script>`` elements and HTML comments matched by
    the patterns below are removed, blank lines are dropped and trailing
    whitespace is stripped from the remaining lines.  The file is created in
    the current working directory and overwritten on each call.
    """
    noise_patterns = (
        re.compile(r'<style[^>]*>[^<]+</style>'),
        re.compile(r'<script[^>]*>[^<]+</script>'),
        re.compile(r'\<\!\-\-[^-]+\-\-\>'),
    )
    with open('baidu.html', 'wt', encoding='utf-8') as out:
        # the patterns are applied in order: style, script, comment
        for pattern in noise_patterns:
            text = pattern.sub("", text)
        kept_lines = []
        for line in text.splitlines():
            if line.strip():
                kept_lines.append(line.rstrip())
        out.write("\n".join(kept_lines))
|
|
@ -20,7 +20,7 @@ if (next_call_ts == false or next_call_ts == nil) then
|
|||
-- 2/ the next call is a random time between start_after_from and start_after_to
|
||||
local initial_delay = math.random(start_after_from, start_after_to)
|
||||
redis.call('SET', redis_key, now + initial_delay)
|
||||
return { false, delay }
|
||||
return { false, initial_delay }
|
||||
end
|
||||
|
||||
-- next_call_ts is defined
|
||||
|
|
|
@ -226,15 +226,12 @@ outgoing:
|
|||
# - 'Hash plugin'
|
||||
# - 'Self Information'
|
||||
# - 'Tracker URL remover'
|
||||
# - 'Unit converter plugin'
|
||||
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
|
||||
# # these plugins are disabled if nothing is configured ..
|
||||
# - 'Hostnames plugin' # see 'hostnames' configuration below
|
||||
# - 'Open Access DOI rewrite'
|
||||
# - 'Tor check plugin'
|
||||
# # Read the docs before activate: auto-detection of the language could be
|
||||
# # detrimental to users expectations / users can activate the plugin in the
|
||||
# # preferences if they want.
|
||||
# - 'Autodetect search language'
|
||||
|
||||
# Configuration of the "Hostnames plugin":
|
||||
#
|
||||
|
@ -425,6 +422,12 @@ engines:
|
|||
shortcut: bi
|
||||
disabled: true
|
||||
|
||||
- name: baidu
|
||||
engine: baidu
|
||||
shortcut: ba
|
||||
timeout: 15
|
||||
disabled: true
|
||||
|
||||
- name: bing images
|
||||
engine: bing_images
|
||||
shortcut: bii
|
||||
|
|
Binary file not shown.
|
@ -39,9 +39,8 @@ msgstr ""
|
|||
"Project-Id-Version: searx\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
|
||||
"PO-Revision-Date: 2024-10-26 21:13+0000\n"
|
||||
"Last-Translator: Atul_Eterno <Atul_Eterno@users.noreply.translate.codeberg."
|
||||
"org>\n"
|
||||
"PO-Revision-Date: 2024-11-03 09:08+0000\n"
|
||||
"Last-Translator: return42 <return42@users.noreply.translate.codeberg.org>\n"
|
||||
"Language-Team: Spanish <https://translate.codeberg.org/projects/searxng/"
|
||||
"searxng/es/>\n"
|
||||
"Language: es\n"
|
||||
|
@ -1246,7 +1245,7 @@ msgstr "Tiempo máximo"
|
|||
|
||||
#: searx/templates/simple/preferences/favicon.html:2
|
||||
msgid "Favicon Resolver"
|
||||
msgstr ""
|
||||
msgstr "Buscador de favicon"
|
||||
|
||||
#: searx/templates/simple/preferences/favicon.html:15
|
||||
msgid "Display favicons near search results"
|
||||
|
|
Binary file not shown.
|
@ -10,21 +10,22 @@
|
|||
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
|
||||
# omfj <omfj@users.noreply.translate.codeberg.org>, 2024.
|
||||
# combwizard <combwizard@users.noreply.translate.codeberg.org>, 2024.
|
||||
# laaknor <laaknor@users.noreply.translate.codeberg.org>, 2024.
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PROJECT VERSION\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
|
||||
"PO-Revision-Date: 2024-10-04 21:35+0000\n"
|
||||
"Last-Translator: combwizard "
|
||||
"<combwizard@users.noreply.translate.codeberg.org>\n"
|
||||
"PO-Revision-Date: 2024-11-03 09:08+0000\n"
|
||||
"Last-Translator: laaknor <laaknor@users.noreply.translate.codeberg.org>\n"
|
||||
"Language-Team: Norwegian Bokmål <https://translate.codeberg.org/projects/"
|
||||
"searxng/searxng/nb_NO/>\n"
|
||||
"Language: nb_NO\n"
|
||||
"Language-Team: Norwegian Bokmål "
|
||||
"<https://translate.codeberg.org/projects/searxng/searxng/nb_NO/>\n"
|
||||
"Plural-Forms: nplurals=2; plural=n != 1;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Plural-Forms: nplurals=2; plural=n != 1;\n"
|
||||
"X-Generator: Weblate 5.8.1\n"
|
||||
"Generated-By: Babel 2.16.0\n"
|
||||
|
||||
#. CONSTANT_NAMES['NO_SUBGROUPING']
|
||||
|
@ -1051,7 +1052,7 @@ msgstr "Det er ingen flere resultater. Du kan prøve å:"
|
|||
|
||||
#: searx/templates/simple/messages/no_results.html:19
|
||||
msgid "Refresh the page."
|
||||
msgstr "oppfrisk siden"
|
||||
msgstr "oppfrisk siden."
|
||||
|
||||
#: searx/templates/simple/messages/no_results.html:20
|
||||
msgid "Search for another query or select another category (above)."
|
||||
|
@ -1882,4 +1883,3 @@ msgstr "skjul video"
|
|||
|
||||
#~ msgid "Engines cannot retrieve results"
|
||||
#~ msgstr "Søkemotorer kan ikke motta resultater"
|
||||
|
||||
|
|
Binary file not shown.
|
@ -23,13 +23,14 @@
|
|||
# notlmutsaers <notlmutsaers@users.noreply.translate.codeberg.org>, 2024.
|
||||
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
|
||||
# ljansen <ljansen@users.noreply.translate.codeberg.org>, 2024.
|
||||
# zarlin <zarlin@users.noreply.translate.codeberg.org>, 2024.
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: searx\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
|
||||
"PO-Revision-Date: 2024-10-28 21:07+0000\n"
|
||||
"Last-Translator: ljansen <ljansen@users.noreply.translate.codeberg.org>\n"
|
||||
"PO-Revision-Date: 2024-11-02 04:00+0000\n"
|
||||
"Last-Translator: zarlin <zarlin@users.noreply.translate.codeberg.org>\n"
|
||||
"Language-Team: Dutch <https://translate.codeberg.org/projects/searxng/"
|
||||
"searxng/nl/>\n"
|
||||
"Language: nl\n"
|
||||
|
@ -493,7 +494,7 @@ msgstr "stemmen"
|
|||
|
||||
#: searx/engines/radio_browser.py:107
|
||||
msgid "clicks"
|
||||
msgstr "clicks"
|
||||
msgstr "klikken"
|
||||
|
||||
#: searx/engines/seekr.py:193 searx/engines/yummly.py:71
|
||||
#: searx/engines/zlibrary.py:137
|
||||
|
@ -662,7 +663,7 @@ msgstr "Voorkeuren"
|
|||
|
||||
#: searx/templates/simple/base.html:68
|
||||
msgid "Powered by"
|
||||
msgstr "Zoekmachine"
|
||||
msgstr "Verzorgd door"
|
||||
|
||||
#: searx/templates/simple/base.html:68
|
||||
msgid "a privacy-respecting, open metasearch engine"
|
||||
|
@ -1069,7 +1070,7 @@ msgstr "Er zijn geen resultaten meer. U kunt proberen om:"
|
|||
|
||||
#: searx/templates/simple/messages/no_results.html:19
|
||||
msgid "Refresh the page."
|
||||
msgstr "Ververs de pagina"
|
||||
msgstr "Ververs de pagina."
|
||||
|
||||
#: searx/templates/simple/messages/no_results.html:20
|
||||
msgid "Search for another query or select another category (above)."
|
||||
|
@ -1235,9 +1236,8 @@ msgid "Max time"
|
|||
msgstr "Max. duur"
|
||||
|
||||
#: searx/templates/simple/preferences/favicon.html:2
|
||||
#, fuzzy
|
||||
msgid "Favicon Resolver"
|
||||
msgstr "favicon-resolver"
|
||||
msgstr "Favicon Oplosser"
|
||||
|
||||
#: searx/templates/simple/preferences/favicon.html:15
|
||||
msgid "Display favicons near search results"
|
||||
|
|
Binary file not shown.
|
@ -8,21 +8,23 @@
|
|||
# return42 <return42@users.noreply.translate.codeberg.org>, 2024.
|
||||
# abhabongse <abhabongse@users.noreply.translate.codeberg.org>, 2024.
|
||||
# tutakrab <tutakrab@users.noreply.translate.codeberg.org>, 2024.
|
||||
# sahussawud <sahussawud@users.noreply.translate.codeberg.org>, 2024.
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PROJECT VERSION\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2024-10-05 06:24+0000\n"
|
||||
"PO-Revision-Date: 2024-10-02 16:10+0000\n"
|
||||
"Last-Translator: tutakrab <tutakrab@users.noreply.translate.codeberg.org>"
|
||||
"PO-Revision-Date: 2024-11-06 07:26+0000\n"
|
||||
"Last-Translator: sahussawud <sahussawud@users.noreply.translate.codeberg.org>"
|
||||
"\n"
|
||||
"Language-Team: Thai <https://translate.codeberg.org/projects/searxng/searxng/"
|
||||
"th/>\n"
|
||||
"Language: th\n"
|
||||
"Language-Team: Thai "
|
||||
"<https://translate.codeberg.org/projects/searxng/searxng/th/>\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"MIME-Version: 1.0\n"
|
||||
"Content-Type: text/plain; charset=utf-8\n"
|
||||
"Content-Transfer-Encoding: 8bit\n"
|
||||
"Plural-Forms: nplurals=1; plural=0;\n"
|
||||
"X-Generator: Weblate 5.8.1\n"
|
||||
"Generated-By: Babel 2.16.0\n"
|
||||
|
||||
#. CONSTANT_NAMES['NO_SUBGROUPING']
|
||||
|
@ -163,7 +165,7 @@ msgstr "มืด"
|
|||
#. STYLE_NAMES['BLACK']
|
||||
#: searx/searxng.msg
|
||||
msgid "black"
|
||||
msgstr ""
|
||||
msgstr "สีดำ"
|
||||
|
||||
#. BRAND_CUSTOM_LINKS['UPTIME']
|
||||
#: searx/searxng.msg
|
||||
|
@ -331,12 +333,12 @@ msgstr "ผู้เขียน"
|
|||
#. SOCIAL_MEDIA_TERMS['THREAD OPEN']
|
||||
#: searx/engines/discourse.py:149 searx/searxng.msg
|
||||
msgid "open"
|
||||
msgstr ""
|
||||
msgstr "สร้าง"
|
||||
|
||||
#. SOCIAL_MEDIA_TERMS['THREAD CLOSED']
|
||||
#: searx/engines/discourse.py:149 searx/searxng.msg
|
||||
msgid "closed"
|
||||
msgstr ""
|
||||
msgstr "ลบ"
|
||||
|
||||
#. SOCIAL_MEDIA_TERMS['THREAD ANSWERED']
|
||||
#: searx/engines/discourse.py:160 searx/searxng.msg
|
||||
|
@ -450,7 +452,7 @@ msgstr "คำนวณ {functions} จากอาร์กิวเมนต
|
|||
|
||||
#: searx/engines/mozhi.py:57
|
||||
msgid "Synonyms"
|
||||
msgstr ""
|
||||
msgstr "คำเหมือน"
|
||||
|
||||
#: searx/engines/openstreetmap.py:159
|
||||
msgid "Get directions"
|
||||
|
@ -538,8 +540,9 @@ msgid "hash digest"
|
|||
msgstr "แฮชย่อย"
|
||||
|
||||
#: searx/plugins/hostnames.py:103
|
||||
#, fuzzy
|
||||
msgid "Hostnames plugin"
|
||||
msgstr ""
|
||||
msgstr "ชื่อโฮส ปลั๊กอิน"
|
||||
|
||||
#: searx/plugins/hostnames.py:104
|
||||
msgid "Rewrite hostnames, remove results or prioritize them based on the hostname"
|
||||
|
@ -1698,4 +1701,3 @@ msgstr "ซ่อนวิดีโอ"
|
|||
|
||||
#~ msgid "Engines cannot retrieve results"
|
||||
#~ msgstr "เครื่องมือไม่สามารถดึงผลลัพธ์ได้"
|
||||
|
||||
|
|
Loading…
Reference in New Issue