mirror of https://github.com/searxng/searxng.git
[fix] duckduckgo extra: crashes and returns no results
This commit is contained in:
parent
c4b874e9b0
commit
ca81860c0b
|
@ -6,7 +6,7 @@ DuckDuckGo Lite
|
|||
|
||||
from typing import TYPE_CHECKING
|
||||
import re
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import urlencode, quote_plus
|
||||
import json
|
||||
import babel
|
||||
import lxml.html
|
||||
|
@ -18,12 +18,12 @@ from searx import (
|
|||
)
|
||||
from searx.utils import (
|
||||
eval_xpath,
|
||||
extr,
|
||||
extract_text,
|
||||
)
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx import redisdb
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.utils import extr
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -60,25 +60,25 @@ form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
|||
__CACHE = []
|
||||
|
||||
|
||||
def _cache_key(data: dict):
|
||||
return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{data['q']}//{data['kl']}")
|
||||
def _cache_key(query, region):
|
||||
return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{query}//{region}")
|
||||
|
||||
|
||||
def cache_vqd(data: dict, value):
|
||||
def cache_vqd(query, region, value):
|
||||
"""Caches a ``vqd`` value from a query."""
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
logger.debug("cache vqd value: %s", value)
|
||||
c.set(_cache_key(data), value, ex=600)
|
||||
c.set(_cache_key(query, region), value, ex=600)
|
||||
|
||||
else:
|
||||
logger.debug("MEM cache vqd value: %s", value)
|
||||
if len(__CACHE) > 100: # cache vqd from last 100 queries
|
||||
__CACHE.pop(0)
|
||||
__CACHE.append((_cache_key(data), value))
|
||||
__CACHE.append((_cache_key(query, region), value))
|
||||
|
||||
|
||||
def get_vqd(data):
|
||||
def get_vqd(query, region):
|
||||
"""Returns the ``vqd`` that fits to the *query* and *region*.
|
||||
|
||||
DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
|
||||
|
@ -108,7 +108,7 @@ def get_vqd(data):
|
|||
|
||||
"""
|
||||
|
||||
key = _cache_key(data)
|
||||
key = _cache_key(query, region)
|
||||
value = None
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
|
@ -126,6 +126,23 @@ def get_vqd(data):
|
|||
return None
|
||||
|
||||
|
||||
def extract_vqd(query):
    """Fetch the ``vqd`` token for *query* from the DuckDuckGo web page.

    The DuckDuckGo start page is requested with *query* (URL-quoted) as the
    ``q`` argument, and the text found between ``vqd="`` and the next ``"``
    in the response body is returned.

    General web results currently do not depend on this function; it is only
    needed for the extra result types such as images, videos, ...

    Also see ``get_vqd(query, region)`` above.

    :param query: the search term to obtain a ``vqd`` value for.
    :return: the extracted ``vqd`` string, or ``None`` when the HTTP status
        of the response is not 200.
    """
    # NOTE(review): what ``extr`` yields when the marker is absent is not
    # visible here -- presumably a falsy default; confirm in searx.utils.
    page = get(f'https://duckduckgo.com/?q={quote_plus(query)}')
    if page.status_code == 200:
        return extr(page.text, 'vqd="', '"')
    return None
|
||||
|
||||
|
||||
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
|
||||
"""Get DuckDuckGo's language identifier from SearXNG's locale.
|
||||
|
||||
|
@ -313,7 +330,7 @@ def request(query, params):
|
|||
# from here on no more params['data'] should be set, since this dict is
|
||||
# needed to get a vqd value from the cache ..
|
||||
|
||||
vqd = get_vqd(params['data'])
|
||||
vqd = get_vqd(query, eng_region)
|
||||
|
||||
# Certain conditions must be met in order to call up one of the
|
||||
# following pages ...
|
||||
|
@ -362,7 +379,7 @@ def response(resp):
|
|||
form = form[0]
|
||||
form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
|
||||
|
||||
cache_vqd(resp.search_params["data"], form_vqd)
|
||||
cache_vqd(resp.search_params['data']['q'], resp.search_params['data']['kl'], form_vqd)
|
||||
|
||||
# just select "web-result" and ignore results of class "result--ad result--ad--small"
|
||||
for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'):
|
||||
|
|
|
@ -11,6 +11,8 @@ from searx.utils import get_embeded_stream_url
|
|||
|
||||
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
||||
from searx.engines.duckduckgo import (
|
||||
cache_vqd,
|
||||
extract_vqd,
|
||||
get_ddg_lang,
|
||||
get_vqd,
|
||||
)
|
||||
|
@ -48,15 +50,20 @@ search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
|
|||
|
||||
|
||||
def request(query, params):
|
||||
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||
|
||||
# request needs a vqd argument
|
||||
vqd = get_vqd(query)
|
||||
vqd = get_vqd(query, eng_region)
|
||||
if not vqd:
|
||||
vqd = extract_vqd(query)
|
||||
if vqd:
|
||||
cache_vqd(query, eng_region, vqd)
|
||||
|
||||
if not vqd:
|
||||
# some search terms do not have results and therefore no vqd value
|
||||
params['url'] = None
|
||||
return params
|
||||
|
||||
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||
eng_lang = get_ddg_lang(traits, params['searxng_locale'])
|
||||
|
||||
args = {
|
||||
|
@ -86,6 +93,12 @@ def request(query, params):
|
|||
|
||||
params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
|
||||
|
||||
# sending these two headers prevents rate limiting for the query
|
||||
params['headers'] = {
|
||||
'Referer': 'https://duckduckgo.com/',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}
|
||||
|
||||
return params
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue