[fix] fix duckduckgo engine

- remove paging support: a "vqd" parameter is required between requests, and this parameter is unique for each request
- update the URL (no redirect), use the POST method
- language support: works if there is no more than one request per minute; otherwise it is ignored!
This commit is contained in:
Alexandre Flament 2020-10-09 15:01:40 +02:00
parent a05c660e30
commit cfd21bc475
1 changed files with 13 additions and 30 deletions

View File

@ -21,7 +21,7 @@ from searx.utils import extract_text, match_language, eval_xpath
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
paging = True paging = False
language_support = True language_support = True
supported_languages_url = 'https://duckduckgo.com/util/u172.js' supported_languages_url = 'https://duckduckgo.com/util/u172.js'
time_range_support = True time_range_support = True
@ -37,9 +37,7 @@ language_aliases = {
} }
# search-url # search-url
url = 'https://duckduckgo.com/html?{query}&s={offset}&dc={dc_param}' url = 'https://html.duckduckgo.com/html'
time_range_url = '&df={range}'
time_range_dict = {'day': 'd', time_range_dict = {'day': 'd',
'week': 'w', 'week': 'w',
'month': 'm'} 'month': 'm'}
@ -65,36 +63,21 @@ def get_region_code(lang, lang_list=[]):
def request(query, params): def request(query, params):
if params['time_range'] not in (None, 'None', '') and params['time_range'] not in time_range_dict: if params['time_range'] is not None and params['time_range'] not in time_range_dict:
return params return params
offset = (params['pageno'] - 1) * 30 params['url'] = url
params['method'] = 'POST'
params['data']['b'] = ''
params['data']['q'] = query
params['data']['df'] = ''
region_code = get_region_code(params['language'], supported_languages) region_code = get_region_code(params['language'], supported_languages)
params['url'] = 'https://duckduckgo.com/html/'
if params['pageno'] > 1:
params['method'] = 'POST'
params['data']['q'] = query
params['data']['s'] = offset
params['data']['dc'] = 30
params['data']['nextParams'] = ''
params['data']['v'] = 'l'
params['data']['o'] = 'json'
params['data']['api'] = '/d.js'
if params['time_range'] in time_range_dict:
params['data']['df'] = time_range_dict[params['time_range']]
if region_code: if region_code:
params['data']['kl'] = region_code params['data']['kl'] = region_code
else: params['cookies']['kl'] = region_code
if region_code:
params['url'] = url.format(
query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
else:
params['url'] = url.format(
query=urlencode({'q': query}), offset=offset, dc_param=offset)
if params['time_range'] in time_range_dict: if params['time_range'] in time_range_dict:
params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) params['data']['df'] = time_range_dict[params['time_range']]
return params return params