diff --git a/searx/engines/google.py b/searx/engines/google.py index 4e6fa6190..578dec60c 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -138,12 +138,7 @@ content_xpath = './/div[@class="IsZvec"]' # Suggestions are links placed in a *card-section*, we extract only the text # from the links not the links itself. -suggestion_xpath = '//div[contains(@class, "card-section")]//a' - -# Since google does *auto-correction* on the first query these are not really -# *spelling suggestions*, we use them anyway. -spelling_suggestion_xpath = '//div[@class="med"]/p/a' - +suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' def get_lang_info(params, lang_list, custom_aliases, supported_any_language): """Composing various language properties for the google engines. @@ -322,7 +317,6 @@ def response(resp): # convert the text to dom dom = html.fromstring(resp.text) - # results --> answer answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]') if answer_list: @@ -379,9 +373,6 @@ def response(resp): # append suggestion results.append({'suggestion': extract_text(suggestion)}) - for correction in eval_xpath_list(dom, spelling_suggestion_xpath): - results.append({'correction': extract_text(correction)}) - # return results return results diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py index 9403ef4f7..abf046f4c 100644 --- a/searx/engines/google_videos.py +++ b/searx/engines/google_videos.py @@ -31,13 +31,9 @@ from searx.engines.google import ( get_lang_info, time_range_dict, filter_mapping, - results_xpath, g_section_with_header, title_xpath, - href_xpath, - content_xpath, suggestion_xpath, - spelling_suggestion_xpath, detect_google_sorry, ) @@ -74,11 +70,27 @@ def _re(regexpr): RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr)) return RE_CACHE[regexpr] + +def scrap_out_thumbs_src(dom): + ret_val = {} + thumb_name = 'dimg_' + for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'): + _script = script.text + # "dimg_35":"https://i.ytimg.c....", + _dimurl = _re("s='([^']*)").findall( _script) + for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script): + v = v.replace(r'\u003d','=') + v = v.replace(r'\u0026','&') + ret_val[k] = v + logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys()) + return ret_val + + def scrap_out_thumbs(dom): """Scrap out thumbnail data from