Remove unnecessary code in wolframalpha_noapi engine

The answer is scraped from a js function, so parsing the html tree
doesn't achieve anything here.
This commit is contained in:
a01200356 2015-12-29 21:11:49 -06:00
parent b51ba32f61
commit d827fc49a1
1 changed files with 18 additions and 31 deletions

View File

@ -10,8 +10,6 @@
import re import re
import json import json
from urllib import urlencode from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
# search-url # search-url
url = 'http://www.wolframalpha.com/' url = 'http://www.wolframalpha.com/'
@ -25,42 +23,31 @@ def request(query, params):
return params return params
# tries to find answer under the pattern given # get response from search-request
def extract_answer(script_list, pattern): def response(resp):
answer = None results = []
# the answer is inside a js function
# answer can be located in different 'pods', although by default it should be in pod_0200
possible_locations = ['pod_0200\.push(.*)\n',
'pod_0100\.push(.*)\n']
# get line that matches the pattern # get line that matches the pattern
for script in script_list: for pattern in possible_locations:
try: try:
line = re.search(pattern, script.text_content()).group(1) line = re.search(pattern, resp.text).group(1)
break
except AttributeError: except AttributeError:
continue continue
# extract answer from json if not line:
answer = line[line.find('{') : line.rfind('}')+1] return results
answer = json.loads(answer.encode('unicode-escape'))
answer = answer['stringified'].decode('unicode-escape')
return answer # extract answer from json
answer = line[line.find('{') : line.rfind('}')+1]
answer = json.loads(answer.encode('unicode-escape'))
answer = answer['stringified'].decode('unicode-escape')
results.append({'answer': answer})
# get response from search-request
def response(resp):
dom = html.fromstring(resp.text)
# the answer is inside a js script
scripts = dom.xpath('//script')
results = []
# answer can be located in different 'pods', although by default it should be in pod_0200
answer = extract_answer(scripts, 'pod_0200\.push(.*)\n')
if not answer:
answer = extract_answer(scripts, 'pod_0100\.push(.*)\n')
if answer:
results.append({'answer': answer})
else:
results.append({'answer': answer})
return results return results