mirror of https://github.com/searxng/searxng.git
[mod] do not escape html content in engines
commit 16bdc0baf4
parent 28f12ef5a0
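
Escaping now happens once, in the presentation layer, instead of inside every engine. A minimal sketch of the double-escaping this removes (Python 2, matching the urllib/urlparse imports below; the example values are made up):

    # What happened when an engine escaped a result field that an
    # autoescaping template then escaped again on output.
    from cgi import escape  # the helper these engines used to import

    title = 'Tom & Jerry <1940>'
    escaped_once = escape(title)          # 'Tom &amp; Jerry &lt;1940&gt;'
    escaped_twice = escape(escaped_once)  # what users actually saw:
    print(escaped_twice)                  # 'Tom &amp;amp; Jerry &amp;lt;1940&amp;gt;'

    # Engines now return plain text; the single, final escape is left
    # to the template layer.
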
@@ -12,7 +12,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -135,7 +134,7 @@ def response(resp):
     for result in dom.xpath(xpath_results):
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         results.append({'url': href,
                         'title': title})
@@ -16,7 +16,6 @@
 from lxml import etree
 from urllib import urlencode
 from searx.utils import searx_useragent
-from cgi import escape
 from datetime import datetime
 import re
 
@@ -94,7 +93,7 @@ def response(resp):
             url = item.text
 
         elif item.attrib["name"] == "dcdescription":
-            content = escape(item.text[:300])
+            content = item.text[:300]
             if len(item.text) > 300:
                 content += "..."
 
@@ -14,7 +14,6 @@
 """
 
 from urllib import urlencode
-from cgi import escape
 from lxml import html
 from searx.engines.xpath import extract_text
 
@@ -61,7 +60,7 @@ def response(resp):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
@@ -73,7 +72,7 @@ def response(resp):
         link = result.xpath('.//h2/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -51,8 +50,8 @@ def response(resp):
     for result in search_res:
         link = result.xpath('.//td[@class="torrent_name"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
+        title = extract_text(link)
+        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
         content = "<br />".join(content.split("\n"))
 
         filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
@@ -14,7 +14,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 from datetime import datetime
 
 # engine dependent config
@@ -57,7 +56,7 @@ def response(resp):
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        content = escape(res['description'])
+        content = res['description']
         thumbnail = res['thumbnail_360_url']
        publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
@@ -51,10 +51,11 @@ def response(resp):
             if url.startswith('http://'):
                 url = 'https' + url[4:]
 
-            content = result['artist']['name'] +\
-                " • " +\
-                result['album']['title'] +\
-                " • " + result['title']
+            content = '{} - {} - {}'.format(
+                result['artist']['name'],
+                result['album']['title'],
+                result['title'])
 
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
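
Besides dropping escape(), this hunk (and the matching Spotify one below) rewrites backslash-continued concatenation as a single str.format call; note the separator also changes from "•" to "-". An illustrative before/after with made-up track data:

    # Made-up track data, shaped like the API fields the hunk reads.
    result = {'artist': {'name': 'Daft Punk'},
              'album': {'title': 'Discovery'},
              'title': 'One More Time'}

    # before: line-continuation concatenation with " • " separators
    content = result['artist']['name'] +\
        " • " +\
        result['album']['title'] +\
        " • " + result['title']

    # after: one str.format call states the whole shape at once
    content = '{} - {} - {}'.format(result['artist']['name'],
                                    result['album']['title'],
                                    result['title'])
    print(content)  # Daft Punk - Discovery - One More Time
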
@@ -12,7 +12,6 @@
 import re
 from urlparse import urljoin
 from lxml import html
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -62,8 +61,8 @@ def response(resp):
 
         results.append({
             'url': urljoin(resp.url, '?%d' % k),
-            'title': escape(from_result.text_content()),
-            'content': escape('; '.join(to_results))
+            'title': from_result.text_content(),
+            'content': '; '.join(to_results)
         })
 
     return results
@@ -13,7 +13,6 @@
 from urllib import quote_plus
 from json import loads
 from lxml import html
-from cgi import escape
 from dateutil import parser
 
 # engine dependent config
@@ -56,7 +55,7 @@ def response(resp):
         url = result.attrib.get('data-contenturl')
         thumbnail = result.xpath('.//img')[0].attrib.get('src')
         title = ''.join(result.xpath(title_xpath))
-        content = escape(''.join(result.xpath(content_xpath)))
+        content = ''.join(result.xpath(content_xpath))
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         publishedDate = parser.parse(pubdate)
 
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from searx.engines.xpath import extract_text
 from lxml import html
@@ -43,7 +42,7 @@ def response(resp):
         img_src = app.xpath('.//img/@src')[0]
 
         content = extract_text(app.xpath('./p')[0])
-        content = escape(content.replace(title, '', 1).strip())
+        content = content.replace(title, '', 1).strip()
 
         results.append({'url': url,
                         'title': title,
@@ -77,21 +77,13 @@ def response(resp):
 
         url = build_flickr_url(photo['owner'], photo['id'])
 
-        title = photo['title']
-
-        content = '<span class="photo-author">' +\
-                  photo['ownername'] +\
-                  '</span><br />' +\
-                  '<span class="description">' +\
-                  photo['description']['_content'] +\
-                  '</span>'
-
         # append result
         results.append({'url': url,
-                        'title': title,
+                        'title': photo['title'],
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': photo['description']['_content'],
+                        'author': photo['ownername'],
                         'template': 'images.html'})
 
         # return results
@@ -102,16 +102,15 @@ def response(resp):
 
         title = photo.get('title', '')
 
-        content = '<span class="photo-author">' +\
-                  photo['username'] +\
-                  '</span><br />'
+        author = photo['username']
 
         # append result
         results.append({'url': url,
                         'title': title,
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': '',
+                        'author': author,
                         'template': 'images.html'})
 
     return results
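
Both Flickr hunks above follow the same pattern: instead of assembling <span> markup inside content, the owner name moves to a new author field and content keeps only raw text, so the template owns all markup. A sketch of the resulting result dict (field names from the diff; photo data made up):

    # Made-up photo record shaped like the fields the hunks read.
    photo = {'title': 'Sunset', 'ownername': 'alice',
             'description': {'_content': 'Golden hour at the pier.'}}

    # old: data and presentation mixed in one HTML string, e.g.
    # '<span class="photo-author">alice</span><br />'
    # '<span class="description">Golden hour at the pier.</span>'

    # new: plain-text fields; the images.html template renders them
    result = {'title': photo['title'],
              'content': photo['description']['_content'],
              'author': photo['ownername'],
              'template': 'images.html'}
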
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from json import loads
 from random import randint
 from time import time
@@ -78,8 +77,8 @@ def response(resp):
     for result in response_json['results']:
         # append result
         results.append({'url': result['url'],
-                        'title': escape(result['title']),
-                        'content': escape(result['sum'])})
+                        'title': result['title'],
+                        'content': result['sum']})
 
     # return results
     return results
@@ -12,7 +12,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 
 # engine dependent config
 categories = ['it']
@@ -48,7 +47,7 @@ def response(resp):
         url = res['html_url']
 
         if res['description']:
-            content = escape(res['description'][:500])
+            content = res['description'][:500]
         else:
             content = ''
 
@@ -9,7 +9,6 @@
 # @parse url, title, content, suggestion
 
 import re
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
@@ -155,7 +154,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return escape(extract_text(r[0]))
+        return extract_text(r[0])
     return None
 
 
@@ -264,7 +263,7 @@ def response(resp):
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': escape(extract_text(suggestion))})
+        results.append({'suggestion': extract_text(suggestion)})
 
     # return results
     return results
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -57,7 +56,7 @@ def response(resp):
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
         leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
         filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
@@ -9,7 +9,6 @@
 @parse url, title, content, seed, leech, torrentfile
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -78,7 +77,7 @@ def response(resp):
 
         # torrent title
         page_a = result.xpath(xpath_title)[0]
-        title = escape(extract_text(page_a))
+        title = extract_text(page_a)
 
         # link to the page
         href = page_a.attrib.get('href')
@@ -90,7 +89,7 @@ def response(resp):
         try:
             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
             file_size = int(float(file_size) * get_filesize_mul(suffix))
-        except Exception as e:
+        except:
             file_size = None
 
         # seed count
@@ -105,7 +104,6 @@ def response(resp):
         # content string contains all information not included into template
         content = 'Category: "{category}". Downloaded {downloads} times.'
         content = content.format(category=category, downloads=downloads)
-        content = escape(content)
 
         results.append({'url': href,
                         'title': title,
@@ -9,7 +9,6 @@
 # @parse url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -62,7 +61,7 @@ def response(resp):
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
         # convert seed to int if possible
@@ -11,7 +11,6 @@
 """
 
 import json
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, urljoin
 from datetime import datetime
@@ -68,7 +67,7 @@ def response(resp):
             img_results.append(params)
         else:
             created = datetime.fromtimestamp(data['created_utc'])
-            content = escape(data['selftext'])
+            content = data['selftext']
             if len(content) > 500:
                 content = content[:500] + '...'
             params['content'] = content
@@ -44,20 +44,12 @@ def response(resp):
     # parse results
     for result in search_results.get('results', []):
         href = result['url']
-        title = "[" + result['type'] + "] " +\
-                result['namespace'] +\
-                " " + result['name']
-        content = '<span class="highlight">[' +\
-                  result['type'] + "] " +\
-                  result['name'] + " " +\
-                  result['synopsis'] +\
-                  "</span><br />" +\
-                  result['description']
+        title = "[{}] {} {}".format(result['type'], result['namespace'], result['name'])
 
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': content})
+                        'content': result['description']})
 
     # return results
     return results
@@ -9,7 +9,6 @@
 # @parse url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -46,10 +46,11 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = result['artists'][0]['name'] +\
-                " • " +\
-                result['album']['name'] +\
-                " • " + result['name']
+            content = '{} - {} - {}'.format(
+                result['artists'][0]['name'],
+                result['album']['name'],
+                result['name'])
 
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -48,8 +47,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath(content_xpath)))
+        title = extract_text(link)
+        content = extract_text(result.xpath(content_xpath))
 
         # append result
         results.append({'url': href,
@@ -11,7 +11,6 @@
 # @todo paging
 
 from lxml import html
-from cgi import escape
 from dateutil import parser
 from datetime import datetime, timedelta
 import re
@@ -79,10 +78,10 @@ def response(resp):
         if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
             continue
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         if result.xpath('./p[@class="desc clk"]'):
-            content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
+            content = extract_text(result.xpath('./p[@class="desc clk"]'))
         else:
             content = ''
 
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
@@ -59,7 +58,7 @@ def response(resp):
         elif search_lang:
             href = href + search_lang + '/'
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
         content = content + " - "
@@ -75,7 +74,7 @@ def response(resp):
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': escape(content)})
+                        'content': content})
 
     # return results
     return results
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from json import loads
 from urllib import urlencode, unquote
 import re
@@ -78,7 +77,7 @@ def response(resp):
 
         # append result
         results.append({'url': result['SourceUrl'],
-                        'title': escape(result['Title']),
+                        'title': result['Title'],
                         'content': '',
                         'img_src': img_url,
                         'template': 'images.html'})
@@ -90,8 +89,8 @@ def response(resp):
 
         # append result
         results.append({'url': result_url,
-                        'title': escape(result_title),
-                        'content': escape(result_content)})
+                        'title': result_title,
+                        'content': result_content})
 
     # parse images
     for result in json.get('Images', []):
@@ -100,7 +99,7 @@ def response(resp):
 
         # append result
         results.append({'url': result['SourceUrl'],
-                        'title': escape(result['Title']),
+                        'title': result['Title'],
                         'content': '',
                         'img_src': img_url,
                         'template': 'images.html'})
@@ -11,7 +11,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -12,7 +12,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 import re
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -52,14 +51,14 @@ def request(query, params):
 def response(resp):
     results = []
     results.append({
-        'url': escape(web_url.format(
+        'url': web_url.format(
             from_lang=resp.search_params['from_lang'][2],
             to_lang=resp.search_params['to_lang'][2],
-            query=resp.search_params['query'])),
-        'title': escape('[{0}-{1}] {2}'.format(
+            query=resp.search_params['query']),
+        'title': '[{0}-{1}] {2}'.format(
             resp.search_params['from_lang'][1],
             resp.search_params['to_lang'][1],
-            resp.search_params['query'])),
-        'content': escape(resp.json()['responseData']['translatedText'])
+            resp.search_params['query']),
+        'content': resp.json()['responseData']['translatedText']
     })
     return results
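
The 'url' field in this hunk shows a second reason for the change: passing a URL through escape() rewrites "&" in query strings and corrupts the link. A small demonstration (hypothetical URL):

    from cgi import escape  # Python 2 helper this commit removes

    url = 'http://example.org/translate?from=en&to=de'  # hypothetical
    print(escape(url))
    # http://example.org/translate?from=en&amp;to=de -- no longer the same URL
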
@@ -8,7 +8,6 @@
 # @stable no
 # @parse url, infobox
 
-from cgi import escape
 from json import loads
 from time import time
 from urllib import urlencode
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.search import logger
@@ -52,8 +51,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         try:
             res = {'url': result.xpath(url_xpath)[0],
-                   'title': escape(''.join(result.xpath(title_xpath))),
-                   'content': escape(''.join(result.xpath(content_xpath)))}
+                   'title': ''.join(result.xpath(title_xpath)),
+                   'content': ''.join(result.xpath(content_xpath))}
         except:
             logger.exception('yandex parse crash')
             continue