searxng/searx/engines/dailymotion.py

from urllib import urlencode
from json import loads
from lxml import html

# Engine metadata; presumably read by the engine loader (not visible here).
categories = ['videos']
# Sent to the API as the ``localization`` query parameter by request().
locale = 'en_US'
# request() reads params['pageno'] to fetch a specific result page.
paging = True

# see http://www.dailymotion.com/doc/api/obj-video.html
# {pageno} and {query} are filled in by request(); {query} is an already
# url-encoded query string.
search_url = (
    'https://api.dailymotion.com/videos'
    '?fields=title,description,duration,url,thumbnail_360_url'
    '&sort=relevance&limit=25&page={pageno}&{query}'
)

# TODO use video result template
# response() formats this with {0} = video url and {1} = thumbnail url.
content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'


def request(query, params):
    """Store the Dailymotion search URL for *query* in ``params['url']``.

    The query string is built from the search terms and the module-level
    ``locale`` (sent as ``localization``); the page number is taken from
    ``params['pageno']``.  The same ``params`` dict is returned after being
    updated in place.
    """
    query_string = urlencode({'search': query, 'localization': locale})
    page = params['pageno']
    params['url'] = search_url.format(query=query_string, pageno=page)
    return params


def response(resp):
    """Turn a Dailymotion API reply into a list of result dicts.

    ``resp.text`` is parsed as JSON.  If the parsed reply has no ``'list'``
    entry, an empty list is returned.  Otherwise every entry of ``'list'``
    produces a dict with the keys ``url``, ``title`` and ``content``.

    ``content`` starts with the thumbnail markup (``content_tpl``) when the
    entry has a non-empty ``thumbnail_360_url``, followed by at most the
    first 500 characters of the description with its HTML markup removed.

    ``title`` and ``url`` are mandatory: an entry without them raises
    ``KeyError``.  The thumbnail and the description are optional; an entry
    that lacks either key is handled like one whose value is empty.
    """
    search_res = loads(resp.text)
    if 'list' not in search_res:
        return []

    results = []
    for res in search_res['list']:
        title = res['title']
        url = res['url']

        # Optional fields: use .get() so one incomplete entry does not
        # abort the whole result list with a KeyError.
        thumbnail = res.get('thumbnail_360_url')
        content = content_tpl.format(url, thumbnail) if thumbnail else ''

        description = res.get('description')
        if description:
            # Truncate after the markup is stripped, so the limit applies
            # to the visible text.
            content += text_content_from_html(description)[:500]

        results.append({'url': url, 'title': title, 'content': content})
    return results


def text_content_from_html(html_string):
    """Return the text content of *html_string* with the markup removed.

    The string is parsed with ``lxml.html.fragment_fromstring``;
    ``create_parent=True`` asks lxml to wrap the fragment in a parent
    element before the text of the resulting tree is extracted.
    """
    return html.fragment_fromstring(
        html_string, create_parent=True).text_content()
add dailymotion engine 2013-12-30 21:42:37 +00:00			`from urllib import urlencode`
			`from json import loads`
Improves PEP8 compatibility. 2014-02-05 19:24:31 +00:00			`from lxml import html`
add dailymotion engine 2013-12-30 21:42:37 +00:00
			`categories = ['videos']`
[fix] dailymotion engine : no more html tag in the description 2014-01-05 12:55:17 +00:00			`locale = 'en_US'`
add dailymotion engine 2013-12-30 21:42:37 +00:00
			`# see http://www.dailymotion.com/doc/api/obj-video.html`
[enh] paging support for dailymotion 2014-01-29 23:01:42 +00:00			`search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page={pageno}&{query}' # noqa`
[fix] pep/flake8 compatibility 2014-01-20 01:31:20 +00:00
			`# TODO use video result template`
			`content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'`
add dailymotion engine 2013-12-30 21:42:37 +00:00
[enh] paging support for dailymotion 2014-01-29 23:01:42 +00:00			`paging = True`

fix: robot fw, entry points, some flake8, package searx egg 2014-01-19 21:59:01 +00:00
add dailymotion engine 2013-12-30 21:42:37 +00:00			`def request(query, params):`
fix: robot fw, entry points, some flake8, package searx egg 2014-01-19 21:59:01 +00:00			`params['url'] = search_url.format(`
[enh] paging support for dailymotion 2014-01-29 23:01:42 +00:00			`query=urlencode({'search': query, 'localization': locale}),`
			`pageno=params['pageno'])`
add dailymotion engine 2013-12-30 21:42:37 +00:00			`return params`


			`def response(resp):`
			`results = []`
			`search_res = loads(resp.text)`
			`if not 'list' in search_res:`
			`return results`
			`for res in search_res['list']:`
			`title = res['title']`
			`url = res['url']`
			`if res['thumbnail_360_url']:`
[fix] pep/flake8 compatibility 2014-01-20 01:31:20 +00:00			`content = content_tpl.format(url, res['thumbnail_360_url'])`
add dailymotion engine 2013-12-30 21:42:37 +00:00			`else:`
			`content = ''`
			`if res['description']:`
[fix] dailymotion engine : no more html tag in the description 2014-01-05 12:55:17 +00:00			`description = text_content_from_html(res['description'])`
			`content += description[:500]`
add dailymotion engine 2013-12-30 21:42:37 +00:00			`results.append({'url': url, 'title': title, 'content': content})`
			`return results`
[fix] dailymotion engine : no more html tag in the description 2014-01-05 12:55:17 +00:00
fix: robot fw, entry points, some flake8, package searx egg 2014-01-19 21:59:01 +00:00
[fix] dailymotion engine : no more html tag in the description 2014-01-05 12:55:17 +00:00			`def text_content_from_html(html_string):`
			`desc_html = html.fragment_fromstring(html_string, create_parent=True)`
			`return desc_html.text_content()`