mirror of https://github.com/searxng/searxng.git
Merge pull request #639 from kvch/digbt-engine
add digbt engine - fixes #638
This commit is contained in:
commit
13bed1f872
|
@ -16,6 +16,7 @@ from urllib import quote
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from searx.engines.xpath import extract_text
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.utils import get_torrent_size
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['videos', 'music', 'files']
|
categories = ['videos', 'music', 'files']
|
||||||
|
@ -68,20 +69,7 @@ def response(resp):
|
||||||
leech = 0
|
leech = 0
|
||||||
|
|
||||||
# convert filesize to byte if possible
|
# convert filesize to byte if possible
|
||||||
try:
|
filesize = get_torrent_size(filesize, filesize_multiplier)
|
||||||
filesize = float(filesize)
|
|
||||||
|
|
||||||
# convert filesize to byte
|
|
||||||
if filesize_multiplier == 'TB':
|
|
||||||
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'GB':
|
|
||||||
filesize = int(filesize * 1024 * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'MB':
|
|
||||||
filesize = int(filesize * 1024 * 1024)
|
|
||||||
elif filesize_multiplier == 'KB':
|
|
||||||
filesize = int(filesize * 1024)
|
|
||||||
except:
|
|
||||||
filesize = None
|
|
||||||
|
|
||||||
# convert files to int if possible
|
# convert files to int if possible
|
||||||
if files.isdigit():
|
if files.isdigit():
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
"""
|
||||||
|
DigBT (Videos, Music, Files)
|
||||||
|
|
||||||
|
@website https://digbt.org
|
||||||
|
@provide-api no
|
||||||
|
|
||||||
|
@using-api no
|
||||||
|
@results HTML (using search portal)
|
||||||
|
@stable no (HTML can change)
|
||||||
|
@parse url, title, content, magnetlink
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urlparse import urljoin
|
||||||
|
from lxml import html
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from searx.utils import get_torrent_size
|
||||||
|
|
||||||
|
# engine configuration
categories = ['videos', 'music', 'files']
# request() reads params['pageno'] to build the URL of a given result page
paging = True

# base address of the site; response() joins relative result links onto it
URL = 'https://digbt.org'
# URL template; request() fills in {query} and {pageno}
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
# indices into the whitespace-split text of a result's "tail" div, e.g.
# "Files: 1 Size: 2.9 GB Downloads: 1 ..." -> [3] is '2.9', [4] is 'GB'
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Set the DigBT search URL on ``params`` and hand ``params`` back.

    ``query`` and ``params['pageno']`` are substituted into ``SEARCH_URL``;
    the resulting address is stored under ``params['url']``.
    """
    search_url = SEARCH_URL.format(query=query, pageno=params['pageno'])
    params['url'] = search_url
    return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Turn a DigBT result page into a list of torrent result dicts.

    Each ``td`` element with class ``x-item`` found in ``resp.content``
    produces one entry rendered with the ``torrent.html`` template. A page
    without such cells gives an empty list. ``seed`` and ``leech`` are always
    reported as ``'N/A'``.
    """
    tree = html.fromstring(resp.content)

    results = []
    for item in tree.xpath('.//td[@class="x-item"]'):
        # link and caption both come from the anchor carrying a title attribute
        href = item.xpath('.//a[@title]/@href')[0]
        url = urljoin(URL, href)
        title = item.xpath('.//a[@title]/text()')[0]

        content = extract_text(item.xpath('.//div[@class="files"]'))

        # the "tail" text is split on whitespace; size value and unit are
        # picked out by their fixed positions
        tail_words = extract_text(item.xpath('.//div[@class="tail"]')).split()
        size_value = tail_words[FILESIZE]
        size_unit = tail_words[FILESIZE_MULTIPLIER]
        filesize = get_torrent_size(size_value, size_unit)

        magnetlink = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        entry = {
            'url': url,
            'title': title,
            'content': content,
            'filesize': filesize,
            'magnetlink': magnetlink,
            'seed': 'N/A',
            'leech': 'N/A',
            'template': 'torrent.html',
        }
        results.append(entry)

    return results
|
|
@ -87,7 +87,7 @@ engines:
|
||||||
- name : btdigg
|
- name : btdigg
|
||||||
engine : btdigg
|
engine : btdigg
|
||||||
shortcut : bt
|
shortcut : bt
|
||||||
|
|
||||||
- name : crossref
|
- name : crossref
|
||||||
engine : json_engine
|
engine : json_engine
|
||||||
paging : True
|
paging : True
|
||||||
|
@ -118,6 +118,12 @@ engines:
|
||||||
weight : 2
|
weight : 2
|
||||||
disabled : True
|
disabled : True
|
||||||
|
|
||||||
|
- name : digbt
|
||||||
|
engine : digbt
|
||||||
|
shortcut : dbt
|
||||||
|
timeout : 6.0
|
||||||
|
disabled : True
|
||||||
|
|
||||||
- name : digg
|
- name : digg
|
||||||
engine : digg
|
engine : digg
|
||||||
shortcut : dg
|
shortcut : dg
|
||||||
|
|
|
@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
|
||||||
return a_list[index]
|
return a_list[index]
|
||||||
else:
|
else:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a size value and its unit into a number of bytes.

    :param filesize: numeric part of the size; anything ``float()`` accepts
        (e.g. ``'2.9'`` or ``2.9``)
    :param filesize_multiplier: unit of ``filesize``; ``'KB'``, ``'MB'``,
        ``'GB'`` and ``'TB'`` are recognised and treated as powers of 1024
    :returns: the size in bytes as an ``int`` for a recognised unit, the
        plain ``float`` value when the unit is not recognised, or ``None``
        when ``filesize`` cannot be converted to a finite number
    """
    # position in this tuple + 1 is the power of 1024 the unit stands for
    units = ('KB', 'MB', 'GB', 'TB')

    try:
        size = float(filesize)

        if filesize_multiplier in units:
            exponent = units.index(filesize_multiplier) + 1
            size = int(size * 1024 ** exponent)
    except (TypeError, ValueError, OverflowError):
        # float() rejects non-numeric input (TypeError / ValueError);
        # int() rejects nan (ValueError) and infinity (OverflowError).
        # Anything else, e.g. KeyboardInterrupt, is deliberately not caught.
        return None

    return size
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
from collections import defaultdict
|
||||||
|
import mock
|
||||||
|
from searx.engines import digbt
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestDigBTEngine(SearxTestCase):
    """Tests for the ``request`` and ``response`` functions of the digbt engine."""

    def test_request(self):
        # the built URL has to contain both the query and the DigBT host
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        params = digbt.request(query, request_params)

        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('digbt.org', params['url'])

    def test_response(self):
        # objects without a ``content`` attribute must raise AttributeError
        for invalid in (None, [], '', '[]'):
            self.assertRaises(AttributeError, digbt.response, invalid)

        # a page without any result cell gives an empty result list
        empty_page = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(empty_page), [])

        # a page with exactly one result cell
        html = """
        <table class="table">
            <tr><td class="x-item">
                <div>
                    <a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
                    <span class="ctime"><span style="color:red;">4 hours ago</span></span>
                </div>
                <div class="files">
                    <ul>
                        <li>The Big Bang Theory 2.9 GB</li>
                        <li>....</li>
                    </ul>
                </div>
                <div class="tail">
                    Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>

                    <a class="title" href="magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory">
                        <span class="glyphicon glyphicon-magnet"></span> magnet-link
                    </a>

                </div>
            </td></tr>
        </table>
        """
        single_result_page = mock.Mock(content=html)
        results = digbt.response(single_result_page)

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        first = results[0]
        self.assertEqual(first['title'], 'The Big Bang Theory')
        self.assertEqual(first['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
        self.assertEqual(first['content'], 'The Big Bang Theory 2.9 GB ....')
        self.assertEqual(first['filesize'], 3113851289)
        self.assertEqual(first['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')
|
Loading…
Reference in New Issue