mirror of https://github.com/searxng/searxng.git

commit 132681b3aa: Merge branch 'master' of https://github.com/asciimoo/searx
.gitignore
@@ -1,20 +1,24 @@
-env
-engines.cfg
-.installed.cfg
 .coverage
-covearge/
+.installed.cfg
+engines.cfg
+env
+robot_log.html
+robot_output.xml
+robot_report.html
 setup.cfg
 
 *.pyc
 */*.pyc
 
 bin/
+build/
+covearge/
+develop-eggs/
+dist/
+eggs/
 include/
 lib/
-build/
-develop-eggs/
-eggs/
 local/
-searx.egg-info/
 parts/
+searx.egg-info/
 var/
Makefile
@@ -21,11 +21,7 @@ $(python):
 tests: .installed.cfg
 	@bin/test
 
-enginescfg:
-	@test -f ./engines.cfg || echo "Copying engines.cfg ..."
-	@cp --no-clobber engines.cfg_sample engines.cfg
-
-robot: .installed.cfg enginescfg
+robot: .installed.cfg
 	@bin/robot
 
 flake8: .installed.cfg
@@ -37,18 +33,21 @@ coverage: .installed.cfg
 	@bin/coverage report --show-missing
 	@bin/coverage html --directory ./coverage
 
-production: bin/buildout production.cfg setup.py enginescfg
+production: bin/buildout production.cfg setup.py
 	bin/buildout -c production.cfg $(options)
 	@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
 	@echo "* Hint 1: on production, disable debug mode and change secret_key"
 	@echo "* Hint 2: searx will be executed at server startup by crontab"
 	@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
 
-minimal: bin/buildout minimal.cfg setup.py enginescfg
+minimal: bin/buildout minimal.cfg setup.py
 	bin/buildout -c minimal.cfg $(options)
 
+locales:
+	@pybabel compile -d searx/translations
+
 clean:
 	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
 		searx.egg-info lib include .coverage coverage
 
-.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
+.PHONY: all tests robot flake8 coverage production minimal locales clean
README.md
@@ -1,122 +0,0 @@
-searx
-=====
-
-A privacy-respecting, hackable [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine).
-
-List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instances).
-
-[![Flattr searx](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software)
-
-
-### Features
-
-* Tracking free
-* Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples))
-* Parallel queries
-* Supports multiple output formats
-  * json `curl https://searx.0x2a.tk/?format=json&q=[query]`
-  * csv `curl https://searx.0x2a.tk/?format=csv&q=[query]`
-  * opensearch/rss `curl https://searx.0x2a.tk/?format=rss&q=[query]`
-* Opensearch support (you can set as default search engine)
-* Configurable search engines/categories
-
-
-### Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* install dependencies: `pip install -r requirements.txt`
-* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
-* run `python searx/webapp.py` to start the application
-
-For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
-
-
-### Alternative (Recommended) Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make minimal`
-* run `bin/searx-run` to start the application
-
-
-### Development
-
-Just run `make`. Versions of dependencies are pinned down inside `versions.cfg` to produce most stable build. Also remember, NO make command should be run as root, not even `make production`
-
-
-### Deployment
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make production`
-* run `bin/supervisord` to start the application
-
-
-### Upgrading
-
-* inside previously cloned searx directory run: `git stash` to temporarily save any changes you have made
-* pull source: `git pull origin master`
-* re-build in current folder: `make production`
-* run `bin/supervisorctl stop searx` to stop searx, if it does not, then run `fuser -k 8888/tcp`
-* run `bin/supervisorctl reload` to re-read supervisor config and start searx
-
-
-### Command make
-
-##### `make`
-
-Builds development environment with testing support.
-
-##### `make tests`
-
-Runs tests. You can write tests [here](https://github.com/asciimoo/searx/tree/master/searx/tests) and remember 'untested code is broken code'.
-
-##### `make robot`
-
-Runs robot (Selenium) tests, you must have `firefox` installed because this functional tests actually run the browser and perform operations on it. Also searx is executed with [settings_robot](https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py).
-
-##### `make flake8`
-
-'pep8 is a tool to check your Python code against some of the style conventions in [PEP 8](http://www.python.org/dev/peps/pep-0008/).'
-
-##### `make coverage`
-
-Checks coverage of tests, after running this, execute this: `firefox ./coverage/index.html`
-
-##### `make production`
-
-Used to make co-called production environment - without tests (you should ran tests before deploying searx on the server). This installs supervisord, so if searx crashes, it will try to pick itself up again. And crontab entry is added to start supervisord at server boot.
-
-##### `make minimal`
-
-Minimal build - without test frameworks, the quickest build option.
-
-##### `make clean`
-
-Deletes several folders and files (see `Makefile` for more), so that next time you run any other `make` command it will rebuild everithing.
-
-
-### TODO
-
-* Moar engines
-* Better ui
-* Language support
-* Documentation
-* Pagination
-* Fix `flake8` errors, `make flake8` will be merged into `make tests` when it does not fail anymore
-* Tests
-* When we have more tests, we can integrate Travis-CI
-
-
-### Bugs
-
-Bugs or suggestions? Visit the [issue tracker](https://github.com/asciimoo/searx/issues).
-
-
-### [License](https://github.com/asciimoo/searx/blob/master/LICENSE)
-
-
-### More about searx
-
-* [ohloh](https://www.ohloh.net/p/searx/)
-* [twitter](https://twitter.com/Searx_engine)
-* IRC: #searx @ freenode
-
README.rst
@@ -0,0 +1,159 @@
+searx
+=====
+
+A privacy-respecting, hackable `metasearch
+engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
+
+List of `running
+instances <https://github.com/asciimoo/searx/wiki/Searx-instances>`__.
+
+|Flattr searx|
+
+Features
+~~~~~~~~
+
+- Tracking free
+- Modular (see
+  `examples <https://github.com/asciimoo/searx/blob/master/examples>`__)
+- Parallel queries
+- Supports multiple output formats
+   - json ``curl https://searx.0x2a.tk/?format=json&q=[query]``
+   - csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]``
+   - opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]``
+- Opensearch support (you can set as default search engine)
+- Configurable search engines/categories
+
+Installation
+~~~~~~~~~~~~
+
+- clone source:
+  ``git clone git@github.com:asciimoo/searx.git && cd searx``
+- install dependencies: ``pip install -r requirements.txt``
+- edit your
+  `settings.yml <https://github.com/asciimoo/searx/blob/master/settings.yml>`__
+  (set your ``secret_key``!)
+- run ``python searx/webapp.py`` to start the application
+
+For all the details, follow this `step by step
+installation <https://github.com/asciimoo/searx/wiki/Installation>`__
+
+Alternative (Recommended) Installation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- clone source:
+  ``git clone git@github.com:asciimoo/searx.git && cd searx``
+- build in current folder: ``make minimal``
+- run ``bin/searx-run`` to start the application
+
+Development
+~~~~~~~~~~~
+
+Just run ``make``. Versions of dependencies are pinned down inside
+``versions.cfg`` to produce most stable build. Also remember, NO make
+command should be run as root, not even ``make production``
+
+Deployment
+~~~~~~~~~~
+
+- clone source:
+  ``git clone git@github.com:asciimoo/searx.git && cd searx``
+- build in current folder: ``make production``
+- run ``bin/supervisord`` to start the application
+
+Upgrading
+~~~~~~~~~
+
+- inside previously cloned searx directory run: ``git stash`` to
+  temporarily save any changes you have made
+- pull source: ``git pull origin master``
+- re-build in current folder: ``make production``
+- run ``bin/supervisorctl stop searx`` to stop searx, if it does not,
+  then run ``fuser -k 8888/tcp``
+- run ``bin/supervisorctl reload`` to re-read supervisor config and
+  start searx
+
+Command make
+~~~~~~~~~~~~
+
+``make``
+''''''''
+
+Builds development environment with testing support.
+
+``make tests``
+''''''''''''''
+
+Runs tests. You can write tests
+`here <https://github.com/asciimoo/searx/tree/master/searx/tests>`__ and
+remember 'untested code is broken code'.
+
+``make robot``
+''''''''''''''
+
+Runs robot (Selenium) tests, you must have ``firefox`` installed because
+this functional tests actually run the browser and perform operations on
+it. Also searx is executed with
+`settings\_robot <https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py>`__.
+
+``make flake8``
+'''''''''''''''
+
+'pep8 is a tool to check your Python code against some of the style
+conventions in `PEP 8 <http://www.python.org/dev/peps/pep-0008/>`__.'
+
+``make coverage``
+'''''''''''''''''
+
+Checks coverage of tests, after running this, execute this:
+``firefox ./coverage/index.html``
+
+``make production``
+'''''''''''''''''''
+
+Used to make co-called production environment - without tests (you
+should ran tests before deploying searx on the server). This installs
+supervisord, so if searx crashes, it will try to pick itself up again.
+And crontab entry is added to start supervisord at server boot.
+
+``make minimal``
+''''''''''''''''
+
+Minimal build - without test frameworks, the quickest build option.
+
+``make clean``
+''''''''''''''
+
+Deletes several folders and files (see ``Makefile`` for more), so that
+next time you run any other ``make`` command it will rebuild everithing.
+
+TODO
+~~~~
+
+- Moar engines
+- Better ui
+- Language support
+- Documentation
+- Pagination
+- Fix ``flake8`` errors, ``make flake8`` will be merged into
+  ``make tests`` when it does not fail anymore
+- Tests
+- When we have more tests, we can integrate Travis-CI
+
+Bugs
+~~~~
+
+Bugs or suggestions? Visit the `issue
+tracker <https://github.com/asciimoo/searx/issues>`__.
+
+`License <https://github.com/asciimoo/searx/blob/master/LICENSE>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+More about searx
+~~~~~~~~~~~~~~~~
+
+- `ohloh <https://www.ohloh.net/p/searx/>`__
+- `twitter <https://twitter.com/Searx_engine>`__
+- IRC: #searx @ freenode
+
+.. |Flattr searx| image:: http://api.flattr.com/button/flattr-badge-large.png
+   :target: https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software
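Note: the ``?format=json`` output advertised in the README can also be consumed programmatically. A minimal sketch (Python 2, matching the codebase; the response is assumed to carry a top-level ``results`` list, and the instance URL is the one the README itself uses):

    import json
    import urllib2

    resp = urllib2.urlopen('https://searx.0x2a.tk/?format=json&q=test')
    data = json.load(resp)
    for result in data.get('results', []):
        print result.get('url')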
babel.cfg
@@ -0,0 +1,3 @@
+[python: **.py]
+[jinja2: **/templates/**.html]
+extensions=jinja2.ext.autoescape,jinja2.ext.with_
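Note: the ``extensions=`` line matters because pybabel has to load the same Jinja2 extensions the templates use before it can parse them for translatable strings. A minimal check that those extensions resolve (a sketch for the Jinja2 2.x API of that era; ``jinja2.ext.autoescape`` and ``jinja2.ext.with_`` no longer exist in Jinja2 3.x):

    from jinja2 import Environment

    env = Environment(
        extensions=['jinja2.ext.autoescape', 'jinja2.ext.with_'])
    # {% with %} only parses when the with_ extension is loaded
    tpl = env.from_string('{% with name="searx" %}{{ name }}{% endwith %}')
    print tpl.render()  # -> searx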
buildout.cfg
@@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
 dependent-scripts = true
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [robot]
minimal.cfg
@@ -13,5 +13,3 @@ parts +=
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run
production.cfg
@@ -15,8 +15,6 @@ parts +=
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [supervisor]
requirements.txt
@@ -1,4 +1,5 @@
 flask
+flask-babel
 grequests
 lxml
 pyyaml
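Note: ``flask-babel`` is the new runtime dependency behind the ``gettext`` calls introduced further down. A minimal sketch of how it plugs into a Flask app (names like ``app`` are illustrative; searx's actual wiring lives in ``webapp.py``, which is not part of this excerpt):

    from flask import Flask
    from flask.ext.babel import Babel, gettext  # flask.ext import style, as in this commit

    app = Flask(__name__)
    babel = Babel(app)  # picks up the catalogs compiled by `make locales`

    @app.route('/')
    def index():
        # wrapped strings are looked up in searx/translations/*/LC_MESSAGES
        return gettext('Number of results')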
searx/__init__.py
@@ -1,5 +1,5 @@
 from os import environ
-from os.path import realpath, dirname, join
+from os.path import realpath, dirname, join, abspath
 try:
     from yaml import load
 except:
@@ -7,8 +7,7 @@ except:
     stderr.write('[E] install pyyaml\n')
     exit(2)
 
-searx_dir = realpath(dirname(realpath(__file__))+'/../')
+searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
 
 if 'SEARX_SETTINGS_PATH' in environ:
@@ -19,4 +18,3 @@ else:
 
 with open(settings_path) as settings_yaml:
     settings = load(settings_yaml)
-
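Note: the net effect of the hunk above is that ``searx_dir`` now points at the package directory itself rather than its parent. The resulting settings lookup, condensed (the ``else`` branch is not shown in the hunk; a ``settings.yml`` next to the package is assumed):

    from os import environ
    from os.path import abspath, dirname, join

    searx_dir = abspath(dirname(__file__))

    if 'SEARX_SETTINGS_PATH' in environ:
        settings_path = environ['SEARX_SETTINGS_PATH']   # explicit override
    else:
        settings_path = join(searx_dir, 'settings.yml')  # packaged default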
searx/engines/__init__.py
@@ -26,6 +26,7 @@ from searx import settings
 from searx.utils import gen_useragent
 import sys
 from datetime import datetime
+from flask.ext.babel import gettext
 
 engine_dir = dirname(realpath(__file__))
 
@@ -35,6 +36,7 @@ engines = {}
 
 categories = {'general': []}
 
+
 def load_module(filename):
     modname = splitext(filename)[0]
     if modname in sys.modules:
@@ -50,7 +52,7 @@ if not 'engines' in settings or not settings['engines']:
 
 for engine_data in settings['engines']:
     engine_name = engine_data['engine']
-    engine = load_module(engine_name+'.py')
+    engine = load_module(engine_name + '.py')
     for param_name in engine_data:
         if param_name == 'engine':
             continue
@@ -58,38 +60,50 @@ for engine_data in settings['engines']:
         if engine_data['categories'] == 'none':
             engine.categories = []
         else:
-            engine.categories = map(str.strip, engine_data['categories'].split(','))
+            engine.categories = map(
+                str.strip, engine_data['categories'].split(','))
         continue
     setattr(engine, param_name, engine_data[param_name])
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
             continue
-        if getattr(engine, engine_attr) == None:
-            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)
+        if getattr(engine, engine_attr) is None:
+            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa
             sys.exit(1)
     engines[engine.name] = engine
-    engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0}
+    engine.stats = {
+        'result_count': 0,
+        'search_count': 0,
+        'page_load_time': 0,
+        'score_count': 0,
+        'errors': 0
+    }
     if hasattr(engine, 'categories'):
         for category_name in engine.categories:
             categories.setdefault(category_name, []).append(engine)
     else:
         categories['general'].append(engine)
 
 
 def default_request_params():
-    return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+    return {
+        'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
 
 
 def make_callback(engine_name, results, suggestions, callback, params):
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
         cb_res = []
         response.search_params = params
-        engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds()
+        engines[engine_name].stats['page_load_time'] += \
+            (datetime.now() - params['started']).total_seconds()
         try:
             search_results = callback(response)
         except Exception, e:
             engines[engine_name].stats['errors'] += 1
             results[engine_name] = cb_res
-            print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e))
+            print '[E] Error with engine "{0}":\n\t{1}'.format(
+                engine_name, str(e))
             return
         for result in search_results:
             result['engine'] = engine_name
@@ -101,23 +115,25 @@ def make_callback(engine_name, results, suggestions, callback, params):
         results[engine_name] = cb_res
     return process_callback
 
 
 def score_results(results):
-    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
+    flat_res = filter(
+        None, chain.from_iterable(izip_longest(*results.values())))
     flat_len = len(flat_res)
     engines_len = len(results)
     results = []
     # deduplication + scoring
-    for i,res in enumerate(flat_res):
+    for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
         res['engines'] = [res['engine']]
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
-        score = int((flat_len - i)/engines_len)*weight+1
+        score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
         for new_res in results:
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path
+            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
+            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
             if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
                p1 == p2 and\
               res['parsed_url'].query == new_res['parsed_url'].query and\
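Note: the score line reformatted above implements rank-based fusion: results from all engines are interleaved position by position, and earlier positions earn higher scores. A standalone worked example (Python 2, like the diff; engine names and URLs are made up):

    from itertools import chain, izip_longest

    results = {'a': [{'url': 'http://x/1'}, {'url': 'http://x/2'}],
               'b': [{'url': 'http://y/1'}]}
    flat_res = filter(
        None, chain.from_iterable(izip_longest(*results.values())))
    flat_len = len(flat_res)    # 3
    engines_len = len(results)  # 2
    for i, res in enumerate(flat_res):
        # weight defaults to 1.0 unless the engine defines one
        res['score'] = int((flat_len - i) / engines_len) * 1.0 + 1
    # positions 0, 1, 2 -> scores 2.0, 2.0, 1.0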
@@ -125,7 +141,7 @@ def score_results(results):
                 duplicated = new_res
                 break
         if duplicated:
-            if len(res.get('content', '')) > len(duplicated.get('content', '')):
+            if len(res.get('content', '')) > len(duplicated.get('content', '')):  # noqa
                 duplicated['content'] = res['content']
                 duplicated['score'] += score
                 duplicated['engines'].append(res['engine'])
@@ -139,6 +155,7 @@ def score_results(results):
             results.append(res)
     return sorted(results, key=itemgetter('score'), reverse=True)
 
+
 def search(query, request, selected_engines):
     global engines, categories, number_of_searches
     requests = []
@@ -160,13 +177,20 @@ def search(query, request, selected_engines):
         request_params['started'] = datetime.now()
         request_params = engine.request(query, request_params)
 
-        callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params)
+        callback = make_callback(
+            selected_engine['name'],
+            results,
+            suggestions,
+            engine.response,
+            request_params
+        )
 
-        request_args = dict(headers = request_params['headers']
-                           ,hooks = dict(response=callback)
-                           ,cookies = request_params['cookies']
-                           ,timeout = settings['server']['request_timeout']
-                           )
+        request_args = dict(
+            headers=request_params['headers'],
+            hooks=dict(response=callback),
+            cookies=request_params['cookies'],
+            timeout=settings['server']['request_timeout']
+        )
 
         if request_params['method'] == 'GET':
             req = grequests.get
@@ -180,7 +204,7 @@ def search(query, request, selected_engines):
 
         requests.append(req(request_params['url'], **request_args))
     grequests.map(requests)
-    for engine_name,engine_results in results.items():
+    for engine_name, engine_results in results.items():
         engines[engine_name].stats['search_count'] += 1
         engines[engine_name].stats['result_count'] += len(engine_results)
 
@@ -192,6 +216,7 @@ def search(query, request, selected_engines):
 
     return results, suggestions
 
+
 def get_engines_stats():
     # TODO refactor
     pageloads = []
@@ -200,14 +225,15 @@ def get_engines_stats():
     errors = []
     scores_per_result = []
 
-    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0
+    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
     for engine in engines.values():
         if engine.stats['search_count'] == 0:
             continue
-        results_num = engine.stats['result_count']/float(engine.stats['search_count'])
-        load_times = engine.stats['page_load_time']/float(engine.stats['search_count'])
+        results_num = \
+            engine.stats['result_count'] / float(engine.stats['search_count'])
+        load_times = engine.stats['page_load_time'] / float(engine.stats['search_count'])  # noqa
        if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])
+            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
            score_per_result = score / results_num
        else:
            score = score_per_result = 0.0
@@ -220,30 +246,48 @@ def get_engines_stats():
         results.append({'avg': results_num, 'name': engine.name})
         scores.append({'avg': score, 'name': engine.name})
         errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({'avg': score_per_result, 'name': engine.name})
+        scores_per_result.append({
+            'avg': score_per_result,
+            'name': engine.name
+        })
 
     for engine in pageloads:
-        engine['percentage'] = int(engine['avg']/max_pageload*100)
+        engine['percentage'] = int(engine['avg'] / max_pageload * 100)
 
     for engine in results:
-        engine['percentage'] = int(engine['avg']/max_results*100)
+        engine['percentage'] = int(engine['avg'] / max_results * 100)
 
     for engine in scores:
-        engine['percentage'] = int(engine['avg']/max_score*100)
+        engine['percentage'] = int(engine['avg'] / max_score * 100)
 
     for engine in scores_per_result:
-        engine['percentage'] = int(engine['avg']/max_score_per_result*100)
+        engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
 
     for engine in errors:
         if max_errors:
-            engine['percentage'] = int(float(engine['avg'])/max_errors*100)
+            engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
         else:
            engine['percentage'] = 0
 
-    return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg')))
-           ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True))
-           ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True))
-           ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True))
-           ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True))
-           ]
+    return [
+        (
+            gettext('Page loads (sec)'),
+            sorted(pageloads, key=itemgetter('avg'))
+        ),
+        (
+            gettext('Number of results'),
+            sorted(results, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Scores'),
+            sorted(scores, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Scores per result'),
+            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Errors'),
+            sorted(errors, key=itemgetter('avg'), reverse=True)
+        ),
+    ]
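Note: ``search()`` above fans queries out with grequests and never touches the responses directly; each engine's parser runs inside a response hook. The pattern, reduced to a self-contained skeleton (URLs are placeholders, and the hook body stands in for ``engine.response()``):

    import grequests

    def make_hook(name, store):
        def hook(response, **kwargs):
            store[name] = len(response.text)  # stand-in for result parsing
        return hook

    store = {}
    reqs = [grequests.get(url, hooks=dict(response=make_hook(name, store)))
            for name, url in (('a', 'https://example.com/'),
                              ('b', 'https://example.org/'))]
    grequests.map(reqs)  # returns once every response hook has run
    print store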
searx/engines/bing.py
@@ -4,11 +4,12 @@ from cgi import escape
 
 base_url = 'http://www.bing.com/'
 search_string = 'search?{query}'
 locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
 
 
 def request(query, params):
-    search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale}))
+    search_path = search_string.format(
+        query=urlencode({'q': query, 'setmkt': locale}))
     #if params['category'] == 'images':
     #    params['url'] = base_url + 'images/' + search_path
     params['url'] = base_url + search_path
searx/engines/currency_convert.py
@@ -5,7 +5,8 @@ categories = []
 url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
+parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)  # noqa
 
 
 def request(query, params):
     m = parser_re.match(query)
@@ -19,7 +20,7 @@ def request(query, params):
         # wrong params
         return params
 
-    q = (from_currency+to_currency).upper()
+    q = (from_currency + to_currency).upper()
 
     params['url'] = url.format(query=q)
     params['ammount'] = ammount
@@ -33,25 +34,29 @@ def response(resp):
     global base_url
     results = []
     try:
-        _,conversion_rate,_ = resp.text.split(',', 2)
+        _, conversion_rate, _ = resp.text.split(',', 2)
         conversion_rate = float(conversion_rate)
     except:
         return results
 
-    title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount']
-                                          ,resp.search_params['from']
-                                          ,resp.search_params['to']
-                                          ,resp.search_params['ammount']*conversion_rate
-                                          )
+    title = '{0} {1} in {2} is {3}'.format(
+        resp.search_params['ammount'],
+        resp.search_params['from'],
+        resp.search_params['to'],
+        resp.search_params['ammount'] * conversion_rate
+    )
 
-    content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
+    content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
+                                        conversion_rate,
+                                        resp.search_params['to'])
     now_date = datetime.now().strftime('%Y%m%d')
-    url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
-    url = url.format(now_date
-                    ,resp.search_params['ammount']
-                    ,resp.search_params['from'].lower()
-                    ,resp.search_params['to'].lower()
-                    )
+    url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa
+    url = url.format(
+        now_date,
+        resp.search_params['ammount'],
+        resp.search_params['from'].lower(),
+        resp.search_params['to'].lower()
+    )
     results.append({'title': title, 'content': content, 'url': url})
 
     return results
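Note: ``parser_re`` (unchanged here apart from the ``# noqa``) is what turns a free-form query into a conversion request. A quick check of what it captures:

    import re

    parser_re = re.compile(
        r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)

    print parser_re.match('100 USD in EUR').groups()
    # -> ('100', 'USD', 'EUR')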
searx/engines/dailymotion.py
@@ -1,17 +1,21 @@
 from urllib import urlencode
 from lxml import html
 from json import loads
-from cgi import escape
 
 categories = ['videos']
 locale = 'en_US'
 
 # see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
+search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'  # noqa
 
+# TODO use video result template
+content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'
+
 
 def request(query, params):
     global search_url
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
+    params['url'] = search_url.format(
+        query=urlencode({'search': query, 'localization': locale}))
     return params
 
@@ -24,7 +28,7 @@ def response(resp):
         title = res['title']
         url = res['url']
         if res['thumbnail_360_url']:
-            content = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'.format(url, res['thumbnail_360_url'])
+            content = content_tpl.format(url, res['thumbnail_360_url'])
         else:
             content = ''
         if res['description']:
@@ -33,6 +37,7 @@ def response(resp):
         results.append({'url': url, 'title': title, 'content': content})
     return results
 
+
 def text_content_from_html(html_string):
     desc_html = html.fragment_fromstring(html_string, create_parent=True)
     return desc_html.text_content()
searx/engines/deviantart.py
@@ -7,6 +7,7 @@ categories = ['images']
 base_url = 'https://www.deviantart.com/'
 search_url = base_url+'search?'
 
+
 def request(query, params):
     global search_url
     params['url'] = search_url + urlencode({'q': query})
@@ -22,8 +23,11 @@ def response(resp):
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
         link = result.xpath('.//a[contains(@class, "thumb")]')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
+        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')  # noqa
         title = ''.join(title_links[0].xpath('.//text()'))
         img_src = link.xpath('.//img')[0].attrib['src']
-        results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'template': 'images.html'})
     return results
searx/engines/duckduckgo.py
@@ -6,8 +6,11 @@ url = 'https://duckduckgo.com/'
 search_url = url + 'd.js?{query}&p=1&s=0'
 locale = 'us-en'
 
 
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale}))
+    q = urlencode({'q': query,
+                   'l': locale})
+    params['url'] = search_url.format(query=q)
     return params
 
@@ -17,8 +20,7 @@ def response(resp):
     for r in search_res:
         if not r.get('t'):
             continue
-        results.append({'title': r['t']
-                       ,'content': html_to_text(r['a'])
-                       ,'url': r['u']
-                       })
+        results.append({'title': r['t'],
+                        'content': html_to_text(r['a']),
+                        'url': r['u']})
     return results
searx/engines/duckduckgo_definitions.py
@@ -3,8 +3,9 @@ from urllib import urlencode
 
 url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
 
+
 def request(query, params):
     params['url'] = url.format(query=urlencode({'q': query}))
     return params
 
@@ -13,11 +14,10 @@ def response(resp):
     results = []
     if 'Definition' in search_res:
         if search_res.get('AbstractURL'):
-            res = {'title' : search_res.get('Heading', '')
-                  ,'content' : search_res.get('Definition', '')
-                  ,'url' : search_res.get('AbstractURL', '')
-                  ,'class' : 'definition_result'
-                  }
+            res = {'title': search_res.get('Heading', ''),
+                   'content': search_res.get('Definition', ''),
+                   'url': search_res.get('AbstractURL', ''),
+                   'class': 'definition_result'}
             results.append(res)
 
     return results
searx/engines/filecrop.py
@@ -2,7 +2,8 @@ from urllib import urlencode
 from HTMLParser import HTMLParser
 
 url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'  # noqa
 
 
 class FilecropResultParser(HTMLParser):
     def __init__(self):
@@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
 
         if tag == 'tr':
-            if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
+            if ('bgcolor', '#edeff5') in attrs or\
+               ('bgcolor', '#ffffff') in attrs:
                 self.__start_processing = True
 
         if not self.__start_processing:
             return
 
         if tag == 'label':
-            self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
-        elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
+            self.result['title'] = [attr[1] for attr in attrs
+                                    if attr[0] == 'title'][0]
+        elif tag == 'a' and ('rel', 'nofollow') in attrs\
+                and ('class', 'sourcelink') in attrs:
             if 'content' in self.result:
-                self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] += [attr[1] for attr in attrs
+                                           if attr[0] == 'title'][0]
             else:
-                self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] = [attr[1] for attr in attrs
+                                          if attr[0] == 'title'][0]
             self.result['content'] += ' '
         elif tag == 'a':
-            self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
+            self.result['url'] = url + [attr[1] for attr in attrs
+                                        if attr[0] == 'href'][0]
 
     def handle_endtag(self, tag):
         if self.__start_processing is False:
@@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser):
 
         self.data_counter += 1
 
+
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'w' :query}))
+    params['url'] = search_url.format(query=urlencode({'w': query}))
     return params
 
+
 def response(resp):
     parser = FilecropResultParser()
     parser.feed(resp.text)
searx/engines/flickr.py
@@ -8,21 +8,27 @@ categories = ['images']
 
 url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}'
+results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa
 
 
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params
 
 
 def response(resp):
     global base_url
     results = []
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'):
+    for result in dom.xpath(results_xpath):
         href = urljoin(url, result.attrib.get('href'))
         img = result.xpath('.//img')[0]
         title = img.attrib.get('alt', '')
         img_src = img.attrib.get('data-defer-src')
         if not img_src:
             continue
-        results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+        results.append({'url': href,
+                        'title': title,
+                        'img_src': img_src,
+                        'template': 'images.html'})
     return results
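Note: this and several engine diffs below apply the same mechanical refactor: a long XPath string moves into a module-level constant (with ``# noqa``) so the call site fits flake8's line limit, and behavior is unchanged. The lookup itself works like this (markup and class name invented for illustration):

    from lxml import html

    results_xpath = '//div[@id="thumbnails"]//a[@class="photo-click"]'
    dom = html.fromstring(
        '<div id="thumbnails">'
        '<a class="photo-click" href="/p/1"><img alt="cat" src="c.jpg"/></a>'
        '</div>')
    for a in dom.xpath(results_xpath):
        print a.attrib['href'], a.xpath('.//img')[0].attrib.get('alt')
    # -> /p/1 cat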
searx/engines/github.py
@@ -4,12 +4,15 @@ from cgi import escape
 
 categories = ['it']
 
-search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'
+search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'  # noqa
+
+accept_header = 'application/vnd.github.preview.text-match+json'
 
 
 def request(query, params):
     global search_url
     params['url'] = search_url.format(query=urlencode({'q': query}))
-    params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json'
+    params['headers']['Accept'] = accept_header
     return params
searx/engines/google_images.py
@@ -6,12 +6,14 @@ from json import loads
 categories = ['images']
 
 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'
+search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'  # noqa
+
 
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params
 
+
 def response(resp):
     results = []
     search_res = loads(resp.text)
@@ -24,5 +26,9 @@ def response(resp):
         title = result['title']
         if not result['url']:
             continue
-        results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'})
+        results.append({'url': href,
+                        'title': title,
+                        'content': '',
+                        'img_src': result['url'],
+                        'template': 'images.html'})
     return results
searx/engines/json_engine.py
@@ -2,12 +2,13 @@ from urllib import urlencode
 from json import loads
 from collections import Iterable
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
 #suggestion_xpath = ''
 
+
 def iterate(iterable):
     if type(iterable) == dict:
         it = iterable.iteritems()
@@ -17,11 +18,15 @@ def iterate(iterable):
     for index, value in it:
         yield str(index), value
 
+
 def is_iterable(obj):
-    if type(obj) == str: return False
-    if type(obj) == unicode: return False
+    if type(obj) == str:
+        return False
+    if type(obj) == unicode:
+        return False
     return isinstance(obj, Iterable)
 
+
 def parse(query):
     q = []
     for part in query.split('/'):
@@ -31,6 +36,7 @@ def parse(query):
         q.append(part)
     return q
 
+
 def do_query(data, q):
     ret = []
     if not len(q):
@@ -38,7 +44,7 @@ def do_query(data, q):
 
     qkey = q[0]
 
-    for key,value in iterate(data):
+    for key, value in iterate(data):
 
         if len(q) == 1:
             if key == qkey:
@@ -54,11 +60,13 @@ def do_query(data, q):
             ret.extend(do_query(value, q))
     return ret
 
+
 def query(data, query_string):
     q = parse(query_string)
 
     return do_query(data, q)
 
+
 def request(query, params):
     query = urlencode({'q': query})[2:]
     params['url'] = search_url.format(query=query)
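Note: ``parse``/``do_query`` above implement a tiny slash-separated path language over decoded JSON: each segment matches dict keys (or stringified list indices), and the walk also descends into nested containers looking for matches. A simplified re-implementation for illustration (not the exact ``do_query``, parts of which fall outside these hunks):

    data = {'results': [{'title': 'first'}, {'title': 'second'}]}

    def walk(node, path):
        if not path:
            return []
        key, rest = path[0], path[1:]
        items = (node.items() if isinstance(node, dict)
                 else list(enumerate(node)))
        matches = []
        for k, value in items:
            if str(k) == key:
                if rest:
                    matches.extend(walk(value, rest))
                else:
                    matches.append(value)
            elif isinstance(value, (dict, list)):
                # keep searching deeper with the same path
                matches.extend(walk(value, path))
        return matches

    print walk(data, 'results/title'.split('/'))  # -> ['first', 'second']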
searx/engines/mediawiki.py
@@ -3,10 +3,12 @@ from urllib import urlencode, quote
 
 url = 'https://en.wikipedia.org/'
 
+search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'  # noqa
+
 number_of_results = 10
 
 
 def request(query, params):
-    search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'
     params['url'] = search_url.format(query=urlencode({'srsearch': query}))
     return params
 
@@ -14,7 +16,5 @@ def request(query, params):
 def response(resp):
     search_results = loads(resp.text)
     res = search_results.get('query', {}).get('search', [])
-    return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),
+    return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),  # noqa
             'title': result['title']} for result in res[:int(number_of_results)]]
searx/engines/piratebay.py
@@ -7,13 +7,18 @@ categories = ['videos', 'music']
 
 url = 'https://thepiratebay.se/'
 search_url = url + 'search/{search_term}/0/99/{search_type}'
-search_types = {'videos': '200'
-               ,'music' : '100'
-               ,'files' : '0'
-               }
+search_types = {'videos': '200',
+                'music': '100',
+                'files': '0'}
+
+magnet_xpath = './/a[@title="Download this torrent using magnet"]'
+content_xpath = './/font[@class="detDesc"]//text()'
 
 
 def request(query, params):
-    params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category']))
+    search_type = search_types.get(params['category'])
+    params['url'] = search_url.format(search_term=quote(query),
+                                      search_type=search_type)
     return params
 
@@ -27,10 +32,14 @@ def response(resp):
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()')))
+        content = escape(' '.join(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
-        magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0]
-        results.append({'url': href, 'title': title, 'content': content,
-                        'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'],
+        magnetlink = result.xpath(magnet_xpath)[0]
+        results.append({'url': href,
+                        'title': title,
+                        'content': content,
+                        'seed': seed,
+                        'leech': leech,
+                        'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
     return results
searx/engines/soundcloud.py
@@ -5,7 +5,8 @@ categories = ['music']
 
 guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 url = 'https://api.soundcloud.com/'
-search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id
+search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id  # noqa
 
 
 def request(query, params):
     global search_url
@@ -21,5 +22,7 @@ def response(resp):
         if result['kind'] in ('track', 'playlist'):
             title = result['title']
             content = result['description']
-            results.append({'url': result['permalink_url'], 'title': title, 'content': content})
+            results.append({'url': result['permalink_url'],
+                            'title': title,
+                            'content': content})
     return results
searx/engines/stackoverflow.py
@@ -7,6 +7,8 @@ categories = ['it']
 
 url = 'http://stackoverflow.com/'
 search_url = url+'search?'
+result_xpath = './/div[@class="excerpt"]//text()'
 
 
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
@@ -20,6 +22,6 @@ def response(resp):
         link = result.xpath('.//div[@class="result-link"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
+        content = escape(' '.join(result.xpath(result_xpath)))
         results.append({'url': href, 'title': title, 'content': content})
     return results
@@ -1,11 +1,10 @@
 from urllib import urlencode
 from lxml import html
-from urlparse import urlparse
-from cgi import escape

 base_url = 'https://startpage.com/'
 search_url = base_url+'do/search'


 def request(query, params):
     global search_url
     query = urlencode({'q': query})[2:]

@@ -20,11 +19,10 @@ def response(resp):
     results = []
     dom = html.fromstring(resp.content)
     # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
-    # not ads : div[@class="result"] are the direct childs of div[@id="results"]
+    # not ads: div[@class="result"] are the direct childs of div[@id="results"]
     for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
-        parsed_url = urlparse(url)
         title = link.text_content()
         content = result.xpath('./p[@class="desc"]')[0].text_content()
         results.append({'url': url, 'title': title, 'content': content})

@@ -7,6 +7,9 @@ categories = ['social media']
 base_url = 'https://twitter.com/'
 search_url = base_url+'search?'
+title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'


 def request(query, params):
     global search_url

@@ -21,7 +24,9 @@ def response(resp):
     for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
         link = tweet.xpath('.//small[@class="time"]//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
+        title = ''.join(tweet.xpath(title_xpath))
-        content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
+        content = escape(''.join(tweet.xpath(content_xpath)))
-        results.append({'url': url, 'title': title, 'content': content})
+        results.append({'url': url,
+                        'title': title,
+                        'content': content})
     return results

@@ -5,27 +5,31 @@ from lxml import html
 base_url = 'http://vimeo.com'
 search_url = base_url + '/search?{query}'
 url_xpath = None
 content_xpath = None
 title_xpath = None
 results_xpath = ''
+content_tpl = '<a href="{0}"> <img src="{2}"/> </a>'

-# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
+# the cookie set by vimeo contains all the following values,
+# but only __utma seems to be requiered
 cookie = {
     #'vuid':'918282893.1027205400'
     # 'ab_bs':'%7B%223%22%3A279%7D'
-    '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'
+    '__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
     # '__utmb':'18302654.1.10.1388942090'
     #, '__utmc':'18302654'
-    #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
+    #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'  # noqa
     #, '__utml':'search'
 }


 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q' :query}))
+    params['url'] = search_url.format(query=urlencode({'q': query}))
     params['cookies'] = cookie
     return params


 def response(resp):
     results = []
     dom = html.fromstring(resp.text)

@@ -36,10 +40,9 @@ def response(resp):
         url = base_url + result.xpath(url_xpath)[0]
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
-        content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, thumbnail)
-        results.append({'url': url
-                        , 'title': title
-                        , 'content': content
-                        , 'template':'videos.html'
-                        , 'thumbnail': thumbnail})
+        results.append({'url': url,
+                        'title': title,
+                        'content': content_tpl.format(url, title, thumbnail),
+                        'template': 'videos.html',
+                        'thumbnail': thumbnail})

     return results

@@ -1,21 +1,25 @@
 from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
-from cgi import escape
 from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text

 search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
 suggestion_xpath = ''
 results_xpath = ''


 '''
 if xpath_results is list, extract the text from each result and concat the list
-if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
+if xpath_results is a xml element, extract all the text node from it
+( text_content() method from lxml )
 if xpath_results is a string element, then it's already done
 '''


 def extract_text(xpath_results):
     if type(xpath_results) == list:
         # it's list of result : concat everything using recursive call

@@ -30,7 +34,7 @@ def extract_text(xpath_results):
         return ''.join(xpath_results)
     else:
         # it's a element
-        return xpath_results.text_content()
+        return html_to_text(xpath_results.text_content())


 def extract_url(xpath_results):

@@ -60,7 +64,8 @@ def normalize_url(url):
         url += '/'

     # FIXME : hack for yahoo
-    if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
+    if parsed_url.hostname == 'search.yahoo.com'\
+            and parsed_url.path.startswith('/r'):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:

@@ -82,15 +87,15 @@ def response(resp):
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath))
-            title = extract_text(result.xpath(title_xpath)[0 ])
+            title = extract_text(result.xpath(title_xpath)[0])
             content = extract_text(result.xpath(content_xpath)[0])
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for url, title, content in zip(
-            map(extract_url, dom.xpath(url_xpath)), \
-            map(extract_text, dom.xpath(title_xpath)), \
-            map(extract_text, dom.xpath(content_xpath)), \
+            map(extract_url, dom.xpath(url_xpath)),
+            map(extract_text, dom.xpath(title_xpath)),
+            map(extract_text, dom.xpath(content_xpath))
         ):
             results.append({'url': url, 'title': title, 'content': content})

     if not suggestion_xpath:

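The docstring above names three shapes an XPath query can return, which is why extract_text recurses. A small illustration of those shapes with plain lxml (the sample markup is invented for this note):

from lxml import html

dom = html.fromstring('<div><b>privacy</b> respecting <i>search</i></div>')

# 1. a list of string results
text_nodes = dom.xpath('.//b/text()')
print(''.join(text_nodes))         # -> privacy

# 2. a list of elements: text_content() gathers all nested text nodes
element = dom.xpath('.//i')[0]
print(element.text_content())      # -> search

# 3. a plain string result is already text and needs no further work
print(dom.xpath('string(.//b)'))   # -> privacy
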
@@ -4,10 +4,12 @@ from urllib import urlencode
 url = 'http://localhost:8090'
 search_url = '/yacysearch.json?{query}&maximumRecords=10'


 def request(query, params):
-    params['url'] = url + search_url.format(query=urlencode({'query':query}))
+    params['url'] = url + search_url.format(query=urlencode({'query': query}))
     return params


 def response(resp):
     raw_search_results = loads(resp.text)

@@ -25,7 +27,7 @@ def response(resp):
         tmp_result['content'] = ''

         if len(result['description']):
-            tmp_result['content'] += result['description'] +"<br/>"
+            tmp_result['content'] += result['description'] + "<br/>"

         if len(result['pubDate']):
             tmp_result['content'] += result['pubDate'] + "<br/>"

@@ -5,6 +5,7 @@ categories = ['videos']
 search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}'


 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params

@@ -30,17 +31,16 @@ def response(resp):
         thumbnail = ''
         if len(result['media$group']['media$thumbnail']):
             thumbnail = result['media$group']['media$thumbnail'][0]['url']
-            content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)
+            content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)  # noqa
         if len(content):
             content += '<br />' + result['content']['$t']
         else:
             content = result['content']['$t']

-        results.append({'url': url
-                        , 'title': title
-                        , 'content': content
-                        , 'template':'videos.html'
-                        , 'thumbnail':thumbnail})
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'template': 'videos.html',
+                        'thumbnail': thumbnail})

     return results

@@ -105,3 +105,7 @@ engines:
     url_xpath : ./a/@href
     title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
     content_xpath : ./a/img/@src
+
+locales:
+    en : English
+    hu : Magyar

@@ -1,16 +0,0 @@
-
-port = 11111
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = False
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

@@ -0,0 +1,107 @@
+server:
+    port : 11111
+    secret_key : "ultrasecretkey" # change this!
+    debug : False
+    request_timeout : 3.0 # seconds
+    base_url: False
+
+engines:
+  - name : wikipedia
+    engine : mediawiki
+    url : https://en.wikipedia.org/
+    number_of_results : 1
+
+  - name : bing
+    engine : bing
+    locale : en-US
+
+  - name : currency
+    engine : currency_convert
+    categories : general
+
+  - name : deviantart
+    engine : deviantart
+    categories : images
+
+  - name : ddg definitions
+    engine : duckduckgo_definitions
+
+  - name : duckduckgo
+    engine : duckduckgo
+    locale : en-us
+
+  - name : filecrop
+    engine : filecrop
+    categories : files
+
+  - name : flickr
+    engine : flickr
+    categories : images
+
+  - name : github
+    engine : github
+    categories : it
+
+  - name : google
+    engine : json_engine
+    search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+    categories : general
+    url_query : /responseData/results/unescapedUrl
+    content_query : /responseData/results/content
+    title_query : /responseData/results/titleNoFormatting
+
+  - name : google images
+    engine : google_images
+    categories : images
+
+  - name : piratebay
+    engine : piratebay
+    categories : videos, music, files
+
+  - name : soundcloud
+    engine : soundcloud
+    categories : music
+
+  - name : stackoverflow
+    engine : stackoverflow
+    categories : it
+
+  - name : startpage
+    engine : startpage
+
+  - name : twitter
+    engine : twitter
+    categories : social media
+
+  - name : urbandictionary
+    engine : xpath
+    search_url : http://www.urbandictionary.com/define.php?term={query}
+    url_xpath : //div[@class="word"]//a/@href
+    title_xpath : //div[@class="word"]//a
+    content_xpath : //div[@class="definition"]
+
+  - name : yahoo
+    engine : xpath
+    search_url : http://search.yahoo.com/search?p={query}
+    results_xpath : //div[@class="res"]
+    url_xpath : .//h3/a/@href
+    title_xpath : .//h3/a
+    content_xpath : .//div[@class="abstr"]
+    suggestion_xpath : //div[@id="satat"]//a
+
+  - name : youtube
+    engine : youtube
+    categories : videos
+
+  - name : dailymotion
+    engine : dailymotion
+    locale : en_US
+    categories : videos
+
+  - name : vimeo
+    engine : vimeo
+    categories : videos
+    results_xpath : //div[@id="browse_content"]/ol/li
+    url_xpath : ./a/@href
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+    content_xpath : ./a/img/@src

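This YAML file replaces the Python settings module deleted above. A minimal loading sketch, assuming the file is parsed with pyyaml (added to install_requires in this commit) and honoring the SEARX_SETTINGS_PATH override that the robot test layer below relies on:

import os
from yaml import load

# fall back to the bundled settings.yml when no override is given
settings_path = os.environ.get('SEARX_SETTINGS_PATH', 'searx/settings.yml')

with open(settings_path) as settings_yaml:
    settings = load(settings_yaml)

print(settings['server']['port'])                          # -> 11111
print([engine['name'] for engine in settings['engines']])
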
@@ -49,6 +49,8 @@ input[type="submit"] { border: 1px solid #666666; color: #444444; padding: 4px;
 input[type="checkbox"] { visibility: hidden; }
+
+fieldset { margin: 8px; }

 #categories { margin: 0 10px; }

 .checkbox_container { display: inline-block; position: relative; margin: 0 3px; padding: 0px; }

@@ -79,7 +81,6 @@ a { text-decoration: none; color: #1a11be; }
 a:visited { color: #7b11be; }

 .result { margin: 19px 0 18px 0; padding: 0; max-width: 55em; clear: both; }
-.result:hover { background: #e8e7e6; }
 .result_title { margin-bottom: 0; }
 .result h3 { font-size: 1em; word-wrap:break-word; margin: 5px 0 1px 0; padding: 0 }
 .result .content { font-size: 0.8em; margin: 0; padding: 0; max-width: 54em; word-wrap:break-word; line-height: 1.24; }

@@ -201,3 +202,5 @@ tr:hover td { background: #DDDDDD; }
 .result img { max-width: 90%; width: auto; height: auto }
 }
+
+.favicon { float: left; margin-right: 4px; }

@@ -8,25 +8,25 @@
 </p>
 <h2>Why use Searx?</h2>
 <ul>
-<li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
+<li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li>
-<li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
+<li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li>
-<li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://gmail.com/asciimoo/searx">github</a></li>
+<li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
 </ul>
-<p>If you do care about privacy, want to be a conscious user, moreover believe
+<p>If you do care about privacy, want to be a conscious user, or otherwise believe
 in digital freedom, make Searx your default search engine or run it on your own server</p>

 <h2>Technical details - How does it work?</h2>

 <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>,
 inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br />
-It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they don't show up in our logs, neither in your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br />
+It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br />
-Searx can be added to your browser's search bar, moreover it can be set as the default search engine.
+Searx can be added to your browser's search bar; moreover, it can be set as the default search engine.
 </p>

-<h2>How can I have my own?</h2>
+<h2>How can I make it my own?</h2>

-<p>Searx appreciates your suspicion regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people to have privacy and make the Internet freer!
+<p>Searx appreciates your concern regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people reclaim their privacy and make the Internet freer!
-<br />The more decentralized the Internet is the more freedom we have!</p>
+<br />The more decentralized the Internet is, the more freedom we have!</p>

 <hr />

@@ -39,7 +39,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d
 <h3>New engines?</h3>
 <ul>
-<li>Edit your engines.cfg, see <a href="https://raw.github.com/asciimoo/searx/master/engines.cfg_sample">sample config</a></li>
+<li>Edit your <a href="https://raw.github.com/asciimoo/searx/master/searx/settings.yml">settings.yml</a></li>
 <li>Create your custom engine module, check the <a href="https://github.com/asciimoo/searx/blob/master/examples/basic_engine.py">example engine</a></li>
 </ul>
 <p>Don't forget to restart searx after config edit!</p>

@@ -48,7 +48,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d
 <p>See the <a href="https://github.com/asciimoo/searx/wiki/Installation">installation and setup</a> wiki page</p>

 <h3>How to debug engines?</h3>
-<p><a href="/stats">Stats page</a> contains some useful data about the used engines.</p>
+<p><a href="/stats">Stats page</a> contains some useful data about the engines used.</p>

 </div>
 {% endblock %}

@@ -1,7 +1,7 @@
 <div id="categories">
 {% for category in categories %}
 <div class="checkbox_container">
-    <input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ category }}</label>
+    <input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ _(category) }}</label>
 </div>
 {% endfor %}
 </div>

@@ -1,12 +1,12 @@
 {% extends 'base.html' %}
 {% block content %}
 <div class="row">
-<h2>Currently used search engines</h2>
+<h2>{{ _('Currently used search engines') }}</h2>

 <table style="width: 80%;">
     <tr>
-        <th>Engine name</th>
+        <th>{{ _('Engine name') }}</th>
-        <th>Category</th>
+        <th>{{ _('Category') }}</th>
     </tr>
 {% for (categ,search_engines) in categs %}
     {% for search_engine in search_engines %}

@@ -20,7 +20,6 @@
     {% endfor %}
 {% endfor %}
 </table>
-<p>Please add more engines to this list, pull requests are welcome!</p>
-<p class="right"><a href="/">back</a></p>
+<p class="right"><a href="/">{{ _('back') }}</a></p>
 </div>
 {% endblock %}

@@ -4,8 +4,8 @@
 <div class="title"><h1>searx</h1></div>
 {% include 'search.html' %}
 <p class="top_margin">
-    <a href="/about" class="hmarg">about</a>
+    <a href="/about" class="hmarg">{{ _('about') }}</a>
-    <a href="/preferences" class="hmarg">preferences</a>
+    <a href="/preferences" class="hmarg">{{ _('preferences') }}</a>
 </p>
 </div>
 {% endblock %}

@@ -2,18 +2,28 @@
 {% block head %} {% endblock %}
 {% block content %}
 <div class="row">
-<h2>Preferences</h2>
+<h2>{{ _('Preferences') }}</h2>

+<form method="post" action="/preferences" id="search_form">
 <fieldset>
-    <legend>Default categories</legend>
+    <legend>{{ _('Default categories') }}</legend>
-    <form method="post" action="/preferences" id="search_form">
     <p>
     {% include 'categories.html' %}
     </p>
-    <input type="submit" value="save" />
-    </form>
 </fieldset>
-<div class="right"><a href="/">back</a></div>
+<fieldset>
+    <legend>{{ _('Interface language') }}</legend>
+    <p>
+    <select name='locale'>
+    {% for locale_id,locale_name in locales.items() %}
+    <option value={{ locale_id }} {% if locale_id == current_locale %}selected="selected"{% endif %}>{{ locale_name}}</option>
+    {% endfor %}
+    </select>
+    </p>
+</fieldset>
+<input type="submit" value="{{ _('save') }}" />
+</form>
+<div class="right"><a href="/">{{ _('back') }}</a></div>
 </div>
 {% endblock %}

@@ -1,13 +1,11 @@
 <div class="result {{ result.class }}">

 {% if result['favicon'] %}
-<div style="float:left; margin:2px;">
-<img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
-</div>
+<img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
 {% endif %}

 <div>
-  <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3></br>
+  <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
   <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p>
   <p class="url">{{ result.pretty_url }}</p>
 </div>

@@ -1,13 +1,11 @@
 <div class="result">
 {% if result['favicon'] %}
-<div style="float:left; margin:2px;">
-<img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
-</div>
+<img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
 {% endif %}

 <p>
   <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
-  <a href="{{ result.url }}"><img width="300" height="170" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
+  <a href="{{ result.url }}"><img width="400px" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
   <p class="url">{{ result.url }}</p>
 </p>
 </div>

@@ -7,12 +7,12 @@
 </div>
 <div id="results">
 {% if suggestions %}
-<div id="suggestions"><span>Suggestions: </span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
+<div id="suggestions"><span>{{ _('Suggestions') }}:</span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
 {% endif %}


 <div id ="result_count">
-    Number of results: {{ number_of_results }}
+    {{ _('Number of results') }}: {{ number_of_results }}
 </div>

 {% for result in results %}

@@ -23,7 +23,7 @@
 {% endif %}
 {% endfor %}
 <div id="apis">
-Download results
+{{ _('Download results') }}
 <form method="post" action="/">
 <div class="left">
 <input type="hidden" name="q" value="{{ q }}" />

@@ -1,7 +1,7 @@
 {% extends "base.html" %}
 {% block head %} {% endblock %}
 {% block content %}
-<h2>Engine stats</h2>
+<h2>{{ _('Engine stats') }}</h2>

 {% for stat_name,stat_category in stats %}
 <div class="left">

@@ -7,10 +7,10 @@ from unittest2 import TestCase

 import os
 import subprocess
-import sys


 class SearxTestLayer:
+    """Base layer for non-robot tests."""

     __name__ = u'SearxTestLayer'

@@ -36,24 +36,37 @@ class SearxRobotLayer(Layer):

     def setUp(self):
         os.setpgrp()  # create new process group, become its leader

+        # get program paths
         webapp = os.path.join(
             os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
             'webapp.py'
         )
         exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py')

+        # set robot settings path
+        os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath(
+            os.path.dirname(__file__) + '/settings_robot.yml')

+        # run the server
         self.server = subprocess.Popen(
-            [exe, webapp, 'settings_robot'],
+            [exe, webapp],
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )

     def tearDown(self):
-        # TERM all processes in my group
+        # send TERM signal to all processes in my group, to stop subprocesses
         os.killpg(os.getpgid(self.server.pid), 15)

+        # remove previously set environment variable
+        del os.environ['SEARX_SETTINGS_PATH']


 SEARXROBOTLAYER = SearxRobotLayer()


 class SearxTestCase(TestCase):
+    """Base test case for non-robot tests."""

     layer = SearxTestLayer

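A design note on the layer above: it mutates os.environ in setUp and must undo it in tearDown. A sketch of an alternative that hands a copied environment to the child process instead, so no global state needs cleanup (the path below is hypothetical):

import os
import subprocess

env = os.environ.copy()
env['SEARX_SETTINGS_PATH'] = '/tmp/settings_robot.yml'  # hypothetical path

# only the child process sees the override; os.environ stays untouched
server = subprocess.Popen(['python', 'searx/webapp.py'], env=env)
server.terminate()
server.wait()
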
Binary file not shown.
@@ -0,0 +1,115 @@
+# Hungarian translations for PROJECT.
+# Copyright (C) 2014 ORGANIZATION
+# This file is distributed under the same license as the PROJECT project.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2014.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PROJECT VERSION\n"
+"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
+"POT-Creation-Date: 2014-01-22 00:55+0100\n"
+"PO-Revision-Date: 2014-01-21 23:33+0100\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: hu <LL@li.org>\n"
+"Plural-Forms: nplurals=1; plural=0\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 1.3\n"
+
+#: searx/engines/__init__.py:274
+msgid "Page loads (sec)"
+msgstr "Válaszidők (sec)"
+
+#: searx/engines/__init__.py:278 searx/templates/results.html:15
+msgid "Number of results"
+msgstr "Találatok száma"
+
+#: searx/engines/__init__.py:282
+msgid "Scores"
+msgstr "Pontszámok"
+
+#: searx/engines/__init__.py:286
+msgid "Scores per result"
+msgstr "Pontszámok találatonként"
+
+#: searx/engines/__init__.py:290
+msgid "Errors"
+msgstr "Hibák"
+
+#: searx/templates/engines.html:4
+msgid "Currently used search engines"
+msgstr "Jelenleg használt keresők"
+
+#: searx/templates/engines.html:8
+msgid "Engine name"
+msgstr "Kereső neve"
+
+#: searx/templates/engines.html:9
+msgid "Category"
+msgstr "Kategória"
+
+#: searx/templates/engines.html:23 searx/templates/preferences.html:27
+msgid "back"
+msgstr "vissza"
+
+#: searx/templates/index.html:7
+msgid "about"
+msgstr "rólunk"
+
+#: searx/templates/index.html:8
+msgid "preferences"
+msgstr "beállítások"
+
+#: searx/templates/preferences.html:5
+msgid "Preferences"
+msgstr "Beállítások"
+
+#: searx/templates/preferences.html:10
+msgid "Default categories"
+msgstr "Alapértelmezett kategóriák"
+
+#: searx/templates/preferences.html:16
+msgid "Interface language"
+msgstr "Nyelv"
+
+#: searx/templates/preferences.html:25
+msgid "save"
+msgstr "mentés"
+
+#: searx/templates/results.html:10
+msgid "Suggestions"
+msgstr "Javaslatok"
+
+#: searx/templates/results.html:26
+msgid "Download results"
+msgstr "Találatok letöltése"
+
+#: searx/templates/stats.html:4
+msgid "Engine stats"
+msgstr "Kereső statisztikák"
+
+# categories - manually added
+# TODO - automatically add
+
+msgid "files"
+msgstr "fájlok"
+
+msgid "general"
+msgstr "általános"
+
+msgid "music"
+msgstr "zene"
+
+msgid "social media"
+msgstr "közösségi média"
+
+msgid "images"
+msgstr "képek"
+
+msgid "videos"
+msgstr "videók"
+
+msgid "it"
+msgstr "it"

@@ -1,13 +1,16 @@
 from HTMLParser import HTMLParser
 #import htmlentitydefs
 import csv
-import codecs
+from codecs import getincrementalencoder
 import cStringIO
 import re


 def gen_useragent():
     # TODO
-    return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+    ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+    return ua


 def highlight_content(content, query):

@@ -34,16 +37,20 @@ def highlight_content(content, query):
     return content


 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
-        self.result = [ ]
+        self.result = []

     def handle_data(self, d):
         self.result.append(d)

     def handle_charref(self, number):
-        codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
+        if number[0] in (u'x', u'X'):
+            codepoint = int(number[1:], 16)
+        else:
+            codepoint = int(number)
         self.result.append(unichr(codepoint))

     def handle_entityref(self, name):

@@ -54,6 +61,7 @@ class HTMLTextExtractor(HTMLParser):
     def get_text(self):
         return u''.join(self.result)


 def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)

@@ -71,10 +79,16 @@ class UnicodeWriter:
         self.queue = cStringIO.StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
-        self.encoder = codecs.getincrementalencoder(encoding)()
+        self.encoder = getincrementalencoder(encoding)()

     def writerow(self, row):
-        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
+        unicode_row = []
+        for col in row:
+            if type(col) == str or type(col) == unicode:
+                unicode_row.append(col.encode('utf-8').strip())
+            else:
+                unicode_row.append(col)
+        self.writer.writerow(unicode_row)
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
         data = data.decode("utf-8")

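A minimal usage sketch for the rewritten writerow, mirroring how the CSV export in webapp.py below drives UnicodeWriter (Python 2; the sample rows are invented):

import cStringIO
from searx.utils import UnicodeWriter

writer = UnicodeWriter(cStringIO.StringIO())
writer.writerow([u'title', u'url', u'content'])
writer.writerow([u'searx', u'https://github.com/asciimoo/searx', u'metasearch'])

# the rows were encoded to UTF-8 internally; rewind and read them back
writer.stream.seek(0)
print(writer.stream.read())
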
156 searx/webapp.py
@@ -17,26 +17,36 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''

-import os
-import sys
-if __name__ == "__main__":
-    sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
-
-from searx import settings
-
-from flask import Flask, request, render_template, url_for, Response, make_response, redirect
-from searx.engines import search, categories, engines, get_engines_stats
 import json
 import cStringIO
-from searx.utils import UnicodeWriter
+import os
+
+from flask import Flask, request, render_template
+from flask import url_for, Response, make_response, redirect
 from flask import send_from_directory
+
+from searx import settings
+from searx.engines import search, categories, engines, get_engines_stats
+from searx.utils import UnicodeWriter
 from searx.utils import highlight_content, html_to_text
+
+from flask.ext.babel import Babel


-app = Flask(__name__)
+app = Flask(
+    __name__,
+    static_folder=os.path.join(os.path.dirname(__file__), 'static'),
+    template_folder=os.path.join(os.path.dirname(__file__), 'templates')
+)

 app.secret_key = settings['server']['secret_key']

+babel = Babel(app)
+
+#TODO configurable via settings.yml
+favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
+            'twitter', 'stackoverflow', 'github']


 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
 <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">

@@ -51,6 +61,24 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
 '''


+@babel.localeselector
+def get_locale():
+    locale = request.accept_languages.best_match(settings['locales'].keys())
+
+    if request.cookies.get('locale', '') in settings['locales']:
+        locale = request.cookies.get('locale', '')
+
+    if 'locale' in request.args\
+       and request.args['locale'] in settings['locales']:
+        locale = request.args['locale']
+
+    if 'locale' in request.form\
+       and request.form['locale'] in settings['locales']:
+        locale = request.form['locale']
+
+    return locale
+
+
 def get_base_url():
     if settings['server']['base_url']:
         hostname = settings['server']['base_url']

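Note the precedence get_locale establishes: each later check overwrites the previous match, so a locale in the POST form beats one in the query string, which beats the cookie, which beats the Accept-Language header. The same logic condensed (illustrative only, not code from the commit):

def pick_locale(accept_header_match, cookie, args, form, known_locales):
    # start from the best Accept-Language match, then let more explicit
    # sources override it: cookie < query argument < submitted form
    locale = accept_header_match
    for candidate in (cookie, args.get('locale'), form.get('locale')):
        if candidate in known_locales:
            locale = candidate
    return locale
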
@@ -65,7 +93,8 @@ def get_base_url():
 def render(template_name, **kwargs):
     global categories
     kwargs['categories'] = ['general']
-    kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general')
+    kwargs['categories'].extend(x for x in
+                                sorted(categories.keys()) if x != 'general')
     if not 'selected_categories' in kwargs:
         kwargs['selected_categories'] = []
     cookie_categories = request.cookies.get('categories', '').split(',')

@@ -76,6 +105,7 @@ def render(template_name, **kwargs):
         kwargs['selected_categories'] = ['general']
     return render_template(template_name, **kwargs)


 def parse_query(query):
     query_engines = []
     query_parts = query.split()

@@ -89,7 +119,7 @@ def parse_query(query):
 def index():
     global categories

-    if request.method=='POST':
+    if request.method == 'POST':
         request_data = request.form
     else:
         request_data = request.args

@@ -101,14 +131,15 @@ def index():
     query, selected_engines = parse_query(request_data['q'].encode('utf-8'))

     if not len(selected_engines):
-        for pd_name,pd in request_data.items():
+        for pd_name, pd in request_data.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
                     continue
                 selected_categories.append(category)
         if not len(selected_categories):
-            cookie_categories = request.cookies.get('categories', '').split(',')
+            cookie_categories = request.cookies.get('categories', '')
+            cookie_categories = cookie_categories.split(',')
             for ccateg in cookie_categories:
                 if ccateg in categories:
                     selected_categories.append(ccateg)

@@ -116,7 +147,9 @@ def index():
             selected_categories = ['general']

         for categ in selected_categories:
-            selected_engines.extend({'category': categ, 'name': x.name} for x in categories[categ])
+            selected_engines.extend({'category': categ,
+                                     'name': x.name}
+                                    for x in categories[categ])

     results, suggestions = search(query, request, selected_engines)

@@ -131,16 +164,18 @@ def index():
         result['content'] = html_to_text(result['content']).strip()
         result['title'] = html_to_text(result['title']).strip()
         if len(result['url']) > 74:
-            result['pretty_url'] = result['url'][:35] + '[..]' + result['url'][-35:]
+            url_parts = result['url'][:35], result['url'][-35:]
+            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
         else:
             result['pretty_url'] = result['url']

         for engine in result['engines']:
-            if engine in ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github']:
+            if engine in favicons:
                 result['favicon'] = engine

     if request_data.get('format') == 'json':
-        return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
+        return Response(json.dumps({'query': query, 'results': results}),
+                        mimetype='application/json')
     elif request_data.get('format') == 'csv':
         csv = UnicodeWriter(cStringIO.StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')

@@ -151,26 +186,28 @@ def index():
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
+        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
+        response.headers.add('Content-Disposition', content_disp)
         return response
     elif request_data.get('format') == 'rss':
-        response_rss = render('opensearch_response_rss.xml'
-                              ,results=results
-                              ,q=request_data['q']
-                              ,number_of_results=len(results)
-                              ,base_url=get_base_url()
-                              )
+        response_rss = render(
+            'opensearch_response_rss.xml',
+            results=results,
+            q=request_data['q'],
+            number_of_results=len(results),
+            base_url=get_base_url()
+        )
         return Response(response_rss, mimetype='text/xml')

-    return render('results.html'
-                  ,results=results
-                  ,q=request_data['q']
-                  ,selected_categories=selected_categories
-                  ,number_of_results=len(results)+len(featured_results)
-                  ,featured_results=featured_results
-                  ,suggestions=suggestions
-                  )
+    return render(
+        'results.html',
+        results=results,
+        q=request_data['q'],
+        selected_categories=selected_categories,
+        number_of_results=len(results) + len(featured_results),
+        featured_results=featured_results,
+        suggestions=suggestions
+    )


 @app.route('/about', methods=['GET'])

@@ -187,20 +224,37 @@ def list_engines():
 @app.route('/preferences', methods=['GET', 'POST'])
 def preferences():

-    if request.method=='POST':
+    if request.method == 'POST':
         selected_categories = []
+        locale = None
-        for pd_name,pd in request.form.items():
+        for pd_name, pd in request.form.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
                     continue
                 selected_categories.append(category)
+            elif pd_name == 'locale' and pd in settings['locales']:
+                locale = pd
+
+        resp = make_response(redirect('/'))
+
+        if locale:
+            # cookie max age: 4 weeks
+            resp.set_cookie(
+                'locale', locale,
+                max_age=60 * 60 * 24 * 7 * 4
+            )
+
-        if selected_categories:
-            resp = make_response(redirect('/'))
-            # cookie max age: 4 weeks
-            resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
-            return resp
-    return render('preferences.html')
+        if selected_categories:
+            # cookie max age: 4 weeks
+            resp.set_cookie(
+                'categories', ','.join(selected_categories),
+                max_age=60 * 60 * 24 * 7 * 4
+            )
+        return resp
+    return render('preferences.html',
+                  locales=settings['locales'],
+                  current_locale=get_locale())


 @app.route('/stats', methods=['GET'])

@@ -216,6 +270,7 @@ def robots():
 Allow: /
 Allow: /about
 Disallow: /stats
+Disallow: /engines
 """, mimetype='text/plain')

@@ -229,24 +284,27 @@ def opensearch():
     base_url = get_base_url()
     ret = opensearch_xml.format(method=method, host=base_url)
     resp = Response(response=ret,
                     status=200,
                     mimetype="application/xml")
     return resp


 @app.route('/favicon.ico')
 def favicon():
     return send_from_directory(os.path.join(app.root_path, 'static/img'),
-                               'favicon.png', mimetype='image/vnd.microsoft.icon')
+                               'favicon.png',
+                               mimetype='image/vnd.microsoft.icon')


 def run():
     from gevent import monkey
     monkey.patch_all()

-    app.run(debug = settings['server']['debug']
-            ,use_debugger = settings['server']['debug']
-            ,port = settings['server']['port']
-            )
+    app.run(
+        debug=settings['server']['debug'],
+        use_debugger=settings['server']['debug'],
+        port=settings['server']['port']
+    )


 if __name__ == "__main__":

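As a quick sanity check on the cookie lifetime used twice in the preferences handler above:

# 60 s * 60 min * 24 h * 7 days * 4 weeks
max_age = 60 * 60 * 24 * 7 * 4
print(max_age)  # -> 2419200 seconds, i.e. four weeks
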
23 setup.py
@@ -11,12 +11,12 @@ def read(*rnames):
     return open(os.path.join(os.path.dirname(__file__), *rnames)).read()


-long_description = read('README.md')
+long_description = read('README.rst')

 setup(
     name='searx',
-    version="0.1",
+    version="0.1.2",
-    description="",
+    description="A privacy-respecting, hackable metasearch engine",
     long_description=long_description,
     classifiers=[
         "Programming Language :: Python",

@@ -30,6 +30,7 @@ setup(
     zip_safe=False,
     install_requires=[
         'flask',
+        'flask-babel',
         'grequests',
         'lxml',
         'pyyaml',

@@ -49,4 +50,20 @@ setup(
         'zope.testrunner',
     ]
     },
+    entry_points={
+        'console_scripts': [
+            'searx-run = searx.webapp:run'
+        ]
+    },
+    package_data={
+        'searx': [
+            'settings.yml',
+            '../README.rst',
+            'static/*/*',
+            'translations/*/*',
+            'templates/*.html',
+            'templates/result_templates/*.html',
+        ],
+    },
 )

@@ -1,5 +1,6 @@
 [versions]
 Flask = 0.10.1
+Flask-Babel = 0.9
 Jinja2 = 2.7.2
 MarkupSafe = 0.18
 WebOb = 1.3.1