mirror of https://github.com/searxng/searxng.git
Compare commits
190 Commits
bf98a4145b
...
bcb549e1eb
Author | SHA1 | Date |
---|---|---|
Markus Heiser | bcb549e1eb | |
Bnyro | b07c0ae39f | |
Markus Heiser | ef163e3b01 | |
Markus Heiser | e55e936e43 | |
Markus Heiser | 56e3d72a76 | |
searxng-bot | cc148a76b0 | |
uply23333 | fa108c140f | |
Markus Heiser | fa4dfd4efe | |
Markus Heiser | b183e620d8 | |
Markus Heiser | f63f97c56c | |
Markus Heiser | 163031c394 | |
Markus Heiser | 3e5621e1af | |
return42 | e392892578 | |
return42 | 68ed8245da | |
return42 | 2d748d1d74 | |
return42 | 2985ece0ca | |
return42 | adc38c5800 | |
return42 | a084436ff4 | |
Markus Heiser | b176323e89 | |
Markus Heiser | da28f5280b | |
dependabot[bot] | 543ab92fde | |
Markus Heiser | e08ff05fff | |
Markus Heiser | a3921b5ed7 | |
Markus Heiser | ae496e9dd0 | |
JJ | 9b01e3c9d6 | |
searxng-bot | 446ee2dd25 | |
Markus Heiser | b14d885f23 | |
Markus Heiser | 050451347b | |
dependabot[bot] | 88caa1d7db | |
dependabot[bot] | a0c704c860 | |
dependabot[bot] | 219040c766 | |
searxng-bot | eeae3664c2 | |
Markus Heiser | 038a2ff6bd | |
rhee876527 | 4ef1c706f8 | |
mrpaulblack | cf7627557a | |
mrpaulblack | 2cacc560d6 | |
Markus Heiser | 058a072404 | |
Markus Heiser | 14fb187548 | |
Markus Heiser | c96ba25f5b | |
dependabot[bot] | 2986681b31 | |
Bnyro | 9f48d5f84f | |
Grant Lanham | 3e87354f0e | |
Grant Lanham | d448def1a6 | |
dependabot[bot] | 8ba203c72b | |
Markus Heiser | e275f8e18e | |
0xhtml | 8b6a3f3e11 | |
Snoweuph | 5b6f40414a | |
Markus Heiser | 7e8b330b3e | |
Markus Heiser | 2fbedc4316 | |
Émilien (perso) | bafb92e646 | |
dependabot[bot] | 1b8db63b33 | |
searxng-bot | 5a32ee410b | |
Allen | 81aaca8f44 | |
Markus Heiser | f1f0dfd231 | |
Markus Heiser | 5332d3a0b8 | |
Markus Heiser | f00fa76eda | |
Markus Heiser | a631f77401 | |
Markus Heiser | a7d02d4101 | |
Markus Heiser | 5ded9ada82 | |
Markus Heiser | 7ab577a1fb | |
Markus Heiser | c49a2707c1 | |
Brock Vojkovic | e17d7632d0 | |
searxng-bot | 3e747d0491 | |
Grant Lanham | 44a06190bb | |
dependabot[bot] | 042c7190e6 | |
Markus Heiser | 2fd6730d4f | |
Markus Heiser | e7a4d7d7c3 | |
Grant Lanham | 2a29e16d25 | |
Markus Heiser | d48f04e809 | |
Bnyro | 421c131707 | |
Bnyro | b42ce34ca8 | |
Bnyro | e4b2823abd | |
Austin-Olacsi | cbf1e90979 | |
Markus Heiser | f07ab6deb0 | |
0xhtml | 0a0fb450b5 | |
return42 | b5009b8610 | |
return42 | d6b04d3ba1 | |
return42 | bc6ee05422 | |
return42 | a4558dda47 | |
return42 | eb31eaaba0 | |
return42 | 609ead9ffe | |
dependabot[bot] | f95a5effcc | |
searxng-bot | 9bae26a106 | |
dependabot[bot] | 940da05f03 | |
Grant Lanham | 6a3375be37 | |
Zhijie He | 6be56aee11 | |
Grant Lanham | 14241e7dac | |
searxng-bot | ea16c82d78 | |
dwitterer | 915cf9b7af | |
dwitterer | ba4942ea51 | |
Markus | 0b3724651e | |
Markus | 5ad0214bd4 | |
Markus | 8b8d830fd3 | |
Markus | 67fcf3cc67 | |
Markus Heiser | d026486ce3 | |
Grant Lanham | 0b832f19bf | |
Markus | 28dc623785 | |
Markus | 3630e464b3 | |
Markus | d3a795c7e7 | |
Markus | 55e2f4a97f | |
Markus | cdb4927b8b | |
Bnyro | 84e2f9d46a | |
searxng-bot | 231e55f38d | |
Bnyro | 33c1236923 | |
Finn Steffens | 9e2bfe14db | |
Lucas Schwiderski | f05566d925 | |
0xhtml | c45870dd71 | |
searxng-bot | 5cca3f6ef2 | |
dependabot[bot] | d2f36cacb3 | |
dependabot[bot] | 153a28ccd6 | |
Markus Heiser | 9eda4044be | |
Markus | 21bfb4996e | |
Bnyro | 94a1f39bde | |
Markus Heiser | b774ee04ba | |
Markus Heiser | 3a3ff8f020 | |
Bnyro | 7d9d5186a0 | |
GenericMale | e65edb141d | |
Brock Vojkovic | b09aa7e360 | |
Bnyro | 5e576b2238 | |
dependabot[bot] | cbd86473aa | |
dependabot[bot] | 769b2ed030 | |
Alexander Sulfrian | e86c96974d | |
searxng-bot | b05e285384 | |
Alexander Sulfrian | 6a7b1a1a57 | |
return42 | 526428a69b | |
return42 | d6ee8f38dd | |
return42 | d72fbcfd46 | |
return42 | f44775c05a | |
return42 | 71451e5770 | |
GenericMale | 8289436e55 | |
searxng-bot | 4f7dd05d99 | |
Dennis ten Hoove | 2033f30c8d | |
Markus Heiser | fe6bac5a08 | |
Austin-Olacsi | e45b771ffa | |
Grant Lanham | 5276219b9d | |
Markus Heiser | 5be55e3309 | |
Markus Heiser | 5c6b126d7f | |
Markus Heiser | 799d72e3fd | |
searxng-bot | c0369ee488 | |
dependabot[bot] | 29056b9ddd | |
dependabot[bot] | 326ade8634 | |
Markus Heiser | 8d14d46c00 | |
Markus Heiser | 45f03f1902 | |
Fmstrat | 8e985aea88 | |
searxng-bot | f1c05e7c16 | |
dependabot[bot] | fec8ab75e1 | |
dependabot[bot] | 516ac8da82 | |
Markus Heiser | dcf95644c6 | |
0xhtml | 0cfed94b08 | |
0xhtml | 7f9ce3b96e | |
return42 | e76a4f72ef | |
dependabot[bot] | c151683a0b | |
dependabot[bot] | 01a3d8d9e4 | |
searxng-bot | 2f1f54f113 | |
Markus Heiser | 98c73010f1 | |
Markus Heiser | edfd0e2fe5 | |
return42 | 3196e7e86b | |
return42 | 7d47c961c3 | |
return42 | ac51c77c33 | |
return42 | 5cba412784 | |
return42 | fff7792e32 | |
Markus Heiser | ee959ed9fc | |
Markus Heiser | 022898e502 | |
Austin-Olacsi | 9f47bdefc6 | |
Markus Heiser | d7bb97b616 | |
Bnyro | 9bbcd37138 | |
Bnyro | 80226ad6b7 | |
Bnyro | 304ddd8114 | |
Markus Heiser | 3f22dbb68a | |
Bnyro | 84abab0808 | |
dependabot[bot] | 8e359eb8ed | |
Markus Heiser | e31b06b686 | |
Sylvain Cau | b9ddd59c5b | |
dependabot[bot] | dde94751d6 | |
dependabot[bot] | 07a0135a92 | |
Ivan Gabaldon | 26b66dd3be | |
searxng-bot | 319afe031e | |
Markus Heiser | 657dcb973a | |
dependabot[bot] | b0aa6fe8a5 | |
searxng-bot | ffde256364 | |
Grant Lanham | 9a4fa7cc4f | |
Markus Heiser | 2039060b64 | |
Bnyro | e4da22ee51 | |
Grant Lanham | e56f4b315f | |
Alexandre Flament | 37d0438f25 | |
Allen | 5468d97d39 | |
Markus Heiser | 37ec668ae1 | |
Markus Heiser | d0bad45d21 | |
searxng-bot | d5487a157d | |
Markus Heiser | a3500c1efc |
|
@ -1,5 +1,5 @@
|
|||
name: "Checker"
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: "0 4 * * 5"
|
||||
workflow_dispatch:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
name: "Update searx.data"
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: "59 23 28 * *"
|
||||
workflow_dispatch:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
name: Integration
|
||||
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
push:
|
||||
branches: ["master"]
|
||||
pull_request:
|
||||
|
@ -16,70 +16,62 @@ jobs:
|
|||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-20.04]
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Ubuntu packages
|
||||
run: |
|
||||
sudo ./utils/searxng.sh install packages
|
||||
sudo apt install firefox
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements*.txt', 'setup.py') }}
|
||||
- name: Install Python dependencies
|
||||
if: steps.cache-python.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make V=1 install
|
||||
make V=1 gecko.driver
|
||||
- name: Run tests
|
||||
run: make V=1 ci.test
|
||||
- name: Test coverage
|
||||
run: make V=1 test.coverage
|
||||
- name: Store coverage result
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: coverage-${{ matrix.python-version }}
|
||||
path: coverage/
|
||||
retention-days: 60
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Ubuntu packages
|
||||
run: |
|
||||
sudo ./utils/searxng.sh install packages
|
||||
sudo apt install firefox
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-${{ matrix.os }}-${{ matrix.python-version }}-${{ hashFiles('requirements*.txt', 'setup.py') }}
|
||||
- name: Install Python dependencies
|
||||
if: steps.cache-python.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make V=1 install
|
||||
make V=1 gecko.driver
|
||||
- name: Run tests
|
||||
run: make V=1 ci.test
|
||||
|
||||
themes:
|
||||
name: Themes
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Ubuntu packages
|
||||
run: sudo ./utils/searxng.sh install buildhost
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: Install node dependencies
|
||||
run: make V=1 node.env
|
||||
- name: Build themes
|
||||
run: make V=1 themes.all
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install Ubuntu packages
|
||||
run: sudo ./utils/searxng.sh install buildhost
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: Install node dependencies
|
||||
run: make V=1 node.env
|
||||
- name: Build themes
|
||||
run: make V=1 themes.all
|
||||
|
||||
documentation:
|
||||
name: Documentation
|
||||
|
@ -87,40 +79,40 @@ jobs:
|
|||
permissions:
|
||||
contents: write # for JamesIves/github-pages-deploy-action to push changes in repo
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
persist-credentials: false
|
||||
- name: Install Ubuntu packages
|
||||
run: sudo ./utils/searxng.sh install buildhost
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: Build documentation
|
||||
run: |
|
||||
make V=1 docs.clean docs.html
|
||||
- name: Deploy
|
||||
if: github.ref == 'refs/heads/master'
|
||||
uses: JamesIves/github-pages-deploy-action@3.7.1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
BRANCH: gh-pages
|
||||
FOLDER: dist/docs
|
||||
CLEAN: true # Automatically remove deleted files from the deploy branch
|
||||
SINGLE_COMMIT: True
|
||||
COMMIT_MESSAGE: '[doc] build from commit ${{ github.sha }}'
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
persist-credentials: false
|
||||
- name: Install Ubuntu packages
|
||||
run: sudo ./utils/searxng.sh install buildhost
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: Build documentation
|
||||
run: |
|
||||
make V=1 docs.clean docs.html
|
||||
- name: Deploy
|
||||
if: github.ref == 'refs/heads/master'
|
||||
uses: JamesIves/github-pages-deploy-action@3.7.1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ github.token }}
|
||||
BRANCH: gh-pages
|
||||
FOLDER: dist/docs
|
||||
CLEAN: true # Automatically remove deleted files from the deploy branch
|
||||
SINGLE_COMMIT: true
|
||||
COMMIT_MESSAGE: '[doc] build from commit ${{ github.sha }}'
|
||||
|
||||
babel:
|
||||
name: Update translations branch
|
||||
|
@ -133,37 +125,37 @@ jobs:
|
|||
permissions:
|
||||
contents: write # for make V=1 weblate.push.translations
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: weblate & git setup
|
||||
env:
|
||||
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}
|
||||
run: |
|
||||
mkdir -p ~/.config
|
||||
echo "${WEBLATE_CONFIG}" > ~/.config/weblate
|
||||
git config --global user.email "searxng-bot@users.noreply.github.com"
|
||||
git config --global user.name "searxng-bot"
|
||||
- name: Update transations
|
||||
id: update
|
||||
run: |
|
||||
make V=1 weblate.push.translations
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: weblate & git setup
|
||||
env:
|
||||
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}
|
||||
run: |
|
||||
mkdir -p ~/.config
|
||||
echo "${WEBLATE_CONFIG}" > ~/.config/weblate
|
||||
git config --global user.email "searxng-bot@users.noreply.github.com"
|
||||
git config --global user.name "searxng-bot"
|
||||
- name: Update transations
|
||||
id: update
|
||||
run: |
|
||||
make V=1 weblate.push.translations
|
||||
|
||||
dockers:
|
||||
name: Docker
|
||||
|
@ -183,9 +175,9 @@ jobs:
|
|||
# make sure "make docker.push" can get the git history
|
||||
fetch-depth: '0'
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.9'
|
||||
python-version: '3.12'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
|
@ -195,7 +187,7 @@ jobs:
|
|||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: Set up QEMU
|
||||
if: env.DOCKERHUB_USERNAME != null
|
||||
uses: docker/setup-qemu-action@v1
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
name: "Security checks"
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: "42 05 * * *"
|
||||
workflow_dispatch:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
name: "Update translations"
|
||||
on:
|
||||
on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: "05 07 * * 5"
|
||||
workflow_dispatch:
|
||||
|
@ -10,50 +10,50 @@ jobs:
|
|||
runs-on: ubuntu-20.04
|
||||
if: ${{ github.repository_owner == 'searxng' && github.ref == 'refs/heads/master' }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.9-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: weblate & git setup
|
||||
env:
|
||||
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}
|
||||
run: |
|
||||
mkdir -p ~/.config
|
||||
echo "${WEBLATE_CONFIG}" > ~/.config/weblate
|
||||
git config --global user.email "searxng-bot@users.noreply.github.com"
|
||||
git config --global user.name "searxng-bot"
|
||||
- name: Merge and push transation updates
|
||||
run: |
|
||||
make V=1 weblate.translations.commit
|
||||
- name: Create Pull Request
|
||||
id: cpr
|
||||
uses: peter-evans/create-pull-request@v3
|
||||
with:
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
commit-message: '[l10n] update translations from Weblate'
|
||||
committer: searxng-bot <searxng-bot@users.noreply.github.com>
|
||||
author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
|
||||
signoff: false
|
||||
branch: translations_update
|
||||
delete-branch: true
|
||||
draft: false
|
||||
title: '[l10n] update translations from Weblate'
|
||||
body: |
|
||||
update translations from Weblate
|
||||
labels: |
|
||||
translation
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: '0'
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
architecture: 'x64'
|
||||
- name: Cache Python dependencies
|
||||
id: cache-python
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
./local
|
||||
./.nvm
|
||||
./node_modules
|
||||
key: python-ubuntu-20.04-3.12-${{ hashFiles('requirements*.txt', 'setup.py','.nvmrc', 'package.json') }}
|
||||
- name: weblate & git setup
|
||||
env:
|
||||
WEBLATE_CONFIG: ${{ secrets.WEBLATE_CONFIG }}
|
||||
run: |
|
||||
mkdir -p ~/.config
|
||||
echo "${WEBLATE_CONFIG}" > ~/.config/weblate
|
||||
git config --global user.email "searxng-bot@users.noreply.github.com"
|
||||
git config --global user.name "searxng-bot"
|
||||
- name: Merge and push transation updates
|
||||
run: |
|
||||
make V=1 weblate.translations.commit
|
||||
- name: Create Pull Request
|
||||
id: cpr
|
||||
uses: peter-evans/create-pull-request@v3
|
||||
with:
|
||||
token: ${{ secrets.WEBLATE_GITHUB_TOKEN }}
|
||||
commit-message: '[l10n] update translations from Weblate'
|
||||
committer: searxng-bot <searxng-bot@users.noreply.github.com>
|
||||
author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
|
||||
signoff: false
|
||||
branch: translations_update
|
||||
delete-branch: true
|
||||
draft: false
|
||||
title: '[l10n] update translations from Weblate'
|
||||
body: |
|
||||
update translations from Weblate
|
||||
labels: |
|
||||
translation
|
||||
|
|
|
@ -338,6 +338,7 @@ valid-metaclass-classmethod-first-arg=mcs
|
|||
|
||||
# Maximum number of arguments for function / method
|
||||
max-args=8
|
||||
max-positional-arguments=14
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=20
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
FROM alpine:3.19
|
||||
FROM alpine:3.20
|
||||
ENTRYPOINT ["/sbin/tini","--","/usr/local/searxng/dockerfiles/docker-entrypoint.sh"]
|
||||
EXPOSE 8080
|
||||
VOLUME /etc/searxng
|
||||
|
@ -35,7 +35,6 @@ RUN apk add --no-cache -t build-dependencies \
|
|||
git \
|
||||
&& apk add --no-cache \
|
||||
ca-certificates \
|
||||
su-exec \
|
||||
python3 \
|
||||
py3-pip \
|
||||
libxml2 \
|
||||
|
|
|
@ -66,7 +66,7 @@ A user_, admin_ and developer_ handbook is available on the homepage_.
|
|||
Contact
|
||||
=======
|
||||
|
||||
Ask questions or just chat about SearXNG on
|
||||
Ask questions or chat with the SearXNG community (this not a chatbot) on
|
||||
|
||||
IRC
|
||||
`#searxng on libera.chat <https://web.libera.chat/?channel=#searxng>`_
|
||||
|
|
|
@ -175,4 +175,4 @@ unset MORTY_KEY
|
|||
|
||||
# Start uwsgi
|
||||
printf 'Listen on %s\n' "${BIND_ADDRESS}"
|
||||
exec su-exec searxng:searxng uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
|
||||
exec uwsgi --master --uid searxng --gid searxng --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
|
||||
|
|
|
@ -84,9 +84,9 @@ HTML of the site. URL of the SearXNG instance and values are customizable.
|
|||
.. code:: html
|
||||
|
||||
<form method="post" action="https://example.org/">
|
||||
<!-- search --> <input type="text" name="q" />
|
||||
<!-- categories --> <input type="hidden" name="categories" value="general,social media" />
|
||||
<!-- language --> <input type="hidden" name="lang" value="all" />
|
||||
<!-- locale --> <input type="hidden" name="locale" value="en" />
|
||||
<!-- date filter --> <input type="hidden" name="time_range" value="month" />
|
||||
<!-- search --> <input type="text" name="q">
|
||||
<!-- categories --> <input type="hidden" name="categories" value="general,social media">
|
||||
<!-- language --> <input type="hidden" name="lang" value="all">
|
||||
<!-- locale --> <input type="hidden" name="locale" value="en">
|
||||
<!-- date filter --> <input type="hidden" name="time_range" value="month">
|
||||
</form>
|
||||
|
|
|
@ -15,6 +15,7 @@ Administrator documentation
|
|||
installation-apache
|
||||
update-searxng
|
||||
answer-captcha
|
||||
searx.favicons
|
||||
searx.limiter
|
||||
api
|
||||
architecture
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
.. _favicons:
|
||||
|
||||
========
|
||||
Favicons
|
||||
========
|
||||
|
||||
.. sidebar:: warning
|
||||
|
||||
Don't activate the favicons before reading the documentation.
|
||||
|
||||
.. contents::
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
Activating the favicons in SearXNG is very easy, but this **generates a
|
||||
significantly higher load** in the client/server communication and increases
|
||||
resources needed on the server.
|
||||
|
||||
To mitigate these disadvantages, various methods have been implemented,
|
||||
including a *cache*. The cache must be parameterized according to your own
|
||||
requirements and maintained regularly.
|
||||
|
||||
To activate favicons in SearXNG's result list, set a default
|
||||
``favicon_resolver`` in the :ref:`search <settings search>` settings:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
search:
|
||||
favicon_resolver: "duckduckgo"
|
||||
|
||||
By default and without any extensions, SearXNG serves these resolvers:
|
||||
|
||||
- ``duckduckgo``
|
||||
- ``allesedv``
|
||||
- ``google``
|
||||
- ``yandex``
|
||||
|
||||
With the above setting favicons are displayed, the user has the option to
|
||||
deactivate this feature in his settings. If the user is to have the option of
|
||||
selecting from several *resolvers*, a further setting is required / but this
|
||||
setting will be discussed :ref:`later <register resolvers>` in this article,
|
||||
first we have to setup the favicons cache.
|
||||
|
||||
Infrastructure
|
||||
==============
|
||||
|
||||
The infrastructure for providing the favicons essentially consists of three
|
||||
parts:
|
||||
|
||||
- :py:obj:`Favicons-Proxy <.favicons.proxy>` (aka *proxy*)
|
||||
- :py:obj:`Favicons-Resolvers <.favicons.resolvers>` (aka *resolver*)
|
||||
- :py:obj:`Favicons-Cache <.favicons.cache>` (aka *cache*)
|
||||
|
||||
To protect the privacy of users, the favicons are provided via a *proxy*. This
|
||||
*proxy* is automatically activated with the above activation of a *resolver*.
|
||||
Additional requests are required to provide the favicons: firstly, the *proxy*
|
||||
must process the incoming requests and secondly, the *resolver* must make
|
||||
outgoing requests to obtain the favicons from external sources.
|
||||
|
||||
A *cache* has been developed to massively reduce both, incoming and outgoing
|
||||
requests. This *cache* is also activated automatically with the above
|
||||
activation of a *resolver*. In its defaults, however, the *cache* is minimal
|
||||
and not well suitable for a production environment!
|
||||
|
||||
.. _favicon cache setup:
|
||||
|
||||
Setting up the cache
|
||||
====================
|
||||
|
||||
To parameterize the *cache* and more settings of the favicons infrastructure, a
|
||||
TOML_ configuration is created in the file ``/etc/searxng/favicons.toml``.
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[favicons]
|
||||
|
||||
cfg_schema = 1 # config's schema version no.
|
||||
|
||||
[favicons.cache]
|
||||
|
||||
db_url = "/var/cache/searxng/faviconcache.db" # default: "/tmp/faviconcache.db"
|
||||
LIMIT_TOTAL_BYTES = 2147483648 # 2 GB / default: 50 MB
|
||||
# HOLD_TIME = 5184000 # 60 days / default: 30 days
|
||||
# BLOB_MAX_BYTES = 40960 # 40 KB / default 20 KB
|
||||
# MAINTENANCE_MODE = "off" # default: "auto"
|
||||
# MAINTENANCE_PERIOD = 600 # 10min / default: 1h
|
||||
|
||||
:py:obj:`cfg_schema <.FaviconConfig.cfg_schema>`:
|
||||
Is required to trigger any processes required for future upgrades / don't
|
||||
change it.
|
||||
|
||||
:py:obj:`cache.db_url <.FaviconCacheConfig.db_url>`:
|
||||
The path to the (SQLite_) database file. The default path is in the `/tmp`_
|
||||
folder, which is deleted on every reboot and is therefore unsuitable for a
|
||||
production environment. The FHS_ provides the folder for the
|
||||
application cache
|
||||
|
||||
The FHS_ provides the folder `/var/cache`_ for the cache of applications, so a
|
||||
suitable storage location of SearXNG's caches is folder ``/var/cache/searxng``.
|
||||
In container systems, a volume should be mounted for this folder and in a
|
||||
standard installation (compare :ref:`create searxng user`), the folder must be
|
||||
created and the user under which the SearXNG process is running must be given
|
||||
write permission to this folder.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ sudo mkdir /var/cache/searxng
|
||||
$ sudo chown root:searxng /var/cache/searxng/
|
||||
$ sudo chmod g+w /var/cache/searxng/
|
||||
|
||||
:py:obj:`cache.LIMIT_TOTAL_BYTES <.FaviconCacheConfig.LIMIT_TOTAL_BYTES>`:
|
||||
Maximum of bytes stored in the cache of all blobs. The limit is only reached
|
||||
at each maintenance interval after which the oldest BLOBs are deleted; the
|
||||
limit is exceeded during the maintenance period.
|
||||
|
||||
.. attention::
|
||||
|
||||
If the maintenance period is too long or maintenance is switched
|
||||
off completely, the cache grows uncontrollably.
|
||||
|
||||
SearXNG hosters can change other parameters of the cache as required:
|
||||
|
||||
- :py:obj:`cache.HOLD_TIME <.FaviconCacheConfig.HOLD_TIME>`
|
||||
- :py:obj:`cache.BLOB_MAX_BYTES <.FaviconCacheConfig.BLOB_MAX_BYTES>`
|
||||
|
||||
|
||||
Maintenance of the cache
|
||||
------------------------
|
||||
|
||||
Regular maintenance of the cache is required! By default, regular maintenance
|
||||
is triggered automatically as part of the client requests:
|
||||
|
||||
- :py:obj:`cache.MAINTENANCE_MODE <.FaviconCacheConfig.MAINTENANCE_MODE>` (default ``auto``)
|
||||
- :py:obj:`cache.MAINTENANCE_PERIOD <.FaviconCacheConfig.MAINTENANCE_PERIOD>` (default ``6000`` / 1h)
|
||||
|
||||
As an alternative to maintenance as part of the client request process, it is
|
||||
also possible to carry out maintenance using an external process. For example,
|
||||
by creating a :man:`crontab` entry for maintenance:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ python -m searx.favicons cache maintenance
|
||||
|
||||
The following command can be used to display the state of the cache:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$ python -m searx.favicons cache state
|
||||
|
||||
|
||||
.. _favicon proxy setup:
|
||||
|
||||
Proxy configuration
|
||||
===================
|
||||
|
||||
Most of the options of the :py:obj:`Favicons-Proxy <.favicons.proxy>` are
|
||||
already set sensibly with settings from the :ref:`settings.yml <searxng
|
||||
settings.yml>` and should not normally be adjusted.
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[favicons.proxy]
|
||||
|
||||
max_age = 5184000 # 60 days / default: 7 days (604800 sec)
|
||||
|
||||
|
||||
:py:obj:`max_age <.FaviconProxyConfig.max_age>`:
|
||||
The `HTTP Cache-Control max-age`_ response directive indicates that the
|
||||
response remains fresh until N seconds after the response is generated. This
|
||||
setting therefore determines how long a favicon remains in the client's cache.
|
||||
As a rule, in the favicons infrastructure of SearXNG's this setting only
|
||||
affects favicons whose byte size exceeds :ref:`BLOB_MAX_BYTES <favicon cache
|
||||
setup>` (the other favicons that are already in the cache are embedded as
|
||||
`data URL`_ in the :py:obj:`generated HTML <.favicons.proxy.favicon_url>`,
|
||||
which can greatly reduce the number of additional requests).
|
||||
|
||||
.. _register resolvers:
|
||||
|
||||
Register resolvers
|
||||
------------------
|
||||
|
||||
A :py:obj:`resolver <.favicon.resolvers>` is a function that obtains the favicon
|
||||
from an external source. The resolver functions available to the user are
|
||||
registered with their fully qualified name (FQN_) in a ``resolver_map``.
|
||||
|
||||
If no ``resolver_map`` is defined in the ``favicon.toml``, the favicon
|
||||
infrastructure of SearXNG generates this ``resolver_map`` automatically
|
||||
depending on the ``settings.yml``. SearXNG would automatically generate the
|
||||
following TOML configuration from the following YAML configuration:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
search:
|
||||
favicon_resolver: "duckduckgo"
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[favicons.proxy.resolver_map]
|
||||
|
||||
"duckduckgo" = "searx.favicons.resolvers.duckduckgo"
|
||||
|
||||
If this automatism is not desired, then (and only then) a separate
|
||||
``resolver_map`` must be created. For example, to give the user two resolvers to
|
||||
choose from, the following configuration could be used:
|
||||
|
||||
.. code:: toml
|
||||
|
||||
[favicons.proxy.resolver_map]
|
||||
|
||||
"duckduckgo" = "searx.favicons.resolvers.duckduckgo"
|
||||
"allesedv" = "searx.favicons.resolvers.allesedv"
|
||||
# "google" = "searx.favicons.resolvers.google"
|
||||
# "yandex" = "searx.favicons.resolvers.yandex"
|
||||
|
||||
.. note::
|
||||
|
||||
With each resolver, the resource requirement increases significantly.
|
||||
|
||||
The number of resolvers increases:
|
||||
|
||||
- the number of incoming/outgoing requests and
|
||||
- the number of favicons to be stored in the cache.
|
||||
|
||||
In the following we list the resolvers available in the core of SearXNG, but via
|
||||
the FQN_ it is also possible to implement your own resolvers and integrate them
|
||||
into the *proxy*:
|
||||
|
||||
- :py:obj:`searx.favicons.resolvers.duckduckgo`
|
||||
- :py:obj:`searx.favicons.resolvers.allesedv`
|
||||
- :py:obj:`searx.favicons.resolvers.google`
|
||||
- :py:obj:`searx.favicons.resolvers.yandex`
|
||||
|
||||
|
||||
|
||||
.. _SQLite:
|
||||
https://www.sqlite.org/
|
||||
.. _FHS:
|
||||
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/index.html
|
||||
.. _`/var/cache`:
|
||||
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch05s05.html
|
||||
.. _`/tmp`:
|
||||
https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s18.html
|
||||
.. _TOML:
|
||||
https://toml.io/en/
|
||||
.. _HTTP Cache-Control max-age:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
|
||||
.. _data URL:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
|
||||
.. _FQN: https://en.wikipedia.org/wiki/Fully_qualified_name
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
.. _searxng settings.yml:
|
||||
|
||||
========
|
||||
Settings
|
||||
========
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
search:
|
||||
safe_search: 0
|
||||
autocomplete: ""
|
||||
favicon_resolver: ""
|
||||
default_lang: ""
|
||||
ban_time_on_fail: 5
|
||||
max_ban_time_on_fail: 120
|
||||
|
@ -41,6 +42,11 @@
|
|||
- ``qwant``
|
||||
- ``wikipedia``
|
||||
|
||||
``favicon_resolver``:
|
||||
To activate favicons in SearXNG's result list select a default
|
||||
favicon-resolver, leave blank to turn off the feature. Don't activate the
|
||||
favicons before reading the :ref:`Favicons documentation <favicons>`.
|
||||
|
||||
``default_lang``:
|
||||
Default search language - leave blank to detect from browser information or
|
||||
use codes from :origin:`searx/languages.py`.
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
Name of the theme you want to use by default on your SearXNG instance.
|
||||
|
||||
``theme_args.simple_style``:
|
||||
Style of simple theme: ``auto``, ``light``, ``dark``
|
||||
Style of simple theme: ``auto``, ``light``, ``dark``, ``black``
|
||||
|
||||
``results_on_new_tab``:
|
||||
Open result links in a new tab by default.
|
||||
|
|
|
@ -113,7 +113,7 @@ ${fedora_build}
|
|||
|
||||
(${SERVICE_USER})$ command -v python && python --version
|
||||
$SEARXNG_PYENV/bin/python
|
||||
Python 3.8.1
|
||||
Python 3.11.10
|
||||
|
||||
# update pip's boilerplate ..
|
||||
pip install -U pip
|
||||
|
@ -123,7 +123,7 @@ ${fedora_build}
|
|||
|
||||
# jump to SearXNG's working tree and install SearXNG into virtualenv
|
||||
(${SERVICE_USER})$ cd \"$SEARXNG_SRC\"
|
||||
(${SERVICE_USER})$ pip install -e .
|
||||
(${SERVICE_USER})$ pip install --use-pep517 --no-build-isolation -e .
|
||||
|
||||
|
||||
.. END manage.sh update_packages
|
||||
|
|
|
@ -127,6 +127,7 @@ extensions = [
|
|||
"sphinx_tabs.tabs", # https://github.com/djungelorm/sphinx-tabs
|
||||
'myst_parser', # https://www.sphinx-doc.org/en/master/usage/markdown.html
|
||||
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
|
||||
'sphinxcontrib.autodoc_pydantic', # https://github.com/mansenfranzen/autodoc_pydantic
|
||||
]
|
||||
|
||||
autodoc_default_options = {
|
||||
|
|
|
@ -339,6 +339,8 @@ type.
|
|||
content *(not implemented yet)*
|
||||
publishedDate :py:class:`datetime.datetime`, time of publish
|
||||
thumbnail string, url to a small-preview image
|
||||
length :py:class:`datetime.timedelta`, duration of result
|
||||
views string, view count in humanized number format
|
||||
========================= =====================================================
|
||||
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ Relational Database Management System (RDBMS) are supported:
|
|||
|
||||
- :ref:`engine sqlite`
|
||||
- :ref:`engine postgresql`
|
||||
- :ref:`engine mysql_server`
|
||||
- :ref:`engine mysql_server` & :ref:`engine mariadb_server`
|
||||
|
||||
All of the engines above are just commented out in the :origin:`settings.yml
|
||||
<searx/settings.yml>`, as you have to set the required attributes for the
|
||||
|
@ -119,3 +119,16 @@ MySQL
|
|||
.. automodule:: searx.engines.mysql_server
|
||||
:members:
|
||||
|
||||
.. _engine mariadb_server:
|
||||
|
||||
MariaDB
|
||||
--------
|
||||
|
||||
.. sidebar:: info
|
||||
|
||||
- :origin:`mariadb_server.py <searx/engines/mariadb_server.py>`
|
||||
- ``pip install`` :pypi:`mariadb <mariadb>`
|
||||
|
||||
|
||||
.. automodule:: searx.engines.mariadb_server
|
||||
:members:
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
.. _alpinelinux engine:
|
||||
|
||||
=====================
|
||||
Alpine Linux Packages
|
||||
=====================
|
||||
|
||||
.. contents::
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
.. automodule:: searx.engines.alpinelinux
|
||||
:members:
|
|
@ -0,0 +1,8 @@
|
|||
.. _gitea geizhals:
|
||||
|
||||
========
|
||||
Geizhals
|
||||
========
|
||||
|
||||
.. automodule:: searx.engines.geizhals
|
||||
:members:
|
|
@ -0,0 +1,8 @@
|
|||
.. _gitlab engine:
|
||||
|
||||
======
|
||||
GitLab
|
||||
======
|
||||
|
||||
.. automodule:: searx.engines.gitlab
|
||||
:members:
|
|
@ -61,7 +61,7 @@ working tree and release a ``make install`` to get a virtualenv with a
|
|||
$ make install
|
||||
PYENV [virtualenv] installing ./requirements*.txt into local/py3
|
||||
...
|
||||
PYENV [install] pip install -e 'searx[test]'
|
||||
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
|
||||
...
|
||||
Successfully installed searxng-2023.7.19+a446dea1b
|
||||
|
||||
|
@ -78,7 +78,7 @@ the check fails if you edit the requirements listed in
|
|||
...
|
||||
PYENV [virtualenv] installing ./requirements*.txt into local/py3
|
||||
...
|
||||
PYENV [install] pip install -e 'searx[test]'
|
||||
PYENV [install] pip install --use-pep517 --no-build-isolation -e 'searx[test]'
|
||||
...
|
||||
Successfully installed searxng-2023.7.19+a446dea1b
|
||||
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
Why use a private instance?
|
||||
===========================
|
||||
|
||||
.. sidebar:: Is it worth to run my own instance?
|
||||
.. sidebar:: Is running my own instance worth it?
|
||||
|
||||
\.\. is a common question among SearXNG users. Before answering this
|
||||
\.\.\.is a common question among SearXNG users. Before answering this
|
||||
question, see what options a SearXNG user has.
|
||||
|
||||
.. contents::
|
||||
|
@ -12,13 +12,13 @@ Why use a private instance?
|
|||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
Public instances are open to everyone who has access to its URL. Usually, these
|
||||
Public instances are open to everyone who has access to their URL. Usually, they
|
||||
are operated by unknown parties (from the users' point of view). Private
|
||||
instances can be used by a select group of people. It is for example a SearXNG of
|
||||
group of friends or a company which can be accessed through VPN. Also it can be
|
||||
single user one which runs on the user's laptop.
|
||||
instances can be used by a select group of people, such as a SearXNG instance for a
|
||||
group of friends, or a company which can be accessed through a VPN. Instances can also be
|
||||
single-user instances, which run locally on the user's machine.
|
||||
|
||||
To gain more insight on how these instances work let's dive into how SearXNG
|
||||
To gain more insight on how these instances work, let's dive into how SearXNG
|
||||
protects its users.
|
||||
|
||||
.. _SearXNG protect privacy:
|
||||
|
@ -26,26 +26,26 @@ protects its users.
|
|||
How does SearXNG protect privacy?
|
||||
=================================
|
||||
|
||||
SearXNG protects the privacy of its users in multiple ways regardless of the type
|
||||
of the instance (private, public). Removal of private data from search requests
|
||||
SearXNG protects the privacy of its users in multiple ways, regardless of the type
|
||||
of the instance (private or public). Removal of private data from search requests
|
||||
comes in three forms:
|
||||
|
||||
1. removal of private data from requests going to search services
|
||||
2. not forwarding anything from a third party services through search services
|
||||
1. Removing private data from requests going to search services
|
||||
2. Not forwarding anything from third party services through search services
|
||||
(e.g. advertisement)
|
||||
3. removal of private data from requests going to the result pages
|
||||
3. Removing private data from requests going to the results pages
|
||||
|
||||
Removing private data means not sending cookies to external search engines and
|
||||
generating a random browser profile for every request. Thus, it does not matter
|
||||
if a public or private instance handles the request, because it is anonymized in
|
||||
both cases. IP addresses will be the IP of the instance. But SearXNG can be
|
||||
both cases. The IP address used will be the IP of the instance, but SearXNG can also be
|
||||
configured to use proxy or Tor. `Result proxy
|
||||
<https://github.com/asciimoo/morty>`__ is supported, too.
|
||||
|
||||
SearXNG does not serve ads or tracking content unlike most search services. So
|
||||
SearXNG does not serve ads or tracking content, unlike most search services. Therefore,
|
||||
private data is not forwarded to third parties who might monetize it. Besides
|
||||
protecting users from search services, both referring page and search query are
|
||||
hidden from visited result pages.
|
||||
protecting users from search services, both the referring page and search query are
|
||||
hidden from the results pages being visited.
|
||||
|
||||
|
||||
What are the consequences of using public instances?
|
||||
|
@ -53,11 +53,11 @@ What are the consequences of using public instances?
|
|||
|
||||
If someone uses a public instance, they have to trust the administrator of that
|
||||
instance. This means that the user of the public instance does not know whether
|
||||
their requests are logged, aggregated and sent or sold to a third party.
|
||||
their requests are logged, aggregated, and sent or sold to a third party.
|
||||
|
||||
Also, public instances without proper protection are more vulnerable to abusing
|
||||
the search service, In this case the external service in exchange returns
|
||||
CAPTCHAs or bans the IP of the instance. Thus, search requests return less
|
||||
Also, public instances without proper protection are more vulnerable to abuse of
|
||||
the search service, which may cause the external service to enforce
|
||||
CAPTCHAs or to ban the IP address of the instance. Thus, search requests would return less
|
||||
results.
|
||||
|
||||
I see. What about private instances?
|
||||
|
@ -67,10 +67,10 @@ If users run their :ref:`own instances <installation>`, everything is in their
|
|||
control: the source code, logging settings and private data. Unknown instance
|
||||
administrators do not have to be trusted.
|
||||
|
||||
Furthermore, as the default settings of their instance is editable, there is no
|
||||
need to use cookies to tailor SearXNG to their needs. So preferences will not be
|
||||
Furthermore, as the default settings of their instance are editable, there is no
|
||||
need to use cookies to tailor SearXNG to their needs and preferences will not
|
||||
reset to defaults when clearing browser cookies. As settings are stored on
|
||||
their computer, it will not be accessible to others as long as their computer is
|
||||
the user's computer, they will not be accessible to others as long as their computer is
|
||||
not compromised.
|
||||
|
||||
Conclusion
|
||||
|
@ -80,7 +80,7 @@ Always use an instance which is operated by people you trust. The privacy
|
|||
features of SearXNG are available to users no matter what kind of instance they
|
||||
use.
|
||||
|
||||
If someone is on the go or just wants to try SearXNG for the first time public
|
||||
instances are the best choices. Additionally, public instance are making a
|
||||
world a better place, because those who cannot or do not want to run an
|
||||
instance, have access to a privacy respecting search service.
|
||||
For those on the go, or just wanting to try SearXNG for the first time, public
|
||||
instances are the best choice. Public instances are also making the
|
||||
world a better place by giving those who cannot, or do not want to, run an
|
||||
instance access to a privacy-respecting search service.
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
.. _favicons source:
|
||||
|
||||
=================
|
||||
Favicons (source)
|
||||
=================
|
||||
|
||||
.. contents::
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
.. automodule:: searx.favicons
|
||||
:members:
|
||||
|
||||
.. _favicons.config:
|
||||
|
||||
Favicons Config
|
||||
===============
|
||||
|
||||
.. automodule:: searx.favicons.config
|
||||
:members:
|
||||
|
||||
.. _favicons.proxy:
|
||||
|
||||
Favicons Proxy
|
||||
==============
|
||||
|
||||
.. automodule:: searx.favicons.proxy
|
||||
:members:
|
||||
|
||||
.. _favicons.resolver:
|
||||
|
||||
Favicons Resolver
|
||||
=================
|
||||
|
||||
.. automodule:: searx.favicons.resolvers
|
||||
:members:
|
||||
|
||||
.. _favicons.cache:
|
||||
|
||||
Favicons Cache
|
||||
==============
|
||||
|
||||
.. automodule:: searx.favicons.cache
|
||||
:members:
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
.. _searx.settings_loader:
|
||||
|
||||
===============
|
||||
Settings Loader
|
||||
===============
|
||||
|
||||
.. automodule:: searx.settings_loader
|
||||
:members:
|
|
@ -0,0 +1,8 @@
|
|||
.. _sqlite db:
|
||||
|
||||
=========
|
||||
SQLite DB
|
||||
=========
|
||||
|
||||
.. automodule:: searx.sqlitedb
|
||||
:members:
|
14
manage
14
manage
|
@ -41,7 +41,7 @@ PATH="${REPO_ROOT}/node_modules/.bin:${PATH}"
|
|||
|
||||
PYOBJECTS="searx"
|
||||
PY_SETUP_EXTRAS='[test]'
|
||||
GECKODRIVER_VERSION="v0.34.0"
|
||||
GECKODRIVER_VERSION="v0.35.0"
|
||||
# SPHINXOPTS=
|
||||
BLACK_OPTIONS=("--target-version" "py311" "--line-length" "120" "--skip-string-normalization")
|
||||
BLACK_TARGETS=("--exclude" "(searx/static|searx/languages.py)" "--include" 'searxng.msg|\.pyi?$' "searx" "searxng_extra" "tests")
|
||||
|
@ -54,8 +54,10 @@ fi
|
|||
|
||||
YAMLLINT_FILES=()
|
||||
while IFS= read -r line; do
|
||||
YAMLLINT_FILES+=("$line")
|
||||
done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml')"
|
||||
if [ "$line" != "tests/unit/settings/syntaxerror_settings.yml" ]; then
|
||||
YAMLLINT_FILES+=("$line")
|
||||
fi
|
||||
done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml' '.github/*.yml' '.github/*/*.yml')"
|
||||
|
||||
RST_FILES=(
|
||||
'README.rst'
|
||||
|
@ -231,7 +233,7 @@ gecko.driver() {
|
|||
build_msg INSTALL "geckodriver already installed"
|
||||
return
|
||||
fi
|
||||
PLATFORM="$(python3 -c 'import platform; print(platform.system().lower(), platform.architecture()[0])')"
|
||||
PLATFORM="$(python -c 'import platform; print(platform.system().lower(), platform.architecture()[0])')"
|
||||
case "$PLATFORM" in
|
||||
"linux 32bit" | "linux2 32bit") ARCH="linux32";;
|
||||
"linux 64bit" | "linux2 64bit") ARCH="linux64";;
|
||||
|
@ -297,8 +299,8 @@ pyenv.install() {
|
|||
|
||||
( set -e
|
||||
pyenv
|
||||
build_msg PYENV "[install] pip install -e 'searx${PY_SETUP_EXTRAS}'"
|
||||
"${PY_ENV_BIN}/python" -m pip install -e ".${PY_SETUP_EXTRAS}"
|
||||
build_msg PYENV "[install] pip install --use-pep517 --no-build-isolation -e 'searx${PY_SETUP_EXTRAS}'"
|
||||
"${PY_ENV_BIN}/python" -m pip install --use-pep517 --no-build-isolation -e ".${PY_SETUP_EXTRAS}"
|
||||
)
|
||||
local exit_val=$?
|
||||
if [ ! $exit_val -eq 0 ]; then
|
||||
|
|
|
@ -2,24 +2,23 @@ mock==5.1.0
|
|||
nose2[coverage_plugin]==0.15.1
|
||||
cov-core==1.15.0
|
||||
black==24.3.0
|
||||
pylint==3.2.5
|
||||
pylint==3.3.1
|
||||
splinter==0.21.0
|
||||
selenium==4.22.0
|
||||
Pallets-Sphinx-Themes==2.1.3
|
||||
Sphinx<=7.1.2; python_version == '3.8'
|
||||
Sphinx==7.3.7; python_version > '3.8'
|
||||
sphinx-issues==4.1.0
|
||||
selenium==4.25.0
|
||||
Pallets-Sphinx-Themes==2.3.0
|
||||
Sphinx==7.4.7
|
||||
sphinx-issues==5.0.0
|
||||
sphinx-jinja==2.0.2
|
||||
sphinx-tabs==3.4.5
|
||||
sphinx-tabs==3.4.7
|
||||
sphinxcontrib-programoutput==0.17
|
||||
sphinx-autobuild==2021.3.14
|
||||
sphinx-notfound-page==1.0.2
|
||||
sphinx-autobuild==2024.10.3
|
||||
sphinx-notfound-page==1.0.4
|
||||
myst-parser==3.0.1
|
||||
linuxdoc==20240509
|
||||
linuxdoc==20240924
|
||||
aiounittest==1.4.2
|
||||
yamllint==1.35.1
|
||||
wlc==1.14
|
||||
wlc==1.15
|
||||
coloredlogs==15.0.1
|
||||
docutils<=0.21; python_version == '3.8'
|
||||
docutils>=0.21.2; python_version > '3.8'
|
||||
|
||||
docutils>=0.21.2
|
||||
parameterized==0.9.0
|
||||
autodoc_pydantic==2.2.0
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
certifi==2024.7.4
|
||||
babel==2.15.0
|
||||
certifi==2024.8.30
|
||||
babel==2.16.0
|
||||
flask-babel==4.0.0
|
||||
flask==3.0.3
|
||||
jinja2==3.1.4
|
||||
lxml==5.2.2
|
||||
lxml==5.3.0
|
||||
pygments==2.18.0
|
||||
python-dateutil==2.9.0.post0
|
||||
pyyaml==6.0.1
|
||||
pyyaml==6.0.2
|
||||
httpx[http2]==0.24.1
|
||||
Brotli==1.1.0
|
||||
uvloop==0.19.0
|
||||
uvloop==0.21.0
|
||||
httpx-socks[asyncio]==0.7.7
|
||||
setproctitle==1.3.3
|
||||
redis==5.0.7
|
||||
redis==5.0.8
|
||||
markdown-it-py==3.0.0
|
||||
fasttext-predict==0.9.2.2
|
||||
pytomlpp==1.0.13; python_version < '3.11'
|
||||
tomli==2.0.2; python_version < '3.11'
|
||||
msgspec==0.18.6
|
||||
eval_type_backport; python_version < '3.9'
|
||||
typer-slim==0.12.5
|
||||
|
|
|
@ -14,17 +14,7 @@ import typing
|
|||
import logging
|
||||
import pathlib
|
||||
|
||||
try:
|
||||
import tomllib
|
||||
|
||||
pytomlpp = None
|
||||
USE_TOMLLIB = True
|
||||
except ImportError:
|
||||
import pytomlpp
|
||||
|
||||
tomllib = None
|
||||
USE_TOMLLIB = False
|
||||
|
||||
from ..compat import tomllib
|
||||
|
||||
__all__ = ['Config', 'UNSET', 'SchemaIssue']
|
||||
|
||||
|
@ -183,19 +173,10 @@ class Config:
|
|||
|
||||
|
||||
def toml_load(file_name):
|
||||
if USE_TOMLLIB:
|
||||
# Python >= 3.11
|
||||
try:
|
||||
with open(file_name, "rb") as f:
|
||||
return tomllib.load(f)
|
||||
except tomllib.TOMLDecodeError as exc:
|
||||
msg = str(exc).replace('\t', '').replace('\n', ' ')
|
||||
log.error("%s: %s", file_name, msg)
|
||||
raise
|
||||
# fallback to pytomlpp for Python < 3.11
|
||||
try:
|
||||
return pytomlpp.load(file_name)
|
||||
except pytomlpp.DecodeError as exc:
|
||||
with open(file_name, "rb") as f:
|
||||
return tomllib.load(f)
|
||||
except tomllib.TOMLDecodeError as exc:
|
||||
msg = str(exc).replace('\t', '').replace('\n', ' ')
|
||||
log.error("%s: %s", file_name, msg)
|
||||
raise
|
||||
|
|
|
@ -76,11 +76,11 @@ LONG_MAX = 150
|
|||
LONG_MAX_SUSPICIOUS = 10
|
||||
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||
|
||||
API_WONDOW = 3600
|
||||
API_WINDOW = 3600
|
||||
"""Time (sec) before sliding window for API requests (format != html) expires."""
|
||||
|
||||
API_MAX = 4
|
||||
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
||||
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
|
||||
|
||||
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
|
||||
"""Time (sec) before sliding window for one suspicious IP expires."""
|
||||
|
@ -103,7 +103,7 @@ def filter_request(
|
|||
return None
|
||||
|
||||
if request.args.get('format', 'html') != 'html':
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
|
||||
c = incr_sliding_window(redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
|
||||
if c > API_MAX:
|
||||
return too_many_requests(network, "too many request in API_WINDOW")
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ And in the HTML template from flask a stylesheet link is needed (the value of
|
|||
|
||||
<link rel="stylesheet"
|
||||
href="{{ url_for('client_token', token=link_token) }}"
|
||||
type="text/css" />
|
||||
type="text/css" >
|
||||
|
||||
.. _X-Forwarded-For:
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Compatibility with older versions.

Exposes :py:obj:`tomllib` regardless of the interpreter version: on
Python >= 3.11 the standard-library module is re-exported, while older
interpreters fall back to the API-compatible ``tomli`` package.
"""

# pylint: disable=unused-import

# Names re-exported by this compatibility module.
__all__ = [
    "tomllib",
]

import sys

# TOML (lib) compatibility
# ------------------------

# ``tomllib`` entered the standard library with Python 3.11; for earlier
# versions the third-party ``tomli`` package (same API) is used instead.
if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -5,7 +5,7 @@
|
|||
],
|
||||
"ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
|
||||
"versions": [
|
||||
"127.0",
|
||||
"126.0"
|
||||
"132.0",
|
||||
"131.0"
|
||||
]
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,83 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
"""`Alpine Linux binary packages`_. `Alpine Linux`_ is a Linux-based operating
system designed to be small, simple and secure. Contrary to many other Linux
distributions, it uses musl, BusyBox and OpenRC. Alpine is mostly used on
servers and for Docker images.

.. _Alpine Linux binary packages: https://pkgs.alpinelinux.org
.. _Alpine Linux: https://www.alpinelinux.org

"""

import re

from urllib.parse import urlencode
from lxml import html
from dateutil import parser

from searx.utils import eval_xpath, eval_xpath_list, extract_text

# Engine metadata shown in SearXNG's engine overview.
about = {
    'website': 'https://www.alpinelinux.org',
    'wikidata_id': 'Q4033826',
    'use_official_api': False,
    'official_api_documentation': None,
    'require_api_key': False,
    'results': 'HTML',
}
# The package index supports pagination via a ``page`` URL parameter.
paging = True
categories = ['packages', 'it']
|
||||
|
||||
base_url = "https://pkgs.alpinelinux.org"
alpine_arch = 'x86_64'
"""Kernel architecture: ``x86_64``, ``x86``, ``aarch64``, ``armhf``,
``ppc64le``, ``s390x``, ``armv7`` or ``riscv64``"""

ARCH_RE = re.compile("x86_64|x86|aarch64|armhf|ppc64le|s390x|armv7|riscv64")
"""Regular expression to match supported architectures in the query string."""


def request(query, params):
    """Build the search URL for pkgs.alpinelinux.org.

    A supported architecture name embedded in the query (e.g. ``vim aarch64``)
    is stripped from the search term and used as the ``arch`` filter; otherwise
    the default :py:obj:`alpine_arch` is applied.
    """
    arch_match = ARCH_RE.search(query)
    requested_arch = arch_match.group(0) if arch_match else None
    if requested_arch:
        # drop the architecture token from the package-name search term
        query = query.replace(requested_arch, '').strip()

    query_args = {
        # use wildcards to match more than just packages with the exact same
        # name as the query
        'name': f"*{query}*",
        'page': params['pageno'],
        'arch': requested_arch or alpine_arch,
    }
    params['url'] = f"{base_url}/packages?{urlencode(query_args)}"
    return params
|
||||
|
||||
|
||||
def response(resp):
    """Extract package results from the HTML table of pkgs.alpinelinux.org."""
    doc = html.fromstring(resp.text)
    packages = []

    def column(row, css_class, tail=''):
        # text content of the <td> whose ``class`` attribute contains
        # *css_class*; *tail* appends an extra XPath step (e.g. a link href)
        xpath = './td[contains(@class, "' + css_class + '")]' + tail
        return extract_text(eval_xpath(row, xpath))

    for row in eval_xpath_list(doc, "//table/tbody/tr"):

        if len(row.xpath("./td")) < 9:
            # placeholder rows such as the "No item found..." message don't
            # carry the full set of columns -> skip them
            continue

        packages.append(
            {
                'template': 'packages.html',
                'url': base_url + column(row, "package", '/a/@href'),
                'title': column(row, "package"),
                'package_name': column(row, "package"),
                'publishedDate': parser.parse(column(row, "bdate")),
                'version': column(row, "version"),
                'homepage': column(row, "url", '/a/@href'),
                'maintainer': column(row, "maintainer"),
                'license_name': column(row, "license"),
                'tags': [column(row, "repo")],
            }
        )

    return packages
|
|
@ -34,10 +34,10 @@ Implementations
|
|||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from urllib.parse import quote
|
||||
from urllib.parse import urlencode
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import extract_text, eval_xpath, eval_xpath_list
|
||||
from searx.utils import extract_text, eval_xpath, eval_xpath_getindex, eval_xpath_list
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.data import ENGINE_TRAITS
|
||||
|
||||
|
@ -53,7 +53,7 @@ about: Dict[str, Any] = {
|
|||
|
||||
# engine dependent config
|
||||
categories: List[str] = ["files"]
|
||||
paging: bool = False
|
||||
paging: bool = True
|
||||
|
||||
# search-url
|
||||
base_url: str = "https://annas-archive.org"
|
||||
|
@ -99,9 +99,18 @@ def init(engine_settings=None): # pylint: disable=unused-argument
|
|||
|
||||
|
||||
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
|
||||
q = quote(query)
|
||||
lang = traits.get_language(params["language"], traits.all_locale) # type: ignore
|
||||
params["url"] = base_url + f"/search?lang={lang or ''}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={q}"
|
||||
args = {
|
||||
'lang': lang,
|
||||
'content': aa_content,
|
||||
'ext': aa_ext,
|
||||
'sort': aa_sort,
|
||||
'q': query,
|
||||
'page': params['pageno'],
|
||||
}
|
||||
# filter out None and empty values
|
||||
filtered_args = dict((k, v) for k, v in args.items() if v)
|
||||
params["url"] = f"{base_url}/search?{urlencode(filtered_args)}"
|
||||
return params
|
||||
|
||||
|
||||
|
@ -128,12 +137,12 @@ def response(resp) -> List[Dict[str, Optional[str]]]:
|
|||
def _get_result(item):
|
||||
return {
|
||||
'template': 'paper.html',
|
||||
'url': base_url + item.xpath('./@href')[0],
|
||||
'url': base_url + extract_text(eval_xpath_getindex(item, './@href', 0)),
|
||||
'title': extract_text(eval_xpath(item, './/h3/text()[1]')),
|
||||
'publisher': extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]')),
|
||||
'authors': [extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))],
|
||||
'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
|
||||
'thumbnail': item.xpath('.//img/@src')[0],
|
||||
'thumbnail': extract_text(eval_xpath_getindex(item, './/img/@src', 0, default=None), allow_none=True),
|
||||
}
|
||||
|
||||
|
||||
|
@ -184,3 +193,8 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||
|
||||
for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
|
||||
engine_traits.custom['sort'].append(x.get("value"))
|
||||
|
||||
# for better diff; sort the persistence of these traits
|
||||
engine_traits.custom['content'].sort()
|
||||
engine_traits.custom['ext'].sort()
|
||||
engine_traits.custom['sort'].sort()
|
||||
|
|
|
@ -9,6 +9,8 @@ import string
|
|||
from urllib.parse import urlencode
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from searx import utils
|
||||
|
||||
# Engine metadata
|
||||
about = {
|
||||
"website": "https://www.bilibili.com",
|
||||
|
@ -56,6 +58,8 @@ def request(query, params):
|
|||
|
||||
# Format the video duration
|
||||
def format_duration(duration):
|
||||
if not ":" in duration:
|
||||
return None
|
||||
minutes, seconds = map(int, duration.split(":"))
|
||||
total_seconds = minutes * 60 + seconds
|
||||
|
||||
|
@ -70,7 +74,7 @@ def response(resp):
|
|||
results = []
|
||||
|
||||
for item in search_res.get("data", {}).get("result", []):
|
||||
title = item["title"]
|
||||
title = utils.html_to_text(item["title"])
|
||||
url = item["arcurl"]
|
||||
thumbnail = item["pic"]
|
||||
description = item["description"]
|
||||
|
|
|
@ -99,7 +99,7 @@ def response(resp):
|
|||
'url': metadata['purl'],
|
||||
'thumbnail_src': metadata['turl'],
|
||||
'img_src': metadata['murl'],
|
||||
'content': metadata['desc'],
|
||||
'content': metadata.get('desc'),
|
||||
'title': title,
|
||||
'source': source,
|
||||
'resolution': img_format[0],
|
||||
|
|
|
@ -123,7 +123,6 @@ from typing import Any, TYPE_CHECKING
|
|||
from urllib.parse import (
|
||||
urlencode,
|
||||
urlparse,
|
||||
parse_qs,
|
||||
)
|
||||
|
||||
from dateutil import parser
|
||||
|
@ -137,6 +136,7 @@ from searx.utils import (
|
|||
eval_xpath_list,
|
||||
eval_xpath_getindex,
|
||||
js_variable_to_python,
|
||||
get_embeded_stream_url,
|
||||
)
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
|
@ -311,7 +311,7 @@ def _parse_search(resp):
|
|||
# In my tests a video tag in the WEB search was most often not a
|
||||
# video, except the ones from youtube ..
|
||||
|
||||
iframe_src = _get_iframe_src(url)
|
||||
iframe_src = get_embeded_stream_url(url)
|
||||
if iframe_src:
|
||||
item['iframe_src'] = iframe_src
|
||||
item['template'] = 'videos.html'
|
||||
|
@ -328,15 +328,6 @@ def _parse_search(resp):
|
|||
return result_list
|
||||
|
||||
|
||||
def _get_iframe_src(url):
|
||||
parsed_url = urlparse(url)
|
||||
if parsed_url.path == '/watch' and parsed_url.query:
|
||||
video_id = parse_qs(parsed_url.query).get('v', []) # type: ignore
|
||||
if video_id:
|
||||
return 'https://www.youtube-nocookie.com/embed/' + video_id[0] # type: ignore
|
||||
return None
|
||||
|
||||
|
||||
def _parse_news(json_resp):
|
||||
result_list = []
|
||||
|
||||
|
@ -392,7 +383,7 @@ def _parse_videos(json_resp):
|
|||
if result['thumbnail'] is not None:
|
||||
item['thumbnail'] = result['thumbnail']['src']
|
||||
|
||||
iframe_src = _get_iframe_src(url)
|
||||
iframe_src = get_embeded_stream_url(url)
|
||||
if iframe_src:
|
||||
item['iframe_src'] = iframe_src
|
||||
|
||||
|
@ -430,10 +421,11 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||
|
||||
ui_lang = option.get('value')
|
||||
try:
|
||||
if '-' in ui_lang:
|
||||
l = babel.Locale.parse(ui_lang, sep='-')
|
||||
if l.territory:
|
||||
sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
|
||||
else:
|
||||
sxng_tag = language_tag(babel.Locale.parse(ui_lang))
|
||||
sxng_tag = language_tag(babel.Locale.parse(ui_lang, sep='-'))
|
||||
|
||||
except babel.UnknownLocaleError:
|
||||
print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
|
||||
|
@ -453,7 +445,7 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||
if not resp.ok: # type: ignore
|
||||
print("ERROR: response from Brave is not OK.")
|
||||
|
||||
country_js = resp.text[resp.text.index("options:{all") + len('options:') :]
|
||||
country_js = resp.text[resp.text.index("options:{all") + len('options:') :] # type: ignore
|
||||
country_js = country_js[: country_js.index("},k={default")]
|
||||
country_tags = js_variable_to_python(country_js)
|
||||
|
||||
|
|
|
@ -54,7 +54,6 @@ def response(resp):
|
|||
|
||||
excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
|
||||
content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
|
||||
# it is better to emit <br/> instead of |, but html tags are verboten
|
||||
content = content.strip().replace('\n', ' | ')
|
||||
content = ' '.join(content.split())
|
||||
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Cloudflare AI engine"""
|
||||
|
||||
from json import loads, dumps
|
||||
from searx.exceptions import SearxEngineAPIException
|
||||
|
||||
about = {
|
||||
"website": 'https://ai.cloudflare.com',
|
||||
"wikidata_id": None,
|
||||
"official_api_documentation": 'https://developers.cloudflare.com/workers-ai',
|
||||
"use_official_api": True,
|
||||
"require_api_key": True,
|
||||
"results": 'JSON',
|
||||
}
|
||||
|
||||
cf_account_id = ''
cf_ai_api = ''
cf_ai_gateway = ''

cf_ai_model = ''
cf_ai_model_display_name = 'Cloudflare AI'

# Assistant messages hint to the AI about the desired output format. Not all models support this role.
cf_ai_model_assistant = 'Keep your answers as short and effective as possible.'
# System messages define the AI's personality. You can use them to set rules and how you expect the AI to behave.
cf_ai_model_system = 'You are a self-aware language model who is honest and direct about any question from the user.'


def request(query, params):
    """Build a POST request against the Cloudflare AI gateway.

    The chat payload carries three messages: the assistant hint, the system
    prompt and the user's *query*.
    """
    params['query'] = query
    params['method'] = 'POST'
    params['url'] = f'https://gateway.ai.cloudflare.com/v1/{cf_account_id}/{cf_ai_gateway}/workers-ai/{cf_ai_model}'

    headers = params['headers']
    headers['Authorization'] = f'Bearer {cf_ai_api}'
    headers['Content-Type'] = 'application/json'

    chat_messages = [
        {'role': 'assistant', 'content': cf_ai_model_assistant},
        {'role': 'system', 'content': cf_ai_model_system},
        {'role': 'user', 'content': params['query']},
    ]
    params['data'] = dumps({'messages': chat_messages}).encode('utf-8')

    return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
json = loads(resp.text)
|
||||
|
||||
if 'error' in json:
|
||||
raise SearxEngineAPIException('Cloudflare AI error: ' + json['error'])
|
||||
|
||||
if 'result' in json:
|
||||
results.append(
|
||||
{
|
||||
'content': json['result']['response'],
|
||||
'infobox': cf_ai_model_display_name,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
|
@ -10,6 +10,8 @@ engine offers some additional settings:
|
|||
- :py:obj:`api_order`
|
||||
- :py:obj:`search_endpoint`
|
||||
- :py:obj:`show_avatar`
|
||||
- :py:obj:`api_key`
|
||||
- :py:obj:`api_username`
|
||||
|
||||
Example
|
||||
=======
|
||||
|
@ -27,6 +29,20 @@ for the ``paddling.com`` forum:
|
|||
categories: ['social media', 'sports']
|
||||
show_avatar: true
|
||||
|
||||
If the forum is private, you need to add an API key and username for the search:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: paddling
|
||||
engine: discourse
|
||||
shortcut: paddle
|
||||
base_url: 'https://forums.paddling.com/'
|
||||
api_order: views
|
||||
categories: ['social media', 'sports']
|
||||
show_avatar: true
|
||||
api_key: '<KEY>'
|
||||
api_username: 'system'
|
||||
|
||||
|
||||
Implementations
|
||||
===============
|
||||
|
@ -65,6 +81,12 @@ api_order = 'likes'
|
|||
show_avatar = False
|
||||
"""Show avatar of the user who send the post."""
|
||||
|
||||
api_key = ''
|
||||
"""API key of the Discourse forum."""
|
||||
|
||||
api_username = ''
|
||||
"""API username of the Discourse forum."""
|
||||
|
||||
paging = True
|
||||
time_range_support = True
|
||||
|
||||
|
@ -98,6 +120,12 @@ def request(query, params):
|
|||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}
|
||||
|
||||
if api_key != '':
|
||||
params['headers']['Api-Key'] = api_key
|
||||
|
||||
if api_username != '':
|
||||
params['headers']['Api-Username'] = api_username
|
||||
|
||||
return params
|
||||
|
||||
|
||||
|
|
|
@ -18,13 +18,13 @@ from searx import (
|
|||
)
|
||||
from searx.utils import (
|
||||
eval_xpath,
|
||||
eval_xpath_getindex,
|
||||
extract_text,
|
||||
)
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx import redisdb
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.utils import extr
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
@ -53,31 +53,33 @@ paging = True
|
|||
time_range_support = True
|
||||
safesearch = True # user can't select but the results are filtered
|
||||
|
||||
url = 'https://lite.duckduckgo.com/lite/'
|
||||
# url_ping = 'https://duckduckgo.com/t/sl_l'
|
||||
url = "https://html.duckduckgo.com/html"
|
||||
|
||||
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
||||
__CACHE = []
|
||||
|
||||
|
||||
def cache_vqd(query, value):
|
||||
def _cache_key(data: dict):
|
||||
return 'SearXNG_ddg_web_vqd' + redislib.secret_hash(f"{data['q']}//{data['kl']}")
|
||||
|
||||
|
||||
def cache_vqd(data: dict, value):
|
||||
"""Caches a ``vqd`` value from a query."""
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
logger.debug("cache vqd value: %s", value)
|
||||
key = 'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
|
||||
c.set(key, value, ex=600)
|
||||
c.set(_cache_key(data), value, ex=600)
|
||||
|
||||
else:
|
||||
logger.debug("MEM cache vqd value: %s", value)
|
||||
if len(__CACHE) > 100: # cache vqd from last 100 queries
|
||||
__CACHE.pop(0)
|
||||
__CACHE.append((_cache_key(data), value))
|
||||
|
||||
|
||||
def get_vqd(query):
|
||||
"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
|
||||
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
|
||||
response.
|
||||
|
||||
.. hint::
|
||||
|
||||
If an empty string is returned there are no results for the ``query`` and
|
||||
therefore no ``vqd`` value.
|
||||
def get_vqd(data):
|
||||
"""Returns the ``vqd`` that fits to the *query* (``data`` from HTTP POST).
|
||||
|
||||
DDG's bot detection is sensitive to the ``vqd`` value. For some search terms
|
||||
(such as extremely long search terms that are often sent by bots), no ``vqd``
|
||||
|
@ -105,28 +107,23 @@ def get_vqd(query):
|
|||
- DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``
|
||||
|
||||
"""
|
||||
|
||||
key = _cache_key(data)
|
||||
value = None
|
||||
c = redisdb.client()
|
||||
if c:
|
||||
key = 'SearXNG_ddg_web_vqd' + redislib.secret_hash(query)
|
||||
value = c.get(key)
|
||||
if value or value == b'':
|
||||
value = value.decode('utf-8')
|
||||
logger.debug("re-use cached vqd value: %s", value)
|
||||
logger.debug("re-use CACHED vqd value: %s", value)
|
||||
return value
|
||||
|
||||
query_url = 'https://duckduckgo.com/?' + urlencode({'q': query})
|
||||
res = get(query_url)
|
||||
doc = lxml.html.fromstring(res.text)
|
||||
for script in doc.xpath("//script[@type='text/javascript']"):
|
||||
script = script.text
|
||||
if 'vqd="' in script:
|
||||
value = extr(script, 'vqd="', '"')
|
||||
break
|
||||
logger.debug("new vqd value: '%s'", value)
|
||||
if value is not None:
|
||||
cache_vqd(query, value)
|
||||
return value
|
||||
else:
|
||||
for k, value in __CACHE:
|
||||
if k == key:
|
||||
logger.debug("MEM re-use CACHED vqd value: %s", value)
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
|
||||
|
@ -154,9 +151,10 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
|
|||
|
||||
.. hint::
|
||||
|
||||
`DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
|
||||
selection to the user, only a region can be selected by the user
|
||||
(``eng_region`` from the example above). DDG-lite stores the selected
|
||||
`DDG-lite <https://lite.duckduckgo.com/lite>`__ and the *no Javascript*
|
||||
page https://html.duckduckgo.com/html do not offer a language selection
|
||||
to the user, only a region can be selected by the user (``eng_region``
|
||||
from the example above). DDG-lite and *no Javascript* store the selected
|
||||
region in a cookie::
|
||||
|
||||
params['cookies']['kl'] = eng_region # 'ar-es'
|
||||
|
@ -240,10 +238,25 @@ def request(query, params):
|
|||
|
||||
query = quote_ddg_bangs(query)
|
||||
|
||||
# request needs a vqd argument
|
||||
vqd = get_vqd(query)
|
||||
if len(query) >= 500:
|
||||
# DDG does not accept queries with more than 499 chars
|
||||
params["url"] = None
|
||||
return
|
||||
|
||||
# Advanced search syntax ends in CAPTCHA
|
||||
# https://duckduckgo.com/duckduckgo-help-pages/results/syntax/
|
||||
query = [
|
||||
x.removeprefix("site:").removeprefix("intitle:").removeprefix("inurl:").removeprefix("filetype:")
|
||||
for x in query.split()
|
||||
]
|
||||
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||
if eng_region == "wt-wt":
|
||||
# https://html.duckduckgo.com/html sets an empty value for "all".
|
||||
eng_region = ""
|
||||
|
||||
params['data']['kl'] = eng_region
|
||||
params['cookies']['kl'] = eng_region
|
||||
|
||||
# eng_lang = get_ddg_lang(traits, params['searxng_locale'])
|
||||
|
||||
params['url'] = url
|
||||
|
@ -251,45 +264,82 @@ def request(query, params):
|
|||
params['data']['q'] = query
|
||||
|
||||
# The API is not documented, so we do some reverse engineering and emulate
|
||||
# what https://lite.duckduckgo.com/lite/ does when you press "next Page"
|
||||
# link again and again ..
|
||||
# what https://html.duckduckgo.com/html does when you press "next Page" link
|
||||
# again and again ..
|
||||
|
||||
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||
params['data']['vqd'] = vqd
|
||||
|
||||
# initial page does not have an offset
|
||||
params['headers']['Sec-Fetch-Dest'] = "document"
|
||||
params['headers']['Sec-Fetch-Mode'] = "navigate" # at least this one is used by ddg's bot detection
|
||||
params['headers']['Sec-Fetch-Site'] = "same-origin"
|
||||
params['headers']['Sec-Fetch-User'] = "?1"
|
||||
|
||||
# Form of the initial search page does have empty values in the form
|
||||
if params['pageno'] == 1:
|
||||
|
||||
params['data']['b'] = ""
|
||||
|
||||
params['data']['df'] = ''
|
||||
if params['time_range'] in time_range_dict:
|
||||
|
||||
params['data']['df'] = time_range_dict[params['time_range']]
|
||||
params['cookies']['df'] = time_range_dict[params['time_range']]
|
||||
|
||||
if params['pageno'] == 2:
|
||||
|
||||
# second page does have an offset of 20
|
||||
offset = (params['pageno'] - 1) * 20
|
||||
params['data']['s'] = offset
|
||||
params['data']['dc'] = offset + 1
|
||||
|
||||
elif params['pageno'] > 2:
|
||||
|
||||
# third and following pages do have an offset of 20 + n*50
|
||||
offset = 20 + (params['pageno'] - 2) * 50
|
||||
params['data']['s'] = offset
|
||||
params['data']['dc'] = offset + 1
|
||||
|
||||
# initial page does not have additional data in the input form
|
||||
if params['pageno'] > 1:
|
||||
|
||||
# initial page does not have these additional data in the input form
|
||||
params['data']['o'] = form_data.get('o', 'json')
|
||||
params['data']['api'] = form_data.get('api', 'd.js')
|
||||
params['data']['nextParams'] = form_data.get('nextParams', '')
|
||||
params['data']['v'] = form_data.get('v', 'l')
|
||||
params['headers']['Referer'] = 'https://lite.duckduckgo.com/'
|
||||
params['headers']['Referer'] = url
|
||||
|
||||
params['data']['kl'] = eng_region
|
||||
params['cookies']['kl'] = eng_region
|
||||
# from here on no more params['data'] shuld be set, since this dict is
|
||||
# needed to get a vqd value from the cache ..
|
||||
|
||||
params['data']['df'] = ''
|
||||
if params['time_range'] in time_range_dict:
|
||||
params['data']['df'] = time_range_dict[params['time_range']]
|
||||
params['cookies']['df'] = time_range_dict[params['time_range']]
|
||||
vqd = get_vqd(params['data'])
|
||||
|
||||
# Certain conditions must be met in order to call up one of the
|
||||
# following pages ...
|
||||
|
||||
if vqd:
|
||||
params['data']['vqd'] = vqd # follow up pages / requests needs a vqd argument
|
||||
else:
|
||||
# Don't try to call follow up pages without a vqd value. DDG
|
||||
# recognizes this as a request from a bot. This lowers the
|
||||
# reputation of the SearXNG IP and DDG starts to activate CAPTCHAs.
|
||||
params["url"] = None
|
||||
return
|
||||
|
||||
if params['searxng_locale'].startswith("zh"):
|
||||
# Some locales (at least China) do not have a "next page" button and ddg
|
||||
# will return a HTTP/2 403 Forbidden for a request of such a page.
|
||||
params["url"] = None
|
||||
return
|
||||
|
||||
logger.debug("param data: %s", params['data'])
|
||||
logger.debug("param cookies: %s", params['cookies'])
|
||||
return params
|
||||
|
||||
|
||||
def is_ddg_captcha(dom):
|
||||
"""In case of CAPTCHA ddg response its own *not a Robot* dialog and is not
|
||||
redirected to a CAPTCHA page."""
|
||||
|
||||
return bool(eval_xpath(dom, "//form[@id='challenge-form']"))
|
||||
|
||||
|
||||
def response(resp):
|
||||
|
@ -300,38 +350,36 @@ def response(resp):
|
|||
results = []
|
||||
doc = lxml.html.fromstring(resp.text)
|
||||
|
||||
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
||||
if is_ddg_captcha(doc):
|
||||
# set suspend time to zero is OK --> ddg does not block the IP
|
||||
raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({resp.search_params['data'].get('kl')})")
|
||||
|
||||
if len(result_table) == 2:
|
||||
# some locales (at least China) does not have a "next page" button and
|
||||
# the layout of the HTML tables is different.
|
||||
result_table = result_table[1]
|
||||
elif not len(result_table) >= 3:
|
||||
# no more results
|
||||
return []
|
||||
else:
|
||||
result_table = result_table[2]
|
||||
# update form data from response
|
||||
form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
|
||||
if len(form):
|
||||
form = eval_xpath(doc, '//input[@name="vqd"]/..')
|
||||
if len(form):
|
||||
# some locales (at least China) does not have a "next page" button
|
||||
form = form[0]
|
||||
form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
|
||||
|
||||
form = form[0]
|
||||
form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
|
||||
form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
|
||||
form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
|
||||
logger.debug('form_data: %s', form_data)
|
||||
cache_vqd(resp.search_params["data"], form_vqd)
|
||||
|
||||
tr_rows = eval_xpath(result_table, './/tr')
|
||||
# In the last <tr> is the form of the 'previous/next page' links
|
||||
tr_rows = tr_rows[:-1]
|
||||
# just select "web-result" and ignore results of class "result--ad result--ad--small"
|
||||
for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'):
|
||||
|
||||
len_tr_rows = len(tr_rows)
|
||||
offset = 0
|
||||
item = {}
|
||||
title = eval_xpath(div_result, './/h2/a')
|
||||
if not title:
|
||||
# this is the "No results." item in the result list
|
||||
continue
|
||||
item["title"] = extract_text(title)
|
||||
item["url"] = eval_xpath(div_result, './/h2/a/@href')[0]
|
||||
item["content"] = extract_text(eval_xpath(div_result, './/a[contains(@class, "result__snippet")]')[0])
|
||||
|
||||
zero_click_info_xpath = '//html/body/form/div/table[2]/tr[2]/td/text()'
|
||||
results.append(item)
|
||||
|
||||
zero_click_info_xpath = '//div[@id="zero_click_abstract"]'
|
||||
zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip()
|
||||
|
||||
if zero_click and "Your IP address is" not in zero_click:
|
||||
if zero_click and "Your IP address is" not in zero_click and "Your user agent:" not in zero_click:
|
||||
current_query = resp.search_params["data"].get("q")
|
||||
|
||||
results.append(
|
||||
|
@ -341,33 +389,6 @@ def response(resp):
|
|||
}
|
||||
)
|
||||
|
||||
while len_tr_rows >= offset + 4:
|
||||
|
||||
# assemble table rows we need to scrap
|
||||
tr_title = tr_rows[offset]
|
||||
tr_content = tr_rows[offset + 1]
|
||||
offset += 4
|
||||
|
||||
# ignore sponsored Adds <tr class="result-sponsored">
|
||||
if tr_content.get('class') == 'result-sponsored':
|
||||
continue
|
||||
|
||||
a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
|
||||
if a_tag is None:
|
||||
continue
|
||||
|
||||
td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
|
||||
if td_content is None:
|
||||
continue
|
||||
|
||||
results.append(
|
||||
{
|
||||
'title': a_tag.text_content(),
|
||||
'content': extract_text(td_content),
|
||||
'url': a_tag.get('href'),
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ DuckDuckGo Extra (images, videos, news)
|
|||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import get_embeded_stream_url
|
||||
|
||||
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
||||
from searx.engines.duckduckgo import (
|
||||
|
@ -108,7 +109,7 @@ def _video_result(result):
|
|||
'title': result['title'],
|
||||
'content': result['description'],
|
||||
'thumbnail': result['images'].get('small') or result['images'].get('medium'),
|
||||
'iframe_src': result['embed_url'],
|
||||
'iframe_src': get_embeded_stream_url(result['content']),
|
||||
'source': result['provider'],
|
||||
'length': result['duration'],
|
||||
'metadata': result.get('uploader'),
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Geizhals is a German website to compare the price of a product on the
|
||||
most common German shopping sites and find the lowest price.
|
||||
|
||||
The sorting of the search results can be influenced by the following additions
|
||||
to the search term:
|
||||
|
||||
``asc`` or ``price``
|
||||
To sort by price in ascending order.
|
||||
|
||||
``desc``
|
||||
To sort by price in descending order.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from lxml import html
|
||||
|
||||
from searx.utils import eval_xpath, eval_xpath_list, extract_text
|
||||
|
||||
about = {
|
||||
'website': 'https://geizhals.de',
|
||||
'wikidata_id': 'Q15977657',
|
||||
'use_official_api': False,
|
||||
'official_api_documentation': None,
|
||||
'require_api_key': False,
|
||||
'results': 'HTML',
|
||||
'language': 'de',
|
||||
}
|
||||
paging = True
|
||||
categories = ['shopping']
|
||||
|
||||
base_url = "https://geizhals.de"
|
||||
sort_order = 'relevance'
|
||||
|
||||
SORT_RE = re.compile(r"sort:(\w+)")
|
||||
sort_order_map = {
|
||||
'relevance': None,
|
||||
'price': 'p',
|
||||
'asc': 'p',
|
||||
'desc': '-p',
|
||||
}
|
||||
|
||||
|
||||
def request(query, params):
|
||||
sort = None
|
||||
|
||||
sort_order_path = SORT_RE.search(query)
|
||||
if sort_order_path:
|
||||
sort = sort_order_map.get(sort_order_path.group(1))
|
||||
query = SORT_RE.sub("", query)
|
||||
logger.debug(query)
|
||||
|
||||
args = {
|
||||
'fs': query,
|
||||
'pg': params['pageno'],
|
||||
'toggle_all': 1, # load item specs
|
||||
'sort': sort,
|
||||
}
|
||||
params['url'] = f"{base_url}/?{urlencode(args)}"
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
for result in eval_xpath_list(dom, "//article[contains(@class, 'listview__item')]"):
|
||||
content = []
|
||||
for spec in eval_xpath_list(result, ".//div[contains(@class, 'specs-grid__item')]"):
|
||||
content.append(f"{extract_text(eval_xpath(spec, './dt'))}: {extract_text(eval_xpath(spec, './dd'))}")
|
||||
|
||||
metadata = [
|
||||
extract_text(eval_xpath(result, ".//div[contains(@class, 'stars-rating-label')]")),
|
||||
extract_text(eval_xpath(result, ".//div[contains(@class, 'listview__offercount')]")),
|
||||
]
|
||||
|
||||
item = {
|
||||
'template': 'products.html',
|
||||
'url': (
|
||||
base_url + "/" + extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__name-link')]/@href"))
|
||||
),
|
||||
'title': extract_text(eval_xpath(result, ".//h3[contains(@class, 'listview__name')]")),
|
||||
'content': ' | '.join(content),
|
||||
'thumbnail': extract_text(eval_xpath(result, ".//img[contains(@class, 'listview__image')]/@src")),
|
||||
'metadata': ', '.join(item for item in metadata if item),
|
||||
}
|
||||
|
||||
best_price = extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__price-link')]")).split(" ")
|
||||
if len(best_price) > 1:
|
||||
item["price"] = f"Bestes Angebot: {best_price[1]}€"
|
||||
results.append(item)
|
||||
|
||||
return results
|
|
@ -1,7 +1,8 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Engine to search in collaborative software platforms based on Gitea_.
|
||||
"""Engine to search in collaborative software platforms based on Gitea_ or Forgejo_.
|
||||
|
||||
.. _Gitea: https://about.gitea.com/
|
||||
.. _Forgejo: https://forgejo.org/
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
@ -23,6 +24,11 @@ Optional settings are:
|
|||
base_url: https://gitea.com
|
||||
shortcut: gitea
|
||||
|
||||
- name: forgejo.com
|
||||
engine: gitea
|
||||
base_url: https://code.forgejo.org
|
||||
shortcut: forgejo
|
||||
|
||||
If you would like to use additional instances, just configure new engines in the
|
||||
:ref:`settings <settings engine>` and set the ``base_url``.
|
||||
|
||||
|
@ -95,13 +101,14 @@ def response(resp):
|
|||
'url': item.get('html_url'),
|
||||
'title': item.get('full_name'),
|
||||
'content': ' / '.join(content),
|
||||
'img_src': item.get('owner', {}).get('avatar_url'),
|
||||
# Use Repository Avatar and fall back to Owner Avatar if not set.
|
||||
'thumbnail': item.get('avatar_url') or item.get('owner', {}).get('avatar_url'),
|
||||
'package_name': item.get('name'),
|
||||
'maintainer': item.get('owner', {}).get('login'),
|
||||
'maintainer': item.get('owner', {}).get('username'),
|
||||
'publishedDate': parser.parse(item.get("updated_at") or item.get("created_at")),
|
||||
'tags': item.get('topics', []),
|
||||
'popularity': item.get('stargazers_count'),
|
||||
'homepage': item.get('homepage'),
|
||||
'popularity': item.get('stars_count'),
|
||||
'homepage': item.get('website'),
|
||||
'source_code_url': item.get('clone_url'),
|
||||
}
|
||||
)
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Engine to search in collaborative software platforms based on GitLab_ with
|
||||
the `GitLab REST API`_.
|
||||
|
||||
.. _GitLab: https://about.gitlab.com/install/
|
||||
.. _GitLab REST API: https://docs.gitlab.com/ee/api/
|
||||
|
||||
Configuration
|
||||
=============
|
||||
|
||||
The engine has the following mandatory setting:
|
||||
|
||||
- :py:obj:`base_url`
|
||||
|
||||
Optional settings are:
|
||||
|
||||
- :py:obj:`api_path`
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: gitlab
|
||||
engine: gitlab
|
||||
base_url: https://gitlab.com
|
||||
shortcut: gl
|
||||
about:
|
||||
website: https://gitlab.com/
|
||||
wikidata_id: Q16639197
|
||||
|
||||
- name: gnome
|
||||
engine: gitlab
|
||||
base_url: https://gitlab.gnome.org
|
||||
shortcut: gn
|
||||
about:
|
||||
website: https://gitlab.gnome.org
|
||||
wikidata_id: Q44316
|
||||
|
||||
Implementations
|
||||
===============
|
||||
|
||||
"""
|
||||
|
||||
from urllib.parse import urlencode
|
||||
from dateutil import parser
|
||||
|
||||
about = {
|
||||
"website": None,
|
||||
"wikidata_id": None,
|
||||
"official_api_documentation": "https://docs.gitlab.com/ee/api/",
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": "JSON",
|
||||
}
|
||||
|
||||
categories = ['it', 'repos']
|
||||
paging = True
|
||||
|
||||
base_url: str = ""
|
||||
"""Base URL of the GitLab host."""
|
||||
|
||||
api_path: str = 'api/v4/projects'
|
||||
"""The path the `project API <https://docs.gitlab.com/ee/api/projects.html>`_.
|
||||
|
||||
The default path should work fine usually.
|
||||
"""
|
||||
|
||||
|
||||
def request(query, params):
|
||||
args = {'search': query, 'page': params['pageno']}
|
||||
params['url'] = f"{base_url}/{api_path}?{urlencode(args)}"
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
for item in resp.json():
|
||||
results.append(
|
||||
{
|
||||
'template': 'packages.html',
|
||||
'url': item.get('web_url'),
|
||||
'title': item.get('name'),
|
||||
'content': item.get('description'),
|
||||
'thumbnail': item.get('avatar_url'),
|
||||
'package_name': item.get('name'),
|
||||
'maintainer': item.get('namespace', {}).get('name'),
|
||||
'publishedDate': parser.parse(item.get('last_activity_at') or item.get("created_at")),
|
||||
'tags': item.get('tag_list', []),
|
||||
'popularity': item.get('star_count'),
|
||||
'homepage': item.get('readme_url'),
|
||||
'source_code_url': item.get('http_url_to_repo'),
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
|
@ -62,7 +62,7 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
|
|||
results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
|
||||
title_xpath = './/a/h3[1]'
|
||||
href_xpath = './/a[h3]/@href'
|
||||
content_xpath = './/div[@data-sncf="1"]'
|
||||
content_xpath = './/div[contains(@data-sncf, "1")]'
|
||||
|
||||
# Suggestions are links placed in a *card-section*, we extract only the text
|
||||
# from the links not the links itself.
|
||||
|
@ -334,9 +334,11 @@ def response(resp):
|
|||
# results --> answer
|
||||
answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
|
||||
for item in answer_list:
|
||||
for bubble in eval_xpath(item, './/div[@class="nnFGuf"]'):
|
||||
bubble.drop_tree()
|
||||
results.append(
|
||||
{
|
||||
'answer': item.xpath("normalize-space()"),
|
||||
'answer': extract_text(item),
|
||||
'url': (eval_xpath(item, '../..//a/@href') + [None])[0],
|
||||
}
|
||||
)
|
||||
|
@ -439,7 +441,7 @@ def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
|
|||
try:
|
||||
locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
|
||||
except babel.UnknownLocaleError:
|
||||
print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
|
||||
print("INFO: google UI language %s (%s) is unknown by babel" % (eng_lang, x.text.split("(")[0].strip()))
|
||||
continue
|
||||
sxng_lang = language_tag(locale)
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ from searx.engines.google import (
|
|||
detect_google_sorry,
|
||||
)
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
from searx.utils import get_embeded_stream_url
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
@ -125,6 +126,7 @@ def response(resp):
|
|||
'content': content,
|
||||
'author': pub_info,
|
||||
'thumbnail': thumbnail,
|
||||
'iframe_src': get_embeded_stream_url(url),
|
||||
'template': 'videos.html',
|
||||
}
|
||||
)
|
||||
|
|
|
@ -57,7 +57,11 @@ def request(query, params):
|
|||
|
||||
if params['time_range']:
|
||||
search_type = 'search_by_date'
|
||||
timestamp = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).timestamp()
|
||||
timestamp = (
|
||||
# pylint: disable=unexpected-keyword-arg
|
||||
datetime.now()
|
||||
- relativedelta(**{f"{params['time_range']}s": 1}) # type: ignore
|
||||
).timestamp()
|
||||
query_params["numericFilters"] = f"created_at_i>{timestamp}"
|
||||
|
||||
params["url"] = f"{base_url}/{search_type}?{urlencode(query_params)}"
|
||||
|
|
|
@ -7,6 +7,8 @@ import random
|
|||
from urllib.parse import quote_plus, urlparse
|
||||
from dateutil import parser
|
||||
|
||||
from searx.utils import humanize_number
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://api.invidious.io/',
|
||||
|
@ -91,7 +93,8 @@ def response(resp):
|
|||
"url": url,
|
||||
"title": result.get("title", ""),
|
||||
"content": result.get("description", ""),
|
||||
'length': length,
|
||||
"length": length,
|
||||
"views": humanize_number(result['viewCount']),
|
||||
"template": "videos.html",
|
||||
"author": result.get("author"),
|
||||
"publishedDate": publishedDate,
|
||||
|
|
|
@ -146,7 +146,11 @@ def response(resp):
|
|||
}
|
||||
)
|
||||
else:
|
||||
for url, title, content in zip(query(json, url_query), query(json, title_query), query(json, content_query)):
|
||||
for result in json:
|
||||
url = query(result, url_query)[0]
|
||||
title = query(result, title_query)[0]
|
||||
content = query(result, content_query)[0]
|
||||
|
||||
results.append(
|
||||
{
|
||||
'url': url_prefix + to_string(url),
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""MariaDB is a community driven fork of MySQL. Before enabling MariaDB engine,
|
||||
you must the install the pip package ``mariadb`` along with the necessary
|
||||
prerequities.
|
||||
|
||||
`See the following documentation for more details
|
||||
<https://mariadb.com/docs/server/connect/programming-languages/c/install/>`_
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
This is an example configuration for querying a MariaDB server:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: my_database
|
||||
engine: mariadb_server
|
||||
database: my_database
|
||||
username: searxng
|
||||
password: password
|
||||
limit: 5
|
||||
query_str: 'SELECT * from my_table WHERE my_column=%(query)s'
|
||||
|
||||
Implementations
|
||||
===============
|
||||
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
try:
|
||||
import mariadb
|
||||
except ImportError:
|
||||
# import error is ignored because the admin has to install mysql manually to use
|
||||
# the engine
|
||||
pass
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
engine_type = 'offline'
|
||||
|
||||
host = "127.0.0.1"
|
||||
"""Hostname of the DB connector"""
|
||||
|
||||
port = 3306
|
||||
"""Port of the DB connector"""
|
||||
|
||||
database = ""
|
||||
"""Name of the database."""
|
||||
|
||||
username = ""
|
||||
"""Username for the DB connection."""
|
||||
|
||||
password = ""
|
||||
"""Password for the DB connection."""
|
||||
|
||||
query_str = ""
|
||||
"""SQL query that returns the result items."""
|
||||
|
||||
limit = 10
|
||||
paging = True
|
||||
result_template = 'key-value.html'
|
||||
_connection = None
|
||||
|
||||
|
||||
def init(engine_settings):
|
||||
global _connection # pylint: disable=global-statement
|
||||
|
||||
if 'query_str' not in engine_settings:
|
||||
raise ValueError('query_str cannot be empty')
|
||||
|
||||
if not engine_settings['query_str'].lower().startswith('select '):
|
||||
raise ValueError('only SELECT query is supported')
|
||||
|
||||
_connection = mariadb.connect(database=database, user=username, password=password, host=host, port=port)
|
||||
|
||||
|
||||
def search(query, params):
|
||||
query_params = {'query': query}
|
||||
query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit)
|
||||
logger.debug("SQL Query: %s", query_to_run)
|
||||
|
||||
with _connection.cursor() as cur:
|
||||
cur.execute(query_to_run, query_params)
|
||||
results = []
|
||||
col_names = [i[0] for i in cur.description]
|
||||
for res in cur:
|
||||
result = dict(zip(col_names, map(str, res)))
|
||||
result['template'] = result_template
|
||||
results.append(result)
|
||||
return results
|
|
@ -1,12 +1,15 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Mojeek (general, images, news)"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlencode
|
||||
from lxml import html
|
||||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from searx.utils import eval_xpath, eval_xpath_list, extract_text
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
about = {
|
||||
'website': 'https://mojeek.com',
|
||||
|
@ -42,6 +45,18 @@ news_url_xpath = './/h2/a/@href'
|
|||
news_title_xpath = './/h2/a'
|
||||
news_content_xpath = './/p[@class="s"]'
|
||||
|
||||
language_param = 'lb'
|
||||
region_param = 'arc'
|
||||
|
||||
_delta_kwargs = {'day': 'days', 'week': 'weeks', 'month': 'months', 'year': 'years'}
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
traits: EngineTraits
|
||||
|
||||
|
||||
def init(_):
|
||||
if search_type not in ('', 'images', 'news'):
|
||||
|
@ -53,13 +68,16 @@ def request(query, params):
|
|||
'q': query,
|
||||
'safe': min(params['safesearch'], 1),
|
||||
'fmt': search_type,
|
||||
language_param: traits.get_language(params['searxng_locale'], traits.custom['language_all']),
|
||||
region_param: traits.get_region(params['searxng_locale'], traits.custom['region_all']),
|
||||
}
|
||||
|
||||
if search_type == '':
|
||||
args['s'] = 10 * (params['pageno'] - 1)
|
||||
|
||||
if params['time_range'] and search_type != 'images':
|
||||
args["since"] = (datetime.now() - relativedelta(**{f"{params['time_range']}s": 1})).strftime("%Y%m%d")
|
||||
kwargs = {_delta_kwargs[params['time_range']]: 1}
|
||||
args["since"] = (datetime.now() - relativedelta(**kwargs)).strftime("%Y%m%d") # type: ignore
|
||||
logger.debug(args["since"])
|
||||
|
||||
params['url'] = f"{base_url}/search?{urlencode(args)}"
|
||||
|
@ -94,7 +112,7 @@ def _image_results(dom):
|
|||
'template': 'images.html',
|
||||
'url': extract_text(eval_xpath(result, image_url_xpath)),
|
||||
'title': extract_text(eval_xpath(result, image_title_xpath)),
|
||||
'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)),
|
||||
'img_src': base_url + extract_text(eval_xpath(result, image_img_src_xpath)), # type: ignore
|
||||
'content': '',
|
||||
}
|
||||
)
|
||||
|
@ -130,3 +148,31 @@ def response(resp):
|
|||
return _news_results(dom)
|
||||
|
||||
raise ValueError(f"Invalid search type {search_type}")
|
||||
|
||||
|
||||
def fetch_traits(engine_traits: EngineTraits):
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from searx import network
|
||||
from searx.locales import get_official_locales, region_tag
|
||||
from babel import Locale, UnknownLocaleError
|
||||
import contextlib
|
||||
|
||||
resp = network.get(base_url + "/preferences", headers={'Accept-Language': 'en-US,en;q=0.5'})
|
||||
dom = html.fromstring(resp.text) # type: ignore
|
||||
|
||||
languages = eval_xpath_list(dom, f'//select[@name="{language_param}"]/option/@value')
|
||||
|
||||
engine_traits.custom['language_all'] = languages[0]
|
||||
|
||||
for code in languages[1:]:
|
||||
with contextlib.suppress(UnknownLocaleError):
|
||||
locale = Locale(code)
|
||||
engine_traits.languages[locale.language] = code
|
||||
|
||||
regions = eval_xpath_list(dom, f'//select[@name="{region_param}"]/option/@value')
|
||||
|
||||
engine_traits.custom['region_all'] = regions[1]
|
||||
|
||||
for code in regions[2:]:
|
||||
for locale in get_official_locales(code, engine_traits.languages):
|
||||
engine_traits.regions[region_tag(locale)] = code
|
||||
|
|
|
@ -128,7 +128,14 @@ def request(query: str, params: dict):
|
|||
|
||||
|
||||
def extract_result(dom_result: list[html.HtmlElement]):
|
||||
[a_elem, h3_elem, p_elem] = dom_result
|
||||
# Infoboxes sometimes appear in the beginning and will have a length of 0
|
||||
if len(dom_result) == 3:
|
||||
[a_elem, h3_elem, p_elem] = dom_result
|
||||
elif len(dom_result) == 4:
|
||||
[_, a_elem, h3_elem, p_elem] = dom_result
|
||||
else:
|
||||
return None
|
||||
|
||||
return {
|
||||
'url': extract_text(a_elem.text),
|
||||
'title': extract_text(h3_elem),
|
||||
|
@ -139,9 +146,9 @@ def extract_result(dom_result: list[html.HtmlElement]):
|
|||
def extract_results(search_results: html.HtmlElement):
|
||||
for search_result in search_results:
|
||||
dom_result = eval_xpath_list(search_result, 'div/div/*')
|
||||
# sometimes an info box pops up, will need to filter that out
|
||||
if len(dom_result) == 3:
|
||||
yield extract_result(dom_result)
|
||||
result = extract_result(dom_result)
|
||||
if result is not None:
|
||||
yield result
|
||||
|
||||
|
||||
def response(resp: Response):
|
||||
|
|
|
@ -34,12 +34,25 @@ except ImportError:
|
|||
|
||||
engine_type = 'offline'
|
||||
auth_plugin = 'caching_sha2_password'
|
||||
|
||||
host = "127.0.0.1"
|
||||
"""Hostname of the DB connector"""
|
||||
|
||||
port = 3306
|
||||
"""Port of the DB connector"""
|
||||
|
||||
database = ""
|
||||
"""Name of the database."""
|
||||
|
||||
username = ""
|
||||
"""Username for the DB connection."""
|
||||
|
||||
password = ""
|
||||
"""Password for the DB connection."""
|
||||
|
||||
query_str = ""
|
||||
"""SQL query that returns the result items."""
|
||||
|
||||
limit = 10
|
||||
paging = True
|
||||
result_template = 'key-value.html'
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Open library (books)
|
||||
"""
|
||||
from urllib.parse import urlencode
|
||||
import re
|
||||
|
||||
from dateutil import parser
|
||||
|
||||
about = {
|
||||
'website': 'https://openlibrary.org',
|
||||
'wikidata_id': 'Q1201876',
|
||||
'require_api_key': False,
|
||||
'use_official_api': False,
|
||||
'official_api_documentation': 'https://openlibrary.org/developers/api',
|
||||
}
|
||||
|
||||
paging = True
|
||||
categories = []
|
||||
|
||||
base_url = "https://openlibrary.org"
|
||||
results_per_page = 10
|
||||
|
||||
|
||||
def request(query, params):
|
||||
args = {
|
||||
'q': query,
|
||||
'page': params['pageno'],
|
||||
'limit': results_per_page,
|
||||
}
|
||||
params['url'] = f"{base_url}/search.json?{urlencode(args)}"
|
||||
return params
|
||||
|
||||
|
||||
def _parse_date(date):
|
||||
try:
|
||||
return parser.parse(date)
|
||||
except parser.ParserError:
|
||||
return None
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
|
||||
for item in resp.json().get("docs", []):
|
||||
cover = None
|
||||
if 'lending_identifier_s' in item:
|
||||
cover = f"https://archive.org/services/img/{item['lending_identifier_s']}"
|
||||
|
||||
published = item.get('publish_date')
|
||||
if published:
|
||||
published_dates = [date for date in map(_parse_date, published) if date]
|
||||
if published_dates:
|
||||
published = min(published_dates)
|
||||
|
||||
if not published:
|
||||
published = parser.parse(str(item.get('first_published_year')))
|
||||
|
||||
result = {
|
||||
'template': 'paper.html',
|
||||
'url': f"{base_url}{item['key']}",
|
||||
'title': item['title'],
|
||||
'content': re.sub(r"\{|\}", "", item['first_sentence'][0]) if item.get('first_sentence') else '',
|
||||
'isbn': item.get('isbn', [])[:5],
|
||||
'authors': item.get('author_name', []),
|
||||
'thumbnail': cover,
|
||||
'publishedDate': published,
|
||||
'tags': item.get('subject', [])[:10] + item.get('place', [])[:10],
|
||||
}
|
||||
results.append(result)
|
||||
|
||||
return results
|
|
@ -14,7 +14,7 @@ import babel
|
|||
|
||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||
from searx.locales import language_tag
|
||||
from searx.utils import html_to_text
|
||||
from searx.utils import html_to_text, humanize_number
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
traits: EngineTraits
|
||||
|
@ -124,6 +124,7 @@ def video_response(resp):
|
|||
'content': html_to_text(result.get('description') or ''),
|
||||
'author': result.get('account', {}).get('displayName'),
|
||||
'length': minute_to_hm(result.get('duration')),
|
||||
'views': humanize_number(result['views']),
|
||||
'template': 'videos.html',
|
||||
'publishedDate': parse(result['publishedAt']),
|
||||
'iframe_src': result.get('embedUrl'),
|
||||
|
|
|
@ -53,6 +53,8 @@ from urllib.parse import urlencode
|
|||
import datetime
|
||||
from dateutil import parser
|
||||
|
||||
from searx.utils import humanize_number
|
||||
|
||||
# about
|
||||
about = {
|
||||
"website": 'https://github.com/TeamPiped/Piped/',
|
||||
|
@ -138,6 +140,7 @@ def response(resp):
|
|||
"title": result.get("title", ""),
|
||||
"publishedDate": parser.parse(time.ctime(uploaded / 1000)) if uploaded != -1 else None,
|
||||
"iframe_src": _frontend_url() + '/embed' + result.get("url", ""),
|
||||
"views": humanize_number(result["views"]),
|
||||
}
|
||||
length = result.get("duration")
|
||||
if length:
|
||||
|
|
|
@ -29,12 +29,25 @@ except ImportError:
|
|||
pass
|
||||
|
||||
engine_type = 'offline'
|
||||
|
||||
host = "127.0.0.1"
|
||||
"""Hostname of the DB connector"""
|
||||
|
||||
port = "5432"
|
||||
"""Port of the DB connector"""
|
||||
|
||||
database = ""
|
||||
"""Name of the database."""
|
||||
|
||||
username = ""
|
||||
"""Username for the DB connection."""
|
||||
|
||||
password = ""
|
||||
"""Password for the DB connection."""
|
||||
|
||||
query_str = ""
|
||||
"""SQL query that returns the result items."""
|
||||
|
||||
limit = 10
|
||||
paging = True
|
||||
result_template = 'key-value.html'
|
||||
|
|
|
@ -49,7 +49,11 @@ from flask_babel import gettext
|
|||
import babel
|
||||
import lxml
|
||||
|
||||
from searx.exceptions import SearxEngineAPIException, SearxEngineTooManyRequestsException
|
||||
from searx.exceptions import (
|
||||
SearxEngineAPIException,
|
||||
SearxEngineTooManyRequestsException,
|
||||
SearxEngineCaptchaException,
|
||||
)
|
||||
from searx.network import raise_for_httperror
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
|
@ -57,6 +61,7 @@ from searx.utils import (
|
|||
eval_xpath,
|
||||
eval_xpath_list,
|
||||
extract_text,
|
||||
get_embeded_stream_url,
|
||||
)
|
||||
|
||||
traits: EngineTraits
|
||||
|
@ -187,6 +192,8 @@ def parse_web_api(resp):
|
|||
error_code = data.get('error_code')
|
||||
if error_code == 24:
|
||||
raise SearxEngineTooManyRequestsException()
|
||||
if search_results.get("data", {}).get("error_data", {}).get("captchaUrl") is not None:
|
||||
raise SearxEngineCaptchaException()
|
||||
msg = ",".join(data.get('message', ['unknown']))
|
||||
raise SearxEngineAPIException(f"{msg} ({error_code})")
|
||||
|
||||
|
@ -297,6 +304,7 @@ def parse_web_api(resp):
|
|||
'title': title,
|
||||
'url': res_url,
|
||||
'content': content,
|
||||
'iframe_src': get_embeded_stream_url(res_url),
|
||||
'publishedDate': pub_date,
|
||||
'thumbnail': thumbnail,
|
||||
'template': 'videos.html',
|
||||
|
|
|
@ -165,10 +165,12 @@ def fetch_traits(engine_traits: EngineTraits):
|
|||
|
||||
countrycodes = set()
|
||||
for region in country_list:
|
||||
if region['iso_3166_1'] not in babel_reg_list:
|
||||
# country_list contains duplicates that differ only in upper/lower case
|
||||
_reg = region['iso_3166_1'].upper()
|
||||
if _reg not in babel_reg_list:
|
||||
print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
|
||||
continue
|
||||
countrycodes.add(region['iso_3166_1'])
|
||||
countrycodes.add(_reg)
|
||||
|
||||
countrycodes = list(countrycodes)
|
||||
countrycodes.sort()
|
||||
|
|
|
@ -41,8 +41,13 @@ import sqlite3
|
|||
import contextlib
|
||||
|
||||
engine_type = 'offline'
|
||||
|
||||
database = ""
|
||||
"""Filename of the SQLite DB."""
|
||||
|
||||
query_str = ""
|
||||
"""SQL query that returns the result items."""
|
||||
|
||||
limit = 10
|
||||
paging = True
|
||||
result_template = 'key-value.html'
|
||||
|
|
|
@ -7,6 +7,7 @@ ends.
|
|||
|
||||
from json import dumps
|
||||
from searx.utils import searx_useragent
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
about = {
|
||||
"website": "https://stract.com/",
|
||||
|
@ -18,7 +19,10 @@ about = {
|
|||
categories = ['general']
|
||||
paging = True
|
||||
|
||||
search_url = "https://stract.com/beta/api/search"
|
||||
base_url = "https://stract.com/beta/api"
|
||||
search_url = base_url + "/search"
|
||||
|
||||
traits: EngineTraits
|
||||
|
||||
|
||||
def request(query, params):
|
||||
|
@ -29,7 +33,14 @@ def request(query, params):
|
|||
'Content-Type': 'application/json',
|
||||
'User-Agent': searx_useragent(),
|
||||
}
|
||||
params['data'] = dumps({'query': query, 'page': params['pageno'] - 1})
|
||||
region = traits.get_region(params["searxng_locale"], default=traits.all_locale)
|
||||
params['data'] = dumps(
|
||||
{
|
||||
'query': query,
|
||||
'page': params['pageno'] - 1,
|
||||
'selectedRegion': region,
|
||||
}
|
||||
)
|
||||
|
||||
return params
|
||||
|
||||
|
@ -47,3 +58,24 @@ def response(resp):
|
|||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def fetch_traits(engine_traits: EngineTraits):
|
||||
# pylint: disable=import-outside-toplevel
|
||||
from searx import network
|
||||
from babel import Locale, languages
|
||||
from searx.locales import region_tag
|
||||
|
||||
territories = Locale("en").territories
|
||||
|
||||
json = network.get(base_url + "/docs/openapi.json").json()
|
||||
regions = json['components']['schemas']['Region']['enum']
|
||||
|
||||
engine_traits.all_locale = regions[0]
|
||||
|
||||
for region in regions[1:]:
|
||||
for code, name in territories.items():
|
||||
if region not in (code, name):
|
||||
continue
|
||||
for lang in languages.get_official_languages(code, de_facto=True):
|
||||
engine_traits.regions[region_tag(Locale(lang, code))] = region
|
||||
|
|
|
@ -14,10 +14,16 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
|
|||
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
from flask_babel import gettext
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
about = {
|
||||
"website": 'https://tineye.com',
|
||||
"wikidata_id": 'Q2382535',
|
||||
|
@ -34,7 +40,7 @@ categories = ['general']
|
|||
paging = True
|
||||
safesearch = False
|
||||
base_url = 'https://tineye.com'
|
||||
search_string = '/result_json/?page={page}&{query}'
|
||||
search_string = '/api/v1/result_json/?page={page}&{query}'
|
||||
|
||||
FORMAT_NOT_SUPPORTED = gettext(
|
||||
"Could not read that image url. This may be due to an unsupported file"
|
||||
|
@ -120,7 +126,7 @@ def parse_tineye_match(match_json):
|
|||
|
||||
crawl_date = backlink_json.get("crawl_date")
|
||||
if crawl_date:
|
||||
crawl_date = datetime.fromisoformat(crawl_date[:-3])
|
||||
crawl_date = datetime.strptime(crawl_date, '%Y-%m-%d')
|
||||
else:
|
||||
crawl_date = datetime.min
|
||||
|
||||
|
@ -150,29 +156,15 @@ def parse_tineye_match(match_json):
|
|||
|
||||
def response(resp):
|
||||
"""Parse HTTP response from TinEye."""
|
||||
results = []
|
||||
|
||||
try:
|
||||
# handle the 422 client side errors, and the possible 400 status code error
|
||||
if resp.status_code in (400, 422):
|
||||
json_data = resp.json()
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
msg = "can't parse JSON response // %s" % exc
|
||||
logger.error(msg)
|
||||
json_data = {'error': msg}
|
||||
|
||||
# handle error codes from Tineye
|
||||
|
||||
if resp.is_error:
|
||||
if resp.status_code in (400, 422):
|
||||
|
||||
message = 'HTTP status: %s' % resp.status_code
|
||||
error = json_data.get('error')
|
||||
s_key = json_data.get('suggestions', {}).get('key', '')
|
||||
|
||||
if error and s_key:
|
||||
message = "%s (%s)" % (error, s_key)
|
||||
elif error:
|
||||
message = error
|
||||
suggestions = json_data.get('suggestions', {})
|
||||
message = f'HTTP Status Code: {resp.status_code}'
|
||||
|
||||
if resp.status_code == 422:
|
||||
s_key = suggestions.get('key', '')
|
||||
if s_key == "Invalid image URL":
|
||||
# test https://docs.searxng.org/_static/searxng-wordmark.svg
|
||||
message = FORMAT_NOT_SUPPORTED
|
||||
|
@ -182,16 +174,23 @@ def response(resp):
|
|||
elif s_key == 'Download Error':
|
||||
# test https://notexists
|
||||
message = DOWNLOAD_ERROR
|
||||
else:
|
||||
logger.warning("Unknown suggestion key encountered: %s", s_key)
|
||||
else: # 400
|
||||
description = suggestions.get('description')
|
||||
if isinstance(description, list):
|
||||
message = ','.join(description)
|
||||
|
||||
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
|
||||
# results.append({'answer': message})
|
||||
logger.error(message)
|
||||
# see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
|
||||
# results.append({'answer': message})
|
||||
logger.error(message)
|
||||
return []
|
||||
|
||||
return results
|
||||
# Raise for all other responses
|
||||
resp.raise_for_status()
|
||||
|
||||
resp.raise_for_status()
|
||||
|
||||
# append results from matches
|
||||
results = []
|
||||
json_data = resp.json()
|
||||
|
||||
for match_json in json_data['matches']:
|
||||
|
||||
|
|
|
@ -118,6 +118,8 @@ def _base_url() -> str:
|
|||
url = engines['yacy'].base_url # type: ignore
|
||||
if isinstance(url, list):
|
||||
url = random.choice(url)
|
||||
if url.endswith("/"):
|
||||
url = url[:-1]
|
||||
return url
|
||||
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ from searx.utils import (
|
|||
eval_xpath_getindex,
|
||||
eval_xpath_list,
|
||||
extract_text,
|
||||
html_to_text,
|
||||
)
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
|
@ -133,12 +134,20 @@ def response(resp):
|
|||
url = parse_url(url)
|
||||
|
||||
title = eval_xpath_getindex(result, './/h3//a/@aria-label', 0, default='')
|
||||
title = extract_text(title)
|
||||
title: str = extract_text(title)
|
||||
content = eval_xpath_getindex(result, './/div[contains(@class, "compText")]', 0, default='')
|
||||
content = extract_text(content, allow_none=True)
|
||||
content: str = extract_text(content, allow_none=True)
|
||||
|
||||
# append result
|
||||
results.append({'url': url, 'title': title, 'content': content})
|
||||
results.append(
|
||||
{
|
||||
'url': url,
|
||||
# title sometimes contains HTML tags / see
|
||||
# https://github.com/searxng/searxng/issues/3790
|
||||
'title': " ".join(html_to_text(title).strip().split()),
|
||||
'content': " ".join(html_to_text(content).strip().split()),
|
||||
}
|
||||
)
|
||||
|
||||
for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a'):
|
||||
# append suggestion
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Yandex (Web, images)"""
|
||||
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
from html import unescape
|
||||
from lxml import html
|
||||
from searx.exceptions import SearxEngineCaptchaException
|
||||
from searx.utils import humanize_bytes, eval_xpath, eval_xpath_list, extract_text, extr
|
||||
|
||||
|
||||
# Engine metadata
|
||||
about = {
|
||||
"website": 'https://yandex.com/',
|
||||
"wikidata_id": 'Q5281',
|
||||
"official_api_documentation": "?",
|
||||
"use_official_api": False,
|
||||
"require_api_key": False,
|
||||
"results": 'HTML',
|
||||
}
|
||||
|
||||
# Engine configuration
|
||||
categories = []
|
||||
paging = True
|
||||
search_type = ""
|
||||
|
||||
# Search URL
|
||||
base_url_web = 'https://yandex.com/search/site/'
|
||||
base_url_images = 'https://yandex.com/images/search'
|
||||
|
||||
results_xpath = '//li[contains(@class, "serp-item")]'
|
||||
url_xpath = './/a[@class="b-serp-item__title-link"]/@href'
|
||||
title_xpath = './/h3[@class="b-serp-item__title"]/a[@class="b-serp-item__title-link"]/span'
|
||||
content_xpath = './/div[@class="b-serp-item__content"]//div[@class="b-serp-item__text"]'
|
||||
|
||||
|
||||
def catch_bad_response(resp):
|
||||
if resp.url.path.startswith('/showcaptcha'):
|
||||
raise SearxEngineCaptchaException()
|
||||
|
||||
|
||||
def request(query, params):
|
||||
query_params_web = {
|
||||
"tmpl_version": "releases",
|
||||
"text": query,
|
||||
"web": "1",
|
||||
"frame": "1",
|
||||
"searchid": "3131712",
|
||||
}
|
||||
|
||||
query_params_images = {
|
||||
"text": query,
|
||||
"uinfo": "sw-1920-sh-1080-ww-1125-wh-999",
|
||||
}
|
||||
|
||||
if params['pageno'] > 1:
|
||||
query_params_web.update({"p": params["pageno"] - 1})
|
||||
query_params_images.update({"p": params["pageno"] - 1})
|
||||
|
||||
params["cookies"] = {'cookie': "yp=1716337604.sp.family%3A0#1685406411.szm.1:1920x1080:1920x999"}
|
||||
|
||||
if search_type == 'web':
|
||||
params['url'] = f"{base_url_web}?{urlencode(query_params_web)}"
|
||||
elif search_type == 'images':
|
||||
params['url'] = f"{base_url_images}?{urlencode(query_params_images)}"
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
if search_type == 'web':
|
||||
|
||||
catch_bad_response(resp)
|
||||
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
results = []
|
||||
|
||||
for result in eval_xpath_list(dom, results_xpath):
|
||||
results.append(
|
||||
{
|
||||
'url': extract_text(eval_xpath(result, url_xpath)),
|
||||
'title': extract_text(eval_xpath(result, title_xpath)),
|
||||
'content': extract_text(eval_xpath(result, content_xpath)),
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
if search_type == 'images':
|
||||
|
||||
catch_bad_response(resp)
|
||||
|
||||
html_data = html.fromstring(resp.text)
|
||||
html_sample = unescape(html.tostring(html_data, encoding='unicode'))
|
||||
|
||||
content_between_tags = extr(
|
||||
html_sample, '{"location":"/images/search/', 'advRsyaSearchColumn":null}}', default="fail"
|
||||
)
|
||||
json_data = '{"location":"/images/search/' + content_between_tags + 'advRsyaSearchColumn":null}}'
|
||||
|
||||
if content_between_tags == "fail":
|
||||
content_between_tags = extr(html_sample, '{"location":"/images/search/', 'false}}}')
|
||||
json_data = '{"location":"/images/search/' + content_between_tags + 'false}}}'
|
||||
|
||||
json_resp = loads(json_data)
|
||||
|
||||
results = []
|
||||
for _, item_data in json_resp['initialState']['serpList']['items']['entities'].items():
|
||||
title = item_data['snippet']['title']
|
||||
source = item_data['snippet']['url']
|
||||
thumb = item_data['image']
|
||||
fullsize_image = item_data['viewerData']['dups'][0]['url']
|
||||
height = item_data['viewerData']['dups'][0]['h']
|
||||
width = item_data['viewerData']['dups'][0]['w']
|
||||
filesize = item_data['viewerData']['dups'][0]['fileSizeInBytes']
|
||||
humanized_filesize = humanize_bytes(filesize)
|
||||
|
||||
results.append(
|
||||
{
|
||||
'title': title,
|
||||
'url': source,
|
||||
'img_src': fullsize_image,
|
||||
'filesize': humanized_filesize,
|
||||
'thumbnail_src': thumb,
|
||||
'template': 'images.html',
|
||||
'resolution': f'{width} x {height}',
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
return []
|
|
@ -67,6 +67,8 @@ def response(resp):
|
|||
|
||||
for result in resp.json()[1]['results']:
|
||||
if search_type == "web":
|
||||
if result['type'] != 'Organic':
|
||||
continue
|
||||
results.append(_web_result(result))
|
||||
elif search_type == "images":
|
||||
results.append(_images_result(result))
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Exception types raised by SearXNG modules.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
|
@ -61,7 +62,7 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
|
|||
"""This settings contains the default suspended time (default 86400 sec / 1
|
||||
day)."""
|
||||
|
||||
def __init__(self, suspended_time: int = None, message: str = 'Access denied'):
|
||||
def __init__(self, suspended_time: int | None = None, message: str = 'Access denied'):
|
||||
"""Generic exception to raise when an engine denies access to the results.
|
||||
|
||||
:param suspended_time: How long the engine is going to be suspended in
|
||||
|
@ -70,12 +71,13 @@ class SearxEngineAccessDeniedException(SearxEngineResponseException):
|
|||
:param message: Internal message. Defaults to ``Access denied``
|
||||
:type message: str
|
||||
"""
|
||||
suspended_time = suspended_time or self._get_default_suspended_time()
|
||||
if suspended_time is None:
|
||||
suspended_time = self._get_default_suspended_time()
|
||||
super().__init__(message + ', suspended_time=' + str(suspended_time))
|
||||
self.suspended_time = suspended_time
|
||||
self.message = message
|
||||
|
||||
def _get_default_suspended_time(self):
|
||||
def _get_default_suspended_time(self) -> int:
|
||||
from searx import get_setting # pylint: disable=C0415
|
||||
|
||||
return get_setting(self.SUSPEND_TIME_SETTING)
|
||||
|
@ -88,7 +90,7 @@ class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
|
|||
"""This settings contains the default suspended time (default 86400 sec / 1
|
||||
day)."""
|
||||
|
||||
def __init__(self, suspended_time=None, message='CAPTCHA'):
|
||||
def __init__(self, suspended_time: int | None = None, message='CAPTCHA'):
|
||||
super().__init__(message=message, suspended_time=suspended_time)
|
||||
|
||||
|
||||
|
@ -102,7 +104,7 @@ class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
|
|||
"""This settings contains the default suspended time (default 3660 sec / 1
|
||||
hour)."""
|
||||
|
||||
def __init__(self, suspended_time=None, message='Too many request'):
|
||||
def __init__(self, suspended_time: int | None = None, message='Too many request'):
|
||||
super().__init__(message=message, suspended_time=suspended_time)
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for providing the favicons in SearXNG"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["init", "favicon_url", "favicon_proxy"]
|
||||
|
||||
import pathlib
|
||||
from searx import logger
|
||||
from searx import get_setting
|
||||
from .proxy import favicon_url, favicon_proxy
|
||||
|
||||
logger = logger.getChild('favicons')
|
||||
|
||||
|
||||
def is_active():
|
||||
return bool(get_setting("search.favicon_resolver", False))
|
||||
|
||||
|
||||
def init():
|
||||
|
||||
# pylint: disable=import-outside-toplevel
|
||||
|
||||
from . import config, cache, proxy
|
||||
from .. import settings_loader
|
||||
|
||||
cfg_file = (settings_loader.get_user_cfg_folder() or pathlib.Path("/etc/searxng")) / "favicons.toml"
|
||||
if not cfg_file.exists():
|
||||
if is_active():
|
||||
logger.error(f"missing favicon config: {cfg_file}")
|
||||
cfg_file = config.DEFAULT_CFG_TOML_PATH
|
||||
|
||||
logger.debug(f"load favicon config: {cfg_file}")
|
||||
cfg = config.FaviconConfig.from_toml_file(cfg_file, use_cache=True)
|
||||
cache.init(cfg.cache)
|
||||
proxy.init(cfg.proxy)
|
||||
|
||||
del cache, config, proxy, cfg, settings_loader
|
|
@ -0,0 +1,12 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Command line implementation"""
|
||||
|
||||
import typer
|
||||
|
||||
from . import cache
|
||||
from . import init
|
||||
|
||||
init()
|
||||
app = typer.Typer()
|
||||
app.add_typer(cache.app, name="cache", help="commands related to the cache")
|
||||
app()
|
|
@ -0,0 +1,476 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for caching favicons.
|
||||
|
||||
:py:obj:`FaviconCacheConfig`:
|
||||
Configuration of the favicon cache
|
||||
|
||||
:py:obj:`FaviconCache`:
|
||||
Abstract base class for the implementation of a favicon cache.
|
||||
|
||||
:py:obj:`FaviconCacheSQLite`:
|
||||
Favicon cache that manages the favicon BLOBs in a SQLite DB.
|
||||
|
||||
:py:obj:`FaviconCacheNull`:
|
||||
Fallback solution if the configured cache cannot be used for system reasons.
|
||||
|
||||
----
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from typing import Literal
|
||||
|
||||
import os
|
||||
import abc
|
||||
import dataclasses
|
||||
import hashlib
|
||||
import logging
|
||||
import sqlite3
|
||||
import tempfile
|
||||
import time
|
||||
import typer
|
||||
|
||||
import msgspec
|
||||
|
||||
from searx import sqlitedb
|
||||
from searx import logger
|
||||
from searx.utils import humanize_bytes, humanize_number
|
||||
|
||||
CACHE: "FaviconCache"
|
||||
FALLBACK_ICON = b"FALLBACK_ICON"
|
||||
|
||||
logger = logger.getChild('favicons.cache')
|
||||
app = typer.Typer()
|
||||
|
||||
|
||||
@app.command()
|
||||
def state():
|
||||
"""show state of the cache"""
|
||||
print(CACHE.state().report())
|
||||
|
||||
|
||||
@app.command()
|
||||
def maintenance(force: bool = True, debug: bool = False):
|
||||
"""perform maintenance of the cache"""
|
||||
root_log = logging.getLogger()
|
||||
if debug:
|
||||
root_log.setLevel(logging.DEBUG)
|
||||
else:
|
||||
root_log.handlers = []
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter("%(message)s"))
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
state_t0 = CACHE.state()
|
||||
CACHE.maintenance(force=force)
|
||||
state_t1 = CACHE.state()
|
||||
state_delta = state_t0 - state_t1
|
||||
print("The cache has been reduced by:")
|
||||
print(state_delta.report("\n- {descr}: {val}").lstrip("\n"))
|
||||
|
||||
|
||||
def init(cfg: "FaviconCacheConfig"):
|
||||
"""Initialization of a global ``CACHE``"""
|
||||
|
||||
global CACHE # pylint: disable=global-statement
|
||||
if cfg.db_type == "sqlite":
|
||||
if sqlite3.sqlite_version_info <= (3, 35):
|
||||
logger.critical(
|
||||
"Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
|
||||
sqlite3.sqlite_version,
|
||||
)
|
||||
CACHE = FaviconCacheNull(cfg)
|
||||
else:
|
||||
CACHE = FaviconCacheSQLite(cfg)
|
||||
elif cfg.db_type == "mem":
|
||||
logger.error("Favicons are cached in memory, don't use this in production!")
|
||||
CACHE = FaviconCacheMEM(cfg)
|
||||
else:
|
||||
raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")
|
||||
|
||||
|
||||
class FaviconCacheConfig(msgspec.Struct): # pylint: disable=too-few-public-methods
|
||||
"""Configuration of the favicon cache."""
|
||||
|
||||
db_type: Literal["sqlite", "mem"] = "sqlite"
|
||||
"""Type of the database:
|
||||
|
||||
``sqlite``:
|
||||
:py:obj:`.cache.FaviconCacheSQLite`
|
||||
|
||||
``mem``:
|
||||
:py:obj:`.cache.FaviconCacheMEM` (not recommended)
|
||||
"""
|
||||
|
||||
db_url: str = tempfile.gettempdir() + os.sep + "faviconcache.db"
|
||||
"""URL of the SQLite DB, the path to the database file."""
|
||||
|
||||
HOLD_TIME: int = 60 * 60 * 24 * 30 # 30 days
|
||||
"""Hold time (default in sec.), after which a BLOB is removed from the cache."""
|
||||
|
||||
LIMIT_TOTAL_BYTES: int = 1024 * 1024 * 50 # 50 MB
|
||||
"""Maximum of bytes (default) stored in the cache of all blobs. Note: The
|
||||
limit is only reached at each maintenance interval after which the oldest
|
||||
BLOBs are deleted; the limit is exceeded during the maintenance period. If
|
||||
the maintenance period is *too long* or maintenance is switched off
|
||||
completely, the cache grows uncontrollably."""
|
||||
|
||||
BLOB_MAX_BYTES: int = 1024 * 20 # 20 KB
|
||||
"""The maximum BLOB size in bytes that a favicon may have so that it can be
|
||||
saved in the cache. If the favicon is larger, it is not saved in the cache
|
||||
and must be requested by the client via the proxy."""
|
||||
|
||||
MAINTENANCE_PERIOD: int = 60 * 60
|
||||
"""Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
|
||||
``auto``."""
|
||||
|
||||
MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
|
||||
"""Type of maintenance mode
|
||||
|
||||
``auto``:
|
||||
Maintenance is carried out automatically as part of the maintenance
|
||||
intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.
|
||||
|
||||
``off``:
|
||||
Maintenance is switched off and must be carried out by an external process
|
||||
if required.
|
||||
"""
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class FaviconCacheStats:
|
||||
"""Dataclass wich provides information on the status of the cache."""
|
||||
|
||||
favicons: int | None = None
|
||||
bytes: int | None = None
|
||||
domains: int | None = None
|
||||
resolvers: int | None = None
|
||||
|
||||
field_descr = (
|
||||
("favicons", "number of favicons in cache", humanize_number),
|
||||
("bytes", "total size (approx. bytes) of cache", humanize_bytes),
|
||||
("domains", "total number of domains in cache", humanize_number),
|
||||
("resolvers", "number of resolvers", str),
|
||||
)
|
||||
|
||||
def __sub__(self, other) -> FaviconCacheStats:
|
||||
if not isinstance(other, self.__class__):
|
||||
raise TypeError(f"unsupported operand type(s) for +: '{self.__class__}' and '{type(other)}'")
|
||||
kwargs = {}
|
||||
for field, _, _ in self.field_descr:
|
||||
self_val, other_val = getattr(self, field), getattr(other, field)
|
||||
if None in (self_val, other_val):
|
||||
continue
|
||||
if isinstance(self_val, int):
|
||||
kwargs[field] = self_val - other_val
|
||||
else:
|
||||
kwargs[field] = self_val
|
||||
return self.__class__(**kwargs)
|
||||
|
||||
def report(self, fmt: str = "{descr}: {val}\n"):
|
||||
s = []
|
||||
for field, descr, cast in self.field_descr:
|
||||
val = getattr(self, field)
|
||||
if val is None:
|
||||
val = "--"
|
||||
else:
|
||||
val = cast(val)
|
||||
s.append(fmt.format(descr=descr, val=val))
|
||||
return "".join(s)
|
||||
|
||||
|
||||
class FaviconCache(abc.ABC):
    """Abstract base class for the implementation of a favicon cache."""

    @abc.abstractmethod
    def __init__(self, cfg: FaviconCacheConfig):
        """An instance of the favicon cache is built up from the configuration."""

    @abc.abstractmethod
    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        """Returns ``None`` or the tuple of ``(data, mime)`` that has been
        registered in the cache.  The ``None`` indicates that there was no entry
        in the cache."""

    @abc.abstractmethod
    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        """Set data and mime-type in the cache.  If data is None, the
        :py:obj:`FALLBACK_ICON` is registered in the cache."""

    @abc.abstractmethod
    def state(self) -> FaviconCacheStats:
        """Returns a :py:obj:`FaviconCacheStats` (key/values) with information
        on the state of the cache."""

    @abc.abstractmethod
    def maintenance(self, force=False):
        """Performs maintenance on the cache."""
|
||||
|
||||
|
||||
class FaviconCacheNull(FaviconCache):
    """No-op implementation of a favicon cache (fallback solution).  It is
    used whenever a more capable cache such as :py:obj:`FaviconCacheSQLite`
    is unavailable -- e.g. when the installed SQLite library is too old and
    does not meet the requirements."""

    def __init__(self, cfg: FaviconCacheConfig):
        # nothing to set up, the configuration is ignored
        return None

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        # every lookup is a cache miss
        return None

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        # nothing is ever stored
        return False

    def state(self):
        # report an empty cache
        return FaviconCacheStats(favicons=0)

    def maintenance(self, force=False):
        # nothing to maintain
        pass
|
||||
|
||||
|
||||
class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
    """Favicon cache that manages the favicon BLOBs in a SQLite DB.  The DB
    model in the SQLite DB is implemented using the abstract class
    :py:obj:`sqlitedb.SQLiteAppl`.

    The following configurations are required / supported:

    - :py:obj:`FaviconCacheConfig.db_url`
    - :py:obj:`FaviconCacheConfig.HOLD_TIME`
    - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
    - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
    - :py:obj:`MAINTENANCE_PERIOD`
    - :py:obj:`MAINTENANCE_MODE`
    """

    DB_SCHEMA = 1

    DDL_BLOBS = """\
CREATE TABLE IF NOT EXISTS blobs (
  sha256     TEXT,
  bytes_c    INTEGER,
  mime       TEXT NOT NULL,
  data       BLOB NOT NULL,
  PRIMARY KEY (sha256))"""

    """Table to store BLOB objects by their sha256 hash values."""

    DDL_BLOB_MAP = """\
CREATE TABLE IF NOT EXISTS blob_map (
  m_time     INTEGER DEFAULT (strftime('%s', 'now')),  -- last modified (unix epoch) time in sec.
  sha256     TEXT,
  resolver   TEXT,
  authority  TEXT,
  PRIMARY KEY (resolver, authority))"""

    """Table to map from (resolver, authority) to sha256 hash values."""

    DDL_CREATE_TABLES = {
        "blobs": DDL_BLOBS,
        "blob_map": DDL_BLOB_MAP,
    }

    SQL_DROP_LEFTOVER_BLOBS = (
        "DELETE FROM blobs WHERE sha256 IN ("
        " SELECT b.sha256"
        " FROM blobs b"
        " LEFT JOIN blob_map bm"
        " ON b.sha256 = bm.sha256"
        " WHERE bm.sha256 IS NULL)"
    )
    """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""

    SQL_ITER_BLOBS_SHA256_BYTES_C = (
        "SELECT b.sha256, b.bytes_c FROM blobs b"
        " JOIN blob_map bm "
        " ON b.sha256 = bm.sha256"
        " ORDER BY bm.m_time ASC"
    )

    SQL_INSERT_BLOBS = (
        "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
        " ON CONFLICT (sha256) DO NOTHING"
    )  # fmt: skip

    SQL_INSERT_BLOB_MAP = (
        "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
        " ON CONFLICT DO UPDATE "
        " SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
    )

    def __init__(self, cfg: FaviconCacheConfig):
        """An instance of the favicon cache is built up from the configuration."""

        if cfg.db_url == ":memory:":
            logger.critical("don't use SQLite DB in :memory: in production!!")
        super().__init__(cfg.db_url)
        self.cfg = cfg

    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
        """Look up the favicon of ``(resolver, authority)``.  Returns ``None``
        when there is no entry, ``(None, None)`` when the fallback icon is
        registered for the pair, otherwise ``(data, mime)``."""

        sql = "SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?"
        res = self.DB.execute(sql, (resolver, authority)).fetchone()
        if res is None:
            return None

        data, mime = (None, None)
        sha256 = res[0]
        if sha256 == FALLBACK_ICON:
            # entry exists, but the resolver delivered no favicon for the pair
            return data, mime

        sql = "SELECT data, mime FROM blobs WHERE sha256 = ?"
        res = self.DB.execute(sql, (sha256,)).fetchone()
        if res is not None:
            data, mime = res
        return data, mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        """Store ``data`` and ``mime`` for ``(resolver, authority)``.  A
        ``data`` of ``None`` registers the :py:obj:`FALLBACK_ICON`.  Returns
        ``False`` when nothing was cached."""

        if self.cfg.MAINTENANCE_MODE == "auto" and int(time.time()) > self.next_maintenance_time:
            # Should automatic maintenance be moved to a new thread?
            self.maintenance()

        if data is not None and mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        bytes_c = len(data or b"")
        if bytes_c > self.cfg.BLOB_MAX_BYTES:
            # BUGFIX: lazy %-args instead of eagerly %-formatting the message;
            # also fixes the "to big" typo in the log message
            logger.info(
                "favicon of resolver: %s / authority: %s too big to cache (bytes: %s)",
                resolver,
                authority,
                bytes_c,
            )
            return False

        if data is None:
            sha256 = FALLBACK_ICON
        else:
            sha256 = hashlib.sha256(data).hexdigest()

        with self.connect() as conn:
            if sha256 != FALLBACK_ICON:
                conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
            conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))

        return True

    @property
    def next_maintenance_time(self) -> int:
        """Returns (unix epoch) time of the next maintenance."""

        return self.cfg.MAINTENANCE_PERIOD + self.properties.m_time("LAST_MAINTENANCE")

    def maintenance(self, force=False):
        """Performs maintenance on the cache: drops entries older than
        ``HOLD_TIME``, drops BLOBs no longer mapped and shrinks the DB to stay
        below ``LIMIT_TOTAL_BYTES``."""

        # Prevent parallel DB maintenance cycles from other DB connections
        # (e.g. in multi thread or process environments).

        if not force and int(time.time()) < self.next_maintenance_time:
            logger.debug("no maintenance required yet, next maintenance interval is in the future")
            return
        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

        # do maintenance tasks

        with self.connect() as conn:

            # drop items not in HOLD time; HOLD_TIME is bound as a SQL
            # parameter instead of being interpolated into the statement
            res = conn.execute(
                "DELETE FROM blob_map"
                " WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - ?",
                (self.cfg.HOLD_TIME,),
            )
            logger.debug("dropped %s obsolete blob_map items from db", res.rowcount)
            res = conn.execute(self.SQL_DROP_LEFTOVER_BLOBS)
            logger.debug("dropped %s obsolete BLOBS from db", res.rowcount)

            # drop old items to be in LIMIT_TOTAL_BYTES
            total_bytes = conn.execute("SELECT SUM(bytes_c) FROM blobs").fetchone()[0] or 0
            if total_bytes > self.cfg.LIMIT_TOTAL_BYTES:

                # collect the oldest BLOBs until at least x bytes are freed
                x = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
                c = 0
                sha_list = []
                for row in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
                    sha256, bytes_c = row
                    sha_list.append(sha256)
                    c += bytes_c
                    if c > x:
                        break
                if sha_list:
                    # the sha256 values are hex digests produced by this class,
                    # therefore safe to interpolate into the IN (...) list
                    conn.execute("DELETE FROM blobs WHERE sha256 IN ('%s')" % "','".join(sha_list))
                    conn.execute("DELETE FROM blob_map WHERE sha256 IN ('%s')" % "','".join(sha_list))
                    logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), c)

    def _query_val(self, sql, default=None):
        # helper: first column of the first row of ``sql``, or ``default``
        val = self.DB.execute(sql).fetchone()
        if val is not None:
            val = val[0]
        if val is None:
            val = default
        return val

    def state(self) -> FaviconCacheStats:
        """Returns a :py:obj:`FaviconCacheStats` with information on the state
        of the cache."""
        return FaviconCacheStats(
            favicons=self._query_val("SELECT count(*) FROM blobs", 0),
            bytes=self._query_val("SELECT SUM(bytes_c) FROM blobs", 0),
            domains=self._query_val("SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)", 0),
            resolvers=self._query_val("SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)", 0),
        )
|
||||
|
||||
|
||||
class FaviconCacheMEM(FaviconCache):
    """In-memory favicon cache -- a proof of concept only, the favicons are
    kept in the memory of the running process.

    .. attention::

       Don't use it in production, it will blow up your memory!!

    """

    def __init__(self, cfg):
        self.cfg = cfg
        self._data = {}
        self._sha_mime = {}

    def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:
        lookup_key = f"{resolver}:{authority}"
        sha, mime = self._sha_mime.get(lookup_key, (None, None))
        if sha is None:
            # cache miss
            return None
        blob = self._data.get(sha)
        if blob == FALLBACK_ICON:
            # fallback registered: entry exists, but without favicon data
            blob = None
        return blob, mime

    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
        if data is None:
            # register the fallback icon for this (resolver, authority) pair
            data = FALLBACK_ICON
            mime = None

        elif mime is None:
            logger.error(
                "favicon resolver %s tries to cache mime-type None for authority %s",
                resolver,
                authority,
            )
            return False

        digest = hashlib.sha256(data).hexdigest()
        self._data[digest] = data
        self._sha_mime[f"{resolver}:{authority}"] = (digest, mime)
        return True

    def state(self):
        return FaviconCacheStats(favicons=len(self._data))

    def maintenance(self, force=False):
        pass
|
|
@ -0,0 +1,65 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
import msgspec
|
||||
|
||||
from .cache import FaviconCacheConfig
|
||||
from .proxy import FaviconProxyConfig
|
||||
|
||||
CONFIG_SCHEMA: int = 1
"""Version of the configuration schema."""

TOML_CACHE_CFG: dict[str, "FaviconConfig"] = {}
"""Cache config objects by TOML's filename."""

# default TOML configuration file, shipped next to this module
DEFAULT_CFG_TOML_PATH: pathlib.Path = pathlib.Path(__file__).parent / "favicons.toml"
|
||||
|
||||
|
||||
class FaviconConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """The class aggregates configurations of the favicon tools."""

    cfg_schema: int
    """Config's schema version. The specification of the version of the schema
    is mandatory, currently only version :py:obj:`CONFIG_SCHEMA` is supported.
    By specifying a version, it is possible to ensure downward compatibility in
    the event of future changes to the configuration schema"""

    cache: FaviconCacheConfig = msgspec.field(default_factory=FaviconCacheConfig)
    """Setup of the :py:obj:`.cache.FaviconCacheConfig`."""

    proxy: FaviconProxyConfig = msgspec.field(default_factory=FaviconProxyConfig)
    """Setup of the :py:obj:`.proxy.FaviconProxyConfig`."""

    @classmethod
    def from_toml_file(cls, cfg_file: pathlib.Path, use_cache: bool) -> "FaviconConfig":
        """Create a config object from a TOML file, the ``use_cache`` argument
        specifies whether a cache should be used.

        Raises :py:obj:`ValueError` when the schema version in the TOML file
        does not match :py:obj:`CONFIG_SCHEMA`.
        """

        # BUGFIX: use one resolved key for lookup *and* store; previously the
        # lookup used str(cfg_file) while the store used the resolved path
        cache_key = str(cfg_file.resolve())
        cached = TOML_CACHE_CFG.get(cache_key)
        if use_cache and cached is not None:
            return cached

        with cfg_file.open("rb") as f:
            data = f.read()

        cfg = msgspec.toml.decode(data, type=_FaviconConfig)
        schema = cfg.favicons.cfg_schema
        if schema != CONFIG_SCHEMA:
            raise ValueError(
                f"config schema version {CONFIG_SCHEMA} is needed, version {schema} is given in {cfg_file}"
            )

        cfg = cfg.favicons
        if use_cache:
            # BUGFIX: store unconditionally when caching is requested; the old
            # condition ("use_cache and cached") never populated the cache
            TOML_CACHE_CFG[cache_key] = cfg

        return cfg
|
||||
|
||||
|
||||
class _FaviconConfig(msgspec.Struct):  # pylint: disable=too-few-public-methods
    """Wrapper struct for the root object ``favicons.`` of the TOML file."""

    favicons: FaviconConfig
|
|
@ -0,0 +1,25 @@
|
|||
[favicons]
|
||||
|
||||
cfg_schema = 1 # config's schema version no.
|
||||
|
||||
[favicons.proxy]
|
||||
|
||||
# max_age = 5184000 # 60 days / default: 7 days (604800 sec)
|
||||
|
||||
# [favicons.proxy.resolver_map]
|
||||
#
|
||||
# The available favicon resolvers are registered here.
|
||||
#
|
||||
# "duckduckgo" = "searx.favicons.resolvers.duckduckgo"
|
||||
# "allesedv" = "searx.favicons.resolvers.allesedv"
|
||||
# "google" = "searx.favicons.resolvers.google"
|
||||
# "yandex" = "searx.favicons.resolvers.yandex"
|
||||
|
||||
[favicons.cache]
|
||||
|
||||
# db_url = "/var/cache/searxng/faviconcache.db" # default: "/tmp/faviconcache.db"
|
||||
# HOLD_TIME = 5184000 # 60 days / default: 30 days
|
||||
# LIMIT_TOTAL_BYTES = 2147483648 # 2 GB / default: 50 MB
|
||||
# BLOB_MAX_BYTES = 40960 # 40 KB / default 20 KB
|
||||
# MAINTENANCE_MODE = "off" # default: "auto"
|
||||
# MAINTENANCE_PERIOD = 600 # 10min / default: 1h
|
|
@ -0,0 +1,237 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations for a favicon proxy"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Callable
|
||||
|
||||
import importlib
|
||||
import base64
|
||||
import pathlib
|
||||
import urllib.parse
|
||||
|
||||
import flask
|
||||
from httpx import HTTPError
|
||||
import msgspec
|
||||
|
||||
from searx import get_setting
|
||||
|
||||
from searx.webutils import new_hmac, is_hmac_of
|
||||
from searx.exceptions import SearxEngineResponseException
|
||||
|
||||
from .resolvers import DEFAULT_RESOLVER_MAP
|
||||
from . import cache
|
||||
|
||||
# Cache of the data-URLs of the default favicon, keyed by the formatting
# replacements (e.g. the theme name) -- see FaviconProxyConfig.favicon_data_url
DEFAULT_FAVICON_URL = {}

# Active proxy configuration of this module; set once by init()
CFG: FaviconProxyConfig = None  # type: ignore


def init(cfg: FaviconProxyConfig):
    """Initialization of the favicon proxy: registers ``cfg`` as the module
    global :py:obj:`CFG` used by the proxy functions."""
    global CFG  # pylint: disable=global-statement
    CFG = cfg
|
||||
|
||||
|
||||
def _initial_resolver_map():
    """Build the default ``resolver_map`` from the ``search.favicon_resolver``
    setting: empty unless the setting names one of the default resolvers."""
    name: str = get_setting("search.favicon_resolver", None)  # type: ignore
    if not name:
        return {}
    func = DEFAULT_RESOLVER_MAP.get(name)
    if not func:
        return {}
    return {name: f"searx.favicons.resolvers.{func.__name__}"}
|
||||
|
||||
|
||||
class FaviconProxyConfig(msgspec.Struct):
    """Configuration of the favicon proxy."""

    max_age: int = 60 * 60 * 24 * 7  # seven days
    """HTTP header Cache-Control_ ``max-age``

    .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
    """

    secret_key: str = get_setting("server.secret_key")  # type: ignore
    """By default, the value from :ref:`server.secret_key <settings server>`
    setting is used."""

    resolver_timeout: int = get_setting("outgoing.request_timeout")  # type: ignore
    """Timeout which the resolvers should not exceed, is usually passed to the
    outgoing request of the resolver. By default, the value from
    :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""

    resolver_map: dict[str, str] = msgspec.field(default_factory=_initial_resolver_map)
    """The resolver_map is a key / value dictionary where the key is the name of
    the resolver and the value is the fully qualifying name (fqn) of resolver's
    function (the callable). The resolvers from the python module
    :py:obj:`searx.favicons.resolver` are available by default."""

    def get_resolver(self, name: str) -> Callable | None:
        """Returns the callable object (function) of the resolver with the
        ``name``.  If no resolver is registered for the ``name``, ``None`` is
        returned.  Raises :py:obj:`ValueError` when the registered fqn does not
        resolve to a function.
        """
        fqn = self.resolver_map.get(name)
        if fqn is None:
            return None
        mod_name, _, func_name = fqn.rpartition('.')
        mod = importlib.import_module(mod_name)
        # BUGFIX: getattr without a default raises AttributeError, so the
        # intended ValueError below was unreachable
        func = getattr(mod, func_name, None)
        if func is None:
            raise ValueError(f"resolver {fqn} is not implemented")
        return func

    favicon_path: str = get_setting("ui.static_path") + "/themes/{theme}/img/empty_favicon.svg"  # type: ignore
    favicon_mime_type: str = "image/svg+xml"

    def favicon(self, **replacements):
        """Returns pathname and mimetype of the default favicon."""
        return (
            pathlib.Path(self.favicon_path.format(**replacements)),
            self.favicon_mime_type,
        )

    def favicon_data_url(self, **replacements):
        """Returns data image URL of the default favicon."""

        cache_key = ", ".join(f"{x}:{replacements[x]}" for x in sorted(list(replacements.keys()), key=str))
        data_url = DEFAULT_FAVICON_URL.get(cache_key)
        if data_url is not None:
            return data_url

        # BUGFIX: use self instead of the module global CFG so the method also
        # works on a config instance that is not the registered one
        fav, mimetype = self.favicon(**replacements)
        # hint: encoding utf-8 limits favicons to be a SVG image
        with fav.open("r", encoding="utf-8") as f:
            data_url = f.read()

        data_url = urllib.parse.quote(data_url)
        data_url = f"data:{mimetype};utf8,{data_url}"
        DEFAULT_FAVICON_URL[cache_key] = data_url
        return data_url
|
||||
|
||||
|
||||
def favicon_proxy():
    """REST API of SearXNG's favicon proxy service

    ::

        /favicon_proxy?authority=<...>&h=<...>

    ``authority``:
      Domain name :rfc:`3986` / see :py:obj:`favicon_url`

    ``h``:
      HMAC :rfc:`2104`, build up from the :ref:`server.secret_key <settings
      server>` setting.

    """
    authority = flask.request.args.get('authority')

    # malformed request or RFC 3986 authority
    if not authority or "/" in authority:
        return '', 400

    # malformed request / does not have authorisation
    if not is_hmac_of(
        CFG.secret_key,
        authority.encode(),
        flask.request.args.get('h', ''),
    ):
        return '', 400

    resolver = flask.request.preferences.get_value('favicon_resolver')  # type: ignore
    # if resolver is empty or not valid, just return HTTP 400.
    if not resolver or resolver not in CFG.resolver_map.keys():
        return "", 400

    data, mime = search_favicon(resolver, authority)

    # resolver (or cache) delivered a favicon -> serve it with client caching
    if data is not None and mime is not None:
        resp = flask.Response(data, mimetype=mime)  # type: ignore
        resp.headers['Cache-Control'] = f"max-age={CFG.max_age}"
        return resp

    # return default favicon from static path
    theme = flask.request.preferences.get_value("theme")  # type: ignore
    fav, mimetype = CFG.favicon(theme=theme)
    return flask.send_from_directory(fav.parent, fav.name, mimetype=mimetype)
|
||||
|
||||
|
||||
def search_favicon(resolver: str, authority: str) -> tuple[None | bytes, None | str]:
    """Ask the favicon ``resolver`` for the favicon of ``authority`` and
    return a ``(data, mime)`` tuple.  When the resolver has not determined a
    favicon, both tuple items are ``None``.

    ``data``:
      Binary data of the favicon.

    ``mime``:
      Mime type of the favicon.

    """

    func = CFG.get_resolver(resolver)
    if func is None:
        return None, None

    # to avoid superfluous requests to the resolver, first look in the cache
    cached = cache.CACHE(resolver, authority)
    if cached is not None:
        return cached

    data, mime = (None, None)
    try:
        data, mime = func(authority, timeout=CFG.resolver_timeout)
        if data is None or mime is None:
            # normalize partial results to "no favicon"
            data, mime = (None, None)

    except (HTTPError, SearxEngineResponseException):
        # network / engine errors end up cached as "no favicon" as well
        pass

    cache.CACHE.set(resolver, authority, mime, data)
    return data, mime
|
||||
|
||||
|
||||
def favicon_url(authority: str) -> str:
    """Function to generate the image URL used for favicons in SearXNG's result
    lists. The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a
    (sub-) domain name. This function is used in the HTML (jinja) templates.

    .. code:: html

       <div class="favicon">
          <img src="{{ favicon_url(result.parsed_url.netloc) }}">
       </div>

    The returned URL is a route to :py:obj:`favicon_proxy` REST API.

    If the favicon is already in the cache, the returned URL is a `data URL`_
    (something like ``data:image/png;base64,...``). By generating a data url from
    the :py:obj:`.cache.FaviconCache`, additional HTTP round trips via the
    :py:obj:`favicon_proxy` are saved. However, it must also be borne in mind
    that data urls are not cached in the client (web browser).

    .. _data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs

    """

    resolver = flask.request.preferences.get_value('favicon_resolver')  # type: ignore
    # if resolver is empty or not valid, just return nothing.
    if not resolver or resolver not in CFG.resolver_map.keys():
        return ""

    data_mime = cache.CACHE(resolver, authority)

    if data_mime == (None, None):
        # we have already checked, the resolver does not have a favicon
        theme = flask.request.preferences.get_value("theme")  # type: ignore
        return CFG.favicon_data_url(theme=theme)

    # cache hit -> inline the favicon as a data URL
    if data_mime is not None:
        data, mime = data_mime
        return f"data:{mime};base64,{str(base64.b64encode(data), 'utf-8')}"  # type: ignore

    # cache miss -> link to the favicon proxy, authenticated by a HMAC
    h = new_hmac(CFG.secret_key, authority.encode())
    proxy_url = flask.url_for('favicon_proxy')
    query = urllib.parse.urlencode({"authority": authority, "h": h})
    return f"{proxy_url}?{query}"
|
|
@ -0,0 +1,100 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Implementations of the favicon *resolvers* that are available in the favicon
|
||||
proxy by default. A *resolver* is a function that obtains the favicon from an
|
||||
external source. The *resolver* function receives two arguments (``domain,
|
||||
timeout``) and returns a tuple ``(data, mime)``.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["DEFAULT_RESOLVER_MAP", "allesedv", "duckduckgo", "google", "yandex"]
|
||||
|
||||
from typing import Callable
|
||||
from searx import network
|
||||
from searx import logger
|
||||
|
||||
DEFAULT_RESOLVER_MAP: dict[str, Callable]
|
||||
logger = logger.getChild('favicons.resolvers')
|
||||
|
||||
|
||||
def _req_args(**kwargs):
|
||||
# add the request arguments from the searx.network
|
||||
d = {"raise_for_httperror": False}
|
||||
d.update(kwargs)
|
||||
return d
|
||||
|
||||
|
||||
def allesedv(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
|
||||
"""Favicon Resolver from allesedv.com / https://favicon.allesedv.com/"""
|
||||
data, mime = (None, None)
|
||||
url = f"https://f1.allesedv.com/32/{domain}"
|
||||
logger.debug("fetch favicon from: %s", url)
|
||||
|
||||
# will just return a 200 regardless of the favicon existing or not
|
||||
# sometimes will be correct size, sometimes not
|
||||
response = network.get(url, **_req_args(timeout=timeout))
|
||||
if response and response.status_code == 200:
|
||||
mime = response.headers['Content-Type']
|
||||
if mime != 'image/gif':
|
||||
data = response.content
|
||||
return data, mime
|
||||
|
||||
|
||||
def duckduckgo(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
|
||||
"""Favicon Resolver from duckduckgo.com / https://blog.jim-nielsen.com/2021/displaying-favicons-for-any-domain/"""
|
||||
data, mime = (None, None)
|
||||
url = f"https://icons.duckduckgo.com/ip2/{domain}.ico"
|
||||
logger.debug("fetch favicon from: %s", url)
|
||||
|
||||
# will return a 404 if the favicon does not exist and a 200 if it does,
|
||||
response = network.get(url, **_req_args(timeout=timeout))
|
||||
if response and response.status_code == 200:
|
||||
# api will respond with a 32x32 png image
|
||||
mime = response.headers['Content-Type']
|
||||
data = response.content
|
||||
return data, mime
|
||||
|
||||
|
||||
def google(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
|
||||
"""Favicon Resolver from google.com"""
|
||||
data, mime = (None, None)
|
||||
|
||||
# URL https://www.google.com/s2/favicons?sz=32&domain={domain}" will be
|
||||
# redirected (HTTP 301 Moved Permanently) to t1.gstatic.com/faviconV2:
|
||||
url = (
|
||||
f"https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL"
|
||||
f"&url=https://{domain}&size=32"
|
||||
)
|
||||
logger.debug("fetch favicon from: %s", url)
|
||||
|
||||
# will return a 404 if the favicon does not exist and a 200 if it does,
|
||||
response = network.get(url, **_req_args(timeout=timeout))
|
||||
if response and response.status_code == 200:
|
||||
# api will respond with a 32x32 png image
|
||||
mime = response.headers['Content-Type']
|
||||
data = response.content
|
||||
return data, mime
|
||||
|
||||
|
||||
def yandex(domain: str, timeout: int) -> tuple[None | bytes, None | str]:
|
||||
"""Favicon Resolver from yandex.com"""
|
||||
data, mime = (None, None)
|
||||
url = f"https://favicon.yandex.net/favicon/{domain}"
|
||||
logger.debug("fetch favicon from: %s", url)
|
||||
|
||||
# api will respond with a 16x16 png image, if it doesn't exist, it will be a
|
||||
# 1x1 png image (70 bytes)
|
||||
response = network.get(url, **_req_args(timeout=timeout))
|
||||
if response and response.status_code == 200 and len(response.content) > 70:
|
||||
mime = response.headers['Content-Type']
|
||||
data = response.content
|
||||
return data, mime
|
||||
|
||||
|
||||
DEFAULT_RESOLVER_MAP = {
|
||||
"allesedv": allesedv,
|
||||
"duckduckgo": duckduckgo,
|
||||
"google": google,
|
||||
"yandex": yandex,
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
# A propos de SearXNG
|
||||
|
||||
SearXNG est un [Métamoteur] qui agrège les résultats d'autres
|
||||
{{link('moteurs de recherche', 'preferences')}} tout en ne sauvegardant
|
||||
aucune information à propos de ses utilisateurs.
|
||||
|
||||
Le projet SearXNG est maintenu par une communauté ouverte.
|
||||
Rejoignez-nous sur Matrix si vous avez des questions ou simplement pour
|
||||
discuter de SearXNG: [#searxng:matrix.org].
|
||||
|
||||
Aidez-nous à rendre SearXNG meilleur.
|
||||
|
||||
- Vous pouvez améliorer les traductions de SearXNG avec l'outil
|
||||
[Weblate].
|
||||
- Suivez le développement, contribuez au projet ou remontez des erreurs
|
||||
en utilisant le [dépôt de sources].
|
||||
- Pour obtenir de plus amples informations, consultez la documentation
|
||||
en ligne du [projet SearXNG].
|
||||
|
||||
## Pourquoi l'utiliser ?
|
||||
|
||||
- SearXNG ne vous fournira pas de résultats aussi personnalisés que
|
||||
Google, mais il ne générera pas non plus de suivi sur vous.
|
||||
- SearXNG ne se soucie pas des recherches que vous faites, ne partage
|
||||
aucune information avec des tiers et ne peut pas être utilisé contre
|
||||
vous.
|
||||
- SearXNG est un logiciel libre. Son code source est 100% ouvert et tout
|
||||
le monde est encouragé à l'améliorer.
|
||||
|
||||
Si vous êtes soucieux du respect de la vie privée et des libertés sur
|
||||
Internet, faites de SearXNG votre moteur de recherche par défaut. Vous
|
||||
pouvez aussi installer et utiliser SearXNG sur votre propre serveur.
|
||||
|
||||
## Comment le configurer comme moteur de recherche par défaut ?
|
||||
|
||||
SearXNG prend en charge [OpenSearch]. Pour plus d'informations sur la
|
||||
manière de modifier votre moteur de recherche par défaut, veuillez
|
||||
consulter la documentation de votre navigateur :
|
||||
|
||||
- [Firefox]
|
||||
- [Microsoft Edge] - Ce lien propose aussi les instructions pour les
|
||||
navigateurs Chrome et Safari.
|
||||
- Les navigateurs basés sur [Chromium] permettent d'ajouter des sites de
|
||||
navigation sans même y accéder.
|
||||
|
||||
Lorsqu'un moteur de recherche est ajouté, son nom doit être unique. Si
|
||||
vous ne pouvez pas ajouter un moteur de recherche, veuillez :
|
||||
|
||||
- Supprimer le doublon (le nom par défaut est SearXNG) ou bien
|
||||
- Contacter le propriétaire de l'instance que vous souhaitez utiliser
|
||||
afin qu'il modifie le nom de celle-ci.
|
||||
|
||||
## Comment ça marche ?
|
||||
|
||||
SearXNG est une reprise logicielle du projet [searx] [Métamoteur],
|
||||
lui-même inspiré du [projet Seeks]. Il assure la confidentialité en
|
||||
mélangeant vos recherches vers d'autres plateformes sans stocker aucune
|
||||
données de recherche. SearXNG peut être ajouté à la barre de recherche
|
||||
de votre navigateur et même être utilisé comme moteur de recherche par
|
||||
défaut.
|
||||
|
||||
Le lien "{{link('statistiques des moteurs', 'stats')}}" présente des
|
||||
informations anonymisées concernant l'utilisation des divers moteurs de
|
||||
recherche.
|
||||
|
||||
## Comment reprendre la main ?
|
||||
|
||||
SearXNG apprécie votre préoccupation concernant les traces de recherche.
|
||||
N'hésitez pas à utiliser le [dépôt de sources] et à maintenir votre
|
||||
propre instance de recherche.
|
||||
|
||||
Ajoutez votre instance à la [liste d'instances
|
||||
publiques]({{get_setting('brand.public_instances')}}) afin d'aider
|
||||
d'autres personnes à protéger leur vie privée et rendre l'Internet plus
|
||||
libre. Plus Internet sera décentralisé, plus nous aurons de liberté !
|
||||
|
||||
[dépôt de sources]: {{GIT_URL}}
|
||||
[#searxng:matrix.org]: https://matrix.to/#/#searxng:matrix.org
|
||||
[projet SearXNG]: {{get_setting('brand.docs_url')}}
|
||||
[searx]: https://github.com/searx/searx
|
||||
[Métamoteur]: https://fr.wikipedia.org/wiki/M%C3%A9tamoteur
|
||||
[Weblate]: https://translate.codeberg.org/projects/searxng/
|
||||
[projet Seeks]: https://beniz.github.io/seeks/
|
||||
[OpenSearch]: https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md
|
||||
[Firefox]: https://support.mozilla.org/en-US/kb/add-or-remove-search-engine-firefox
|
||||
[Microsoft Edge]: https://support.microsoft.com/en-us/help/4028574/microsoft-edge-change-the-default-search-engine
|
||||
[Chromium]: https://www.chromium.org/tab-to-search
|
|
@ -0,0 +1,97 @@
|
|||
# Syntaxe de recherche
|
||||
|
||||
SearXNG permet de modifier les catégories de recherche, les moteurs
|
||||
utilisés ou encore la langue de recherche par l'intermédiaire d'une
|
||||
syntaxe dédiée. La liste des moteurs de recherche, de catégories et de
|
||||
langues disponibles est accessible depuis la page de
|
||||
{{link('préférences', 'preferences')}}.
|
||||
|
||||
## `!` Spécifier un moteur ou une catégorie
|
||||
|
||||
Pour restreindre la recherche à un moteur ou une catégorie, utilisez le
|
||||
caractère "!". Voici quelques exemples d'utilisation :
|
||||
|
||||
- Rechercher **paris** sur Wikipédia.
|
||||
|
||||
- {{search('!wp paris')}}
|
||||
- {{search('!wikipedia paris')}}
|
||||
|
||||
- Rechercher **paris** dans la catégorie **Carte**.
|
||||
|
||||
- {{search('!map paris')}}
|
||||
|
||||
- Rechercher des **Images**.
|
||||
|
||||
- {{search('!images Wau Holland')}}
|
||||
|
||||
Les abréviations de moteurs et de langues sont aussi valides. Il est
|
||||
possible d'accumuler les moteurs et catégories dans une requête
|
||||
complexe. Par exemple, {{search('!map !ddg !wp paris')}} recherchera
|
||||
**paris** dans la catégorie **Carte** de DuckDuckGo et Wikipédia.
|
||||
|
||||
## `:` Spécifier une langue
|
||||
|
||||
Utilisez le préfixe ":" pour limiter la recherche à une langue en
|
||||
particulier. Par exemple :
|
||||
|
||||
- Rechercher dans les pages françaises de Wikipédia.
|
||||
|
||||
- {{search(':fr !wp Wau Holland')}}
|
||||
|
||||
## `!!<bang>` Recherches externes (!Bang)
|
||||
|
||||
SearXNG supporte les recherches [DuckDuckGo] de type "!Bang". Utilisez
|
||||
le préfixe "!!" pour être automatiquement redirigé vers un moteur de
|
||||
recherche externe. Par exemple :
|
||||
|
||||
- Rechercher sur Wikipédia en langue française.
|
||||
|
||||
- {{search('!!wfr Wau Holland')}}
|
||||
|
||||
Prenez garde au fait que de telles recherches sont exécutées directement
|
||||
sur le moteur externe. Dans ce cas, SearXNG ne peut pas protéger votre
|
||||
vie privée.
|
||||
|
||||
[DuckDuckGo]: https://duckduckgo.com/bang
|
||||
|
||||
## `!!` Redirection automatique
|
||||
|
||||
En utilisant "!!" suivi d'un ou plusieurs espaces lors de votre
|
||||
recherche, vous serez automatiquement redirigé vers le premier résultat
|
||||
de recherche. Cela correspondant au fonctionnement "J'ai de la chance"
|
||||
du moteur Google. Par exemple :
|
||||
|
||||
- Rechercher et être redirigé directement vers le premier lien
|
||||
correspondant.
|
||||
|
||||
- {{search('!! Wau Holland')}}
|
||||
|
||||
Prenez garde au fait qu'aucune vérification ne peut être faite
|
||||
concernant le premier lien retourné. Il pourrait même s'agir d'un site
|
||||
dangereux. Dans ce cas, SearXNG ne peut pas protéger votre vie
|
||||
privée. Soyez prudent en utilisant cette fonctionnalité.
|
||||
|
||||
## Requêtes spéciales
|
||||
|
||||
Dans la section _requêtes spéciales_ de la page de {{link('préférences',
|
||||
'preferences')}} se trouve une liste de mots clés à usage particulier.
|
||||
Par exemple :
|
||||
|
||||
- Générer une valeur aléatoire.
|
||||
|
||||
- {{search('random uuid')}}
|
||||
|
||||
- Calculer une moyenne.
|
||||
|
||||
- {{search('avg 123 548 2.04 24.2')}}
|
||||
|
||||
- Afficher la valeur de la variable _User-Agent_ utilisée par votre
|
||||
navigateur (doit être activé manuellement).
|
||||
|
||||
- {{search('user-agent')}}
|
||||
|
||||
- Convertir une chaîne de caractères en valeurs de hachage ("hash digests")
|
||||
(doit être activé manuellement).
|
||||
|
||||
- {{search('md5 lorem ipsum')}}
|
||||
- {{search('sha512 lorem ipsum')}}
|
|
@ -128,9 +128,6 @@ _INSTALLED = False
|
|||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||
"""Base configuration (schema) of the botdetection."""
|
||||
|
||||
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||
"""Local Limiter configuration."""
|
||||
|
||||
CFG_DEPRECATED = {
|
||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||
}
|
||||
|
@ -138,8 +135,12 @@ CFG_DEPRECATED = {
|
|||
|
||||
def get_cfg() -> config.Config:
|
||||
global CFG # pylint: disable=global-statement
|
||||
|
||||
if CFG is None:
|
||||
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
|
||||
from . import settings_loader # pylint: disable=import-outside-toplevel
|
||||
|
||||
cfg_file = (settings_loader.get_user_cfg_folder() or Path("/etc/searxng")) / "limiter.toml"
|
||||
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, cfg_file, CFG_DEPRECATED)
|
||||
return CFG
|
||||
|
||||
|
||||
|
|
|
@ -152,7 +152,7 @@ def locales_initialize():
|
|||
def region_tag(locale: babel.Locale) -> str:
|
||||
"""Returns SearXNG's region tag from the locale (e.g. zh-TW , en-US)."""
|
||||
if not locale.territory:
|
||||
raise ValueError('%s missed a territory')
|
||||
raise ValueError('babel.Locale %s: missed a territory' % locale)
|
||||
return locale.language + '-' + locale.territory
|
||||
|
||||
|
||||
|
|
|
@ -11,16 +11,12 @@ from typing import Any, Dict
|
|||
import httpx
|
||||
from httpx_socks import AsyncProxyTransport
|
||||
from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError
|
||||
import uvloop
|
||||
|
||||
from searx import logger
|
||||
|
||||
# Optional uvloop (support Python 3.6)
|
||||
try:
|
||||
import uvloop
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
uvloop.install()
|
||||
|
||||
uvloop.install()
|
||||
|
||||
|
||||
logger = logger.getChild('searx.network.client')
|
||||
|
|
|
@ -3,19 +3,27 @@
|
|||
"""
|
||||
|
||||
import ast
|
||||
import re
|
||||
import operator
|
||||
from multiprocessing import Process, Queue
|
||||
from typing import Callable
|
||||
|
||||
import flask
|
||||
import babel
|
||||
from flask_babel import gettext
|
||||
from searx import settings
|
||||
|
||||
from searx.plugins import logger
|
||||
|
||||
name = "Basic Calculator"
|
||||
description = gettext("Calculate mathematical expressions via the search bar")
|
||||
default_on = False
|
||||
default_on = True
|
||||
|
||||
preference_section = 'general'
|
||||
plugin_id = 'calculator'
|
||||
|
||||
operators = {
|
||||
logger = logger.getChild(plugin_id)
|
||||
|
||||
operators: dict[type, Callable] = {
|
||||
ast.Add: operator.add,
|
||||
ast.Sub: operator.sub,
|
||||
ast.Mult: operator.mul,
|
||||
|
@ -35,11 +43,15 @@ def _eval_expr(expr):
|
|||
>>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
|
||||
-5.0
|
||||
"""
|
||||
return _eval(ast.parse(expr, mode='eval').body)
|
||||
try:
|
||||
return _eval(ast.parse(expr, mode='eval').body)
|
||||
except ZeroDivisionError:
|
||||
# This is undefined
|
||||
return ""
|
||||
|
||||
|
||||
def _eval(node):
|
||||
if isinstance(node, ast.Constant) and isinstance(node.value, int):
|
||||
if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
|
||||
return node.value
|
||||
|
||||
if isinstance(node, ast.BinOp):
|
||||
|
@ -51,10 +63,31 @@ def _eval(node):
|
|||
raise TypeError(node)
|
||||
|
||||
|
||||
def timeout_func(timeout, func, *args, **kwargs):
|
||||
|
||||
def handler(q: Queue, func, args, **kwargs): # pylint:disable=invalid-name
|
||||
try:
|
||||
q.put(func(*args, **kwargs))
|
||||
except:
|
||||
q.put(None)
|
||||
raise
|
||||
|
||||
que = Queue()
|
||||
p = Process(target=handler, args=(que, func, args), kwargs=kwargs)
|
||||
p.start()
|
||||
p.join(timeout=timeout)
|
||||
ret_val = None
|
||||
if not p.is_alive():
|
||||
ret_val = que.get()
|
||||
else:
|
||||
logger.debug("terminate function after timeout is exceeded")
|
||||
p.terminate()
|
||||
p.join()
|
||||
p.close()
|
||||
return ret_val
|
||||
|
||||
|
||||
def post_search(_request, search):
|
||||
# don't run on public instances due to possible attack surfaces
|
||||
if settings['server']['public_instance']:
|
||||
return True
|
||||
|
||||
# only show the result of the expression on the first page
|
||||
if search.search_query.pageno > 1:
|
||||
|
@ -68,21 +101,30 @@ def post_search(_request, search):
|
|||
# replace commonly used math operators with their proper Python operator
|
||||
query = query.replace("x", "*").replace(":", "/")
|
||||
|
||||
# use UI language
|
||||
ui_locale = babel.Locale.parse(flask.request.preferences.get_value('locale'), sep='-')
|
||||
|
||||
# parse the number system in a localized way
|
||||
def _decimal(match: re.Match) -> str:
|
||||
val = match.string[match.start() : match.end()]
|
||||
val = babel.numbers.parse_decimal(val, ui_locale, numbering_system="latn")
|
||||
return str(val)
|
||||
|
||||
decimal = ui_locale.number_symbols["latn"]["decimal"]
|
||||
group = ui_locale.number_symbols["latn"]["group"]
|
||||
query = re.sub(f"[0-9]+[{decimal}|{group}][0-9]+[{decimal}|{group}]?[0-9]?", _decimal, query)
|
||||
|
||||
# only numbers and math operators are accepted
|
||||
if any(str.isalpha(c) for c in query):
|
||||
return True
|
||||
|
||||
# in python, powers are calculated via **
|
||||
query_py_formatted = query.replace("^", "**")
|
||||
try:
|
||||
result = str(_eval_expr(query_py_formatted))
|
||||
if result != query:
|
||||
search.result_container.answers['calculate'] = {'answer': f"{query} = {result}"}
|
||||
except (TypeError, SyntaxError, ArithmeticError):
|
||||
pass
|
||||
|
||||
# Prevent the runtime from being longer than 50 ms
|
||||
result = timeout_func(0.05, _eval_expr, query_py_formatted)
|
||||
if result is None or result == "":
|
||||
return True
|
||||
result = babel.numbers.format_decimal(result, locale=ui_locale)
|
||||
search.result_container.answers['calculate'] = {'answer': f"{search.search_query.query} = {result}"}
|
||||
return True
|
||||
|
||||
|
||||
def is_allowed():
|
||||
return not settings['server']['public_instance']
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=missing-module-docstring
|
||||
|
||||
from flask_babel import gettext
|
||||
from searx.plugins import logger
|
||||
|
||||
name = gettext('Hostname replace')
|
||||
description = "Deprecated / contact system admin to configure 'Hostnames plugin'!!"
|
||||
default_on = False
|
||||
preference_section = 'general'
|
||||
|
||||
plugin_id = 'hostname_replace'
|
||||
logger = logger.getChild(plugin_id)
|
||||
|
||||
REPORTED = False
|
||||
|
||||
|
||||
def deprecated_msg():
|
||||
global REPORTED # pylint: disable=global-statement
|
||||
if REPORTED:
|
||||
return
|
||||
logger.error(
|
||||
"'Hostname replace' plugin is deprecated and will be dropped soon!"
|
||||
" Configure 'Hostnames plugin':"
|
||||
" https://docs.searxng.org/src/searx.plugins.hostnames.html"
|
||||
)
|
||||
REPORTED = True
|
||||
|
||||
|
||||
def on_result(_request, _search, result):
|
||||
# pylint: disable=import-outside-toplevel, cyclic-import
|
||||
from searx.plugins.hostnames import on_result as hostnames_on_result
|
||||
|
||||
deprecated_msg()
|
||||
return hostnames_on_result(_request, _search, result)
|
|
@ -1,17 +1,19 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=too-many-branches
|
||||
"""In addition to rewriting/replace reslut URLs, the *hoostnames* plugin offers
|
||||
other features.
|
||||
|
||||
"""
|
||||
.. attention::
|
||||
|
||||
The 'Hostnames plugin' from `PR-3463
|
||||
<https://github.com/searxng/searxng/pull/3463>`_ is a rewrite of the
|
||||
'Hostname replace' plugin. Backwards compatibility is guaranteed for a
|
||||
transitional period, but this will end soon.
|
||||
The **"Hostname replace"** plugin has been replace by **"Hostnames
|
||||
plugin"**, see :pull:`3463` & :pull:`3552`.
|
||||
|
||||
**To maintainers of SearXNG instances, please modify your old plugin config
|
||||
to the new.**
|
||||
The **Hostnames plugin** can be enabled by adding it to the
|
||||
``enabled_plugins`` **list** in the ``setting.yml`` like so.
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
enabled_plugins:
|
||||
- 'Hostnames plugin'
|
||||
...
|
||||
|
||||
- ``hostnames.replace``: A **mapping** of regular expressions to hostnames to be
|
||||
replaced by other hostnames.
|
||||
|
@ -96,7 +98,7 @@ from flask_babel import gettext
|
|||
|
||||
from searx import settings
|
||||
from searx.plugins import logger
|
||||
from searx.settings_loader import get_yaml_file
|
||||
from searx.settings_loader import get_yaml_cfg
|
||||
|
||||
name = gettext('Hostnames plugin')
|
||||
description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname')
|
||||
|
@ -118,7 +120,7 @@ def _load_regular_expressions(settings_key):
|
|||
|
||||
# load external file with configuration
|
||||
if isinstance(setting_value, str):
|
||||
setting_value = get_yaml_file(setting_value)
|
||||
setting_value = get_yaml_cfg(setting_value)
|
||||
|
||||
if isinstance(setting_value, list):
|
||||
return {re.compile(r) for r in setting_value}
|
||||
|
@ -129,29 +131,8 @@ def _load_regular_expressions(settings_key):
|
|||
return {}
|
||||
|
||||
|
||||
# compatibility fallback for old hostname replace plugin
|
||||
# TODO: remove in the future once most/all instance maintainers finished migrating # pylint: disable=fixme
|
||||
def _load_regular_expressions_with_fallback(settings_key):
|
||||
expressions = _load_regular_expressions(settings_key)
|
||||
if expressions:
|
||||
return expressions
|
||||
|
||||
# fallback to the old `hostname_replace` settings format
|
||||
# pylint: disable=import-outside-toplevel, cyclic-import
|
||||
hostname_replace_config = settings.get('hostname_replace', {})
|
||||
if hostname_replace_config:
|
||||
from searx.plugins.hostname_replace import deprecated_msg
|
||||
|
||||
deprecated_msg()
|
||||
|
||||
if settings_key == 'replace':
|
||||
return {re.compile(p): r for (p, r) in hostname_replace_config.items() if r}
|
||||
|
||||
return {re.compile(p) for (p, r) in hostname_replace_config.items() if not r}
|
||||
|
||||
|
||||
replacements = _load_regular_expressions_with_fallback('replace')
|
||||
removables = _load_regular_expressions_with_fallback('remove')
|
||||
replacements = _load_regular_expressions('replace')
|
||||
removables = _load_regular_expressions('remove')
|
||||
high_priority = _load_regular_expressions('high_priority')
|
||||
low_priority = _load_regular_expressions('low_priority')
|
||||
|
||||
|
@ -163,10 +144,10 @@ def _matches_parsed_url(result, pattern):
|
|||
def on_result(_request, _search, result):
|
||||
for pattern, replacement in replacements.items():
|
||||
if _matches_parsed_url(result, pattern):
|
||||
logger.debug(result['url'])
|
||||
# logger.debug(result['url'])
|
||||
result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
|
||||
result['url'] = urlunparse(result[parsed])
|
||||
logger.debug(result['url'])
|
||||
# logger.debug(result['url'])
|
||||
|
||||
for url_field in _url_fields:
|
||||
if not result.get(url_field):
|
||||
|
|
|
@ -28,5 +28,5 @@ def post_search(request, search):
|
|||
search.result_container.answers['ip'] = {'answer': gettext('Your IP is: ') + ip}
|
||||
elif ua_regex.match(search.search_query.query):
|
||||
ua = request.user_agent
|
||||
search.result_container.answers['user-agent'] = {'answer': gettext('Your user-agent is: ') + ua}
|
||||
search.result_container.answers['user-agent'] = {'answer': gettext('Your user-agent is: ') + ua.string}
|
||||
return True
|
||||
|
|
|
@ -13,7 +13,7 @@ from collections import OrderedDict
|
|||
import flask
|
||||
import babel
|
||||
|
||||
from searx import settings, autocomplete
|
||||
from searx import settings, autocomplete, favicons
|
||||
from searx.enginelib import Engine
|
||||
from searx.plugins import Plugin
|
||||
from searx.locales import LOCALE_NAMES
|
||||
|
@ -406,6 +406,11 @@ class Preferences:
|
|||
locked=is_locked('autocomplete'),
|
||||
choices=list(autocomplete.backends.keys()) + ['']
|
||||
),
|
||||
'favicon_resolver': EnumStringSetting(
|
||||
settings['search']['favicon_resolver'],
|
||||
locked=is_locked('favicon_resolver'),
|
||||
choices=list(favicons.proxy.CFG.resolver_map.keys()) + ['']
|
||||
),
|
||||
'image_proxy': BooleanSetting(
|
||||
settings['server']['image_proxy'],
|
||||
locked=is_locked('image_proxy')
|
||||
|
@ -441,7 +446,7 @@ class Preferences:
|
|||
'simple_style': EnumStringSetting(
|
||||
settings['ui']['theme_args']['simple_style'],
|
||||
locked=is_locked('simple_style'),
|
||||
choices=['', 'auto', 'light', 'dark']
|
||||
choices=['', 'auto', 'light', 'dark', 'black']
|
||||
),
|
||||
'center_alignment': BooleanSetting(
|
||||
settings['ui']['center_alignment'],
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
# pylint: disable=invalid-name, missing-module-docstring, missing-class-docstring
|
||||
|
||||
from __future__ import annotations
|
||||
from abc import abstractmethod, ABC
|
||||
import re
|
||||
|
||||
|
@ -258,7 +259,7 @@ class RawTextQuery:
|
|||
FeelingLuckyParser, # redirect to the first link in the results list
|
||||
]
|
||||
|
||||
def __init__(self, query, disabled_engines):
|
||||
def __init__(self, query: str, disabled_engines: list):
|
||||
assert isinstance(query, str)
|
||||
# input parameters
|
||||
self.query = query
|
||||
|
|
|
@ -9,7 +9,6 @@ from typing import List, NamedTuple, Set
|
|||
from urllib.parse import urlparse, unquote
|
||||
|
||||
from searx import logger
|
||||
from searx import utils
|
||||
from searx.engines import engines
|
||||
from searx.metrics import histogram_observe, counter_add, count_error
|
||||
|
||||
|
@ -366,9 +365,9 @@ class ResultContainer:
|
|||
result['score'] = result_score(result, result.get('priority'))
|
||||
# removing html content and whitespace duplications
|
||||
if result.get('content'):
|
||||
result['content'] = utils.html_to_text(result['content']).strip()
|
||||
result['content'] = result['content'].strip()
|
||||
if result.get('title'):
|
||||
result['title'] = ' '.join(utils.html_to_text(result['title']).strip().split())
|
||||
result['title'] = ' '.join(result['title'].strip().split())
|
||||
|
||||
for result_engine in result['engines']:
|
||||
counter_add(result['score'], 'engine', result_engine, 'score')
|
||||
|
|
|
@ -23,7 +23,7 @@ def name_to_iso4217(name):
|
|||
currency = CURRENCIES['names'].get(name, [name])
|
||||
if isinstance(currency, str):
|
||||
return currency
|
||||
return currency[0]
|
||||
return currency[-1]
|
||||
|
||||
|
||||
def iso4217_to_name(iso4217, language):
|
||||
|
|
|
@ -55,6 +55,7 @@ STYLE_NAMES = {
|
|||
'AUTO': 'auto',
|
||||
'LIGHT': 'light',
|
||||
'DARK': 'dark',
|
||||
'BLACK': 'black',
|
||||
}
|
||||
|
||||
BRAND_CUSTOM_LINKS = {
|
||||
|
|
|
@ -35,6 +35,9 @@ search:
|
|||
autocomplete: ""
|
||||
# minimun characters to type before autocompleter starts
|
||||
autocomplete_min: 4
|
||||
# backend for the favicon near URL in search results.
|
||||
# Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default.
|
||||
favicon_resolver: ""
|
||||
# Default search language - leave blank to detect from browser information or
|
||||
# use codes from 'languages.py'
|
||||
default_lang: "auto"
|
||||
|
@ -219,13 +222,13 @@ outgoing:
|
|||
#
|
||||
# enabled_plugins:
|
||||
# # these plugins are enabled if nothing is configured ..
|
||||
# - 'Basic Calculator'
|
||||
# - 'Hash plugin'
|
||||
# - 'Self Information'
|
||||
# - 'Tracker URL remover'
|
||||
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
|
||||
# # these plugins are disabled if nothing is configured ..
|
||||
# - 'Hostnames plugin' # see 'hostnames' configuration below
|
||||
# - 'Basic Calculator'
|
||||
# - 'Open Access DOI rewrite'
|
||||
# - 'Tor check plugin'
|
||||
# # Read the docs before activate: auto-detection of the language could be
|
||||
|
@ -325,6 +328,11 @@ engines:
|
|||
shortcut: 9g
|
||||
disabled: true
|
||||
|
||||
- name: alpine linux packages
|
||||
engine: alpinelinux
|
||||
disabled: true
|
||||
shortcut: alp
|
||||
|
||||
- name: annas archive
|
||||
engine: annas_archive
|
||||
disabled: true
|
||||
|
@ -477,6 +485,23 @@ engines:
|
|||
# to show premium or plus results too:
|
||||
# skip_premium: false
|
||||
|
||||
- name: cloudflareai
|
||||
engine: cloudflareai
|
||||
shortcut: cfai
|
||||
# get api token and accont id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/
|
||||
cf_account_id: 'your_cf_accout_id'
|
||||
cf_ai_api: 'your_cf_api'
|
||||
# create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/
|
||||
cf_ai_gateway: 'your_cf_ai_gateway_name'
|
||||
# find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation
|
||||
cf_ai_model: 'ai_model_name'
|
||||
# custom your preferences
|
||||
# cf_ai_model_display_name: 'Cloudflare AI'
|
||||
# cf_ai_model_assistant: 'prompts_for_assistant_role'
|
||||
# cf_ai_model_system: 'prompts_for_system_role'
|
||||
timeout: 30
|
||||
disabled: true
|
||||
|
||||
# - name: core.ac.uk
|
||||
# engine: core
|
||||
# categories: science
|
||||
|
@ -557,33 +582,6 @@ engines:
|
|||
categories: general
|
||||
shortcut: cc
|
||||
|
||||
- name: bahnhof
|
||||
engine: json_engine
|
||||
search_url: https://www.bahnhof.de/api/stations/search/{query}
|
||||
url_prefix: https://www.bahnhof.de/
|
||||
url_query: slug
|
||||
title_query: name
|
||||
content_query: state
|
||||
shortcut: bf
|
||||
disabled: true
|
||||
about:
|
||||
website: https://www.bahn.de
|
||||
wikidata_id: Q22811603
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
language: de
|
||||
tests:
|
||||
bahnhof:
|
||||
matrix:
|
||||
query: berlin
|
||||
lang: en
|
||||
result_container:
|
||||
- not_empty
|
||||
- ['one_title_contains', 'Berlin Hauptbahnhof']
|
||||
test:
|
||||
- unique_results
|
||||
|
||||
- name: deezer
|
||||
engine: deezer
|
||||
shortcut: dz
|
||||
|
@ -618,6 +616,24 @@ engines:
|
|||
shortcut: dh
|
||||
categories: [it, packages]
|
||||
|
||||
- name: encyclosearch
|
||||
engine: json_engine
|
||||
shortcut: es
|
||||
categories: general
|
||||
paging: true
|
||||
search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15
|
||||
results_query: Results
|
||||
url_query: SourceURL
|
||||
title_query: Title
|
||||
content_query: Description
|
||||
disabled: true
|
||||
about:
|
||||
website: https://encyclosearch.org
|
||||
official_api_documentation: https://encyclosearch.org/docs/#/rest-api
|
||||
use_official_api: true
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
- name: erowid
|
||||
engine: xpath
|
||||
paging: true
|
||||
|
@ -792,6 +808,11 @@ engines:
|
|||
timeout: 8.0
|
||||
disabled: true
|
||||
|
||||
- name: geizhals
|
||||
engine: geizhals
|
||||
shortcut: geiz
|
||||
disabled: true
|
||||
|
||||
- name: genius
|
||||
engine: genius
|
||||
shortcut: gen
|
||||
|
@ -806,24 +827,21 @@ engines:
|
|||
timeout: 10
|
||||
|
||||
- name: gitlab
|
||||
engine: json_engine
|
||||
paging: true
|
||||
search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno}
|
||||
url_query: web_url
|
||||
title_query: name_with_namespace
|
||||
content_query: description
|
||||
page_size: 20
|
||||
categories: [it, repos]
|
||||
engine: gitlab
|
||||
base_url: https://gitlab.com
|
||||
shortcut: gl
|
||||
timeout: 10.0
|
||||
disabled: true
|
||||
about:
|
||||
website: https://about.gitlab.com/
|
||||
website: https://gitlab.com/
|
||||
wikidata_id: Q16639197
|
||||
official_api_documentation: https://docs.gitlab.com/ee/api/
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
# - name: gnome
|
||||
# engine: gitlab
|
||||
# base_url: https://gitlab.gnome.org
|
||||
# shortcut: gn
|
||||
# about:
|
||||
# website: https://gitlab.gnome.org
|
||||
# wikidata_id: Q44316
|
||||
|
||||
- name: github
|
||||
engine: github
|
||||
|
@ -902,26 +920,6 @@ engines:
|
|||
shortcut: mi
|
||||
disabled: true
|
||||
|
||||
- name: gpodder
|
||||
engine: json_engine
|
||||
shortcut: gpod
|
||||
timeout: 4.0
|
||||
paging: false
|
||||
search_url: https://gpodder.net/search.json?q={query}
|
||||
url_query: url
|
||||
title_query: title
|
||||
content_query: description
|
||||
page_size: 19
|
||||
categories: music
|
||||
disabled: true
|
||||
about:
|
||||
website: https://gpodder.net
|
||||
wikidata_id: Q3093354
|
||||
official_api_documentation: https://gpoddernet.readthedocs.io/en/latest/api/
|
||||
use_official_api: false
|
||||
requires_api_key: false
|
||||
results: JSON
|
||||
|
||||
- name: habrahabr
|
||||
engine: xpath
|
||||
paging: true
|
||||
|
@ -1285,6 +1283,12 @@ engines:
|
|||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
- name: openlibrary
|
||||
engine: openlibrary
|
||||
shortcut: ol
|
||||
timeout: 5
|
||||
disabled: true
|
||||
|
||||
- name: openmeteo
|
||||
engine: open_meteo
|
||||
shortcut: om
|
||||
|
@ -1547,6 +1551,24 @@ engines:
|
|||
page_size: 25
|
||||
disabled: true
|
||||
|
||||
- name: right dao
|
||||
engine: xpath
|
||||
paging: true
|
||||
page_size: 12
|
||||
search_url: https://rightdao.com/search?q={query}&start={pageno}
|
||||
results_xpath: //div[contains(@class, "description")]
|
||||
url_xpath: ../div[contains(@class, "title")]/a/@href
|
||||
title_xpath: ../div[contains(@class, "title")]
|
||||
content_xpath: .
|
||||
categories: general
|
||||
shortcut: rd
|
||||
disabled: true
|
||||
about:
|
||||
website: https://rightdao.com/
|
||||
use_official_api: false
|
||||
require_api_key: false
|
||||
results: HTML
|
||||
|
||||
- name: rottentomatoes
|
||||
engine: rottentomatoes
|
||||
shortcut: rt
|
||||
|
@ -1786,6 +1808,22 @@ engines:
|
|||
engine: unsplash
|
||||
shortcut: us
|
||||
|
||||
- name: yandex
|
||||
engine: yandex
|
||||
categories: general
|
||||
search_type: web
|
||||
shortcut: yd
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: yandex images
|
||||
engine: yandex
|
||||
categories: images
|
||||
search_type: images
|
||||
shortcut: ydi
|
||||
disabled: true
|
||||
inactive: true
|
||||
|
||||
- name: yandex music
|
||||
engine: yandex_music
|
||||
shortcut: ydm
|
||||
|
@ -1834,25 +1872,6 @@ engines:
|
|||
about:
|
||||
website: https://wiby.me/
|
||||
|
||||
- name: alexandria
|
||||
engine: json_engine
|
||||
shortcut: alx
|
||||
categories: general
|
||||
paging: true
|
||||
search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno}
|
||||
results_query: results
|
||||
title_query: title
|
||||
url_query: url
|
||||
content_query: snippet
|
||||
timeout: 1.5
|
||||
disabled: true
|
||||
about:
|
||||
website: https://alexandria.org/
|
||||
official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md
|
||||
use_official_api: true
|
||||
require_api_key: false
|
||||
results: JSON
|
||||
|
||||
- name: wikibooks
|
||||
engine: mediawiki
|
||||
weight: 0.5
|
||||
|
@ -2021,6 +2040,16 @@ engines:
|
|||
# query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
|
||||
# shortcut: mysql
|
||||
|
||||
# Required dependency: mariadb
|
||||
# - name: mariadb
|
||||
# engine: mariadb_server
|
||||
# database: mydatabase
|
||||
# username: user
|
||||
# password: pass
|
||||
# limit: 10
|
||||
# query_str: 'SELECT * from mytable WHERE fieldname=%(query)s'
|
||||
# shortcut: mdb
|
||||
|
||||
- name: 1337x
|
||||
engine: 1337x
|
||||
shortcut: 1337x
|
||||
|
@ -2130,28 +2159,35 @@ engines:
|
|||
disabled: true
|
||||
|
||||
- name: yacy
|
||||
# https://docs.searxng.org/dev/engines/online/yacy.html
|
||||
engine: yacy
|
||||
categories: general
|
||||
search_type: text
|
||||
base_url:
|
||||
- https://yacy.searchlab.eu
|
||||
- https://search.lomig.me
|
||||
- https://yacy.ecosys.eu
|
||||
- https://search.webproject.link
|
||||
# see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027
|
||||
# - https://search.kyun.li
|
||||
# - https://yacy.securecomcorp.eu
|
||||
# - https://yacy.myserv.ca
|
||||
# - https://yacy.nsupdate.info
|
||||
# - https://yacy.electroncash.de
|
||||
shortcut: ya
|
||||
disabled: true
|
||||
# required if you aren't using HTTPS for your local yacy instance
|
||||
# https://docs.searxng.org/dev/engines/online/yacy.html
|
||||
# enable_http: true
|
||||
# timeout: 3.0
|
||||
# search_mode: 'global'
|
||||
# if you aren't using HTTPS for your local yacy instance disable https
|
||||
# enable_http: false
|
||||
search_mode: 'global'
|
||||
# timeout can be reduced in 'local' search mode
|
||||
timeout: 5.0
|
||||
|
||||
- name: yacy images
|
||||
engine: yacy
|
||||
network: yacy
|
||||
categories: images
|
||||
search_type: image
|
||||
shortcut: yai
|
||||
disabled: true
|
||||
# timeout can be reduced in 'local' search mode
|
||||
timeout: 5.0
|
||||
|
||||
- name: rumble
|
||||
engine: rumble
|
||||
|
|
|
@ -18,7 +18,7 @@ searx_dir = abspath(dirname(__file__))
|
|||
logger = logging.getLogger('searx')
|
||||
OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
|
||||
SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
|
||||
SIMPLE_STYLE = ('auto', 'light', 'dark')
|
||||
SIMPLE_STYLE = ('auto', 'light', 'dark', 'black')
|
||||
CATEGORIES_AS_TABS = {
|
||||
'general': {},
|
||||
'images': {},
|
||||
|
@ -156,6 +156,7 @@ SCHEMA = {
|
|||
'safe_search': SettingsValue((0, 1, 2), 0),
|
||||
'autocomplete': SettingsValue(str, ''),
|
||||
'autocomplete_min': SettingsValue(int, 4),
|
||||
'favicon_resolver': SettingsValue(str, ''),
|
||||
'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
|
||||
'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
|
||||
'ban_time_on_fail': SettingsValue(numbers.Real, 5),
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue