Compare commits

..

3 Commits

Author SHA1 Message Date
dependabot[bot] a958ad85c2
Merge 08384a312e into c4b874e9b0 2024-11-23 15:18:05 +01:00
Markus Heiser c4b874e9b0 [fix] engine Library of Congress: fix API URL loc.gov -> www.loc.gov
Avoid HTTP 404 errors and redirects. Requests to the JSON/YAML API use the base URL [1]:

    https://www.loc.gov/{endpoint}/?fo=json

[1] https://www.loc.gov/apis/json-and-yaml/requests/

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-23 13:02:24 +01:00
Markus Heiser 7c4e4ebd40 [log] warning with URL in case of 'raise_for_httperror'
To be able to implement error handling, it is necessary to know which URL
triggered the exception; until now, the URL has not been logged.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-11-23 11:33:19 +01:00
3 changed files with 8 additions and 9 deletions

View File

@ -27,7 +27,7 @@ categories = ['images']
paging = True paging = True
endpoint = 'photos' endpoint = 'photos'
base_url = 'https://loc.gov' base_url = 'https://www.loc.gov'
search_string = "/{endpoint}/?sp={page}&{query}&fo=json" search_string = "/{endpoint}/?sp={page}&{query}&fo=json"

View File

@ -233,8 +233,7 @@ class Network:
del kwargs['raise_for_httperror'] del kwargs['raise_for_httperror']
return do_raise_for_httperror return do_raise_for_httperror
@staticmethod def patch_response(self, response, do_raise_for_httperror):
def patch_response(response, do_raise_for_httperror):
if isinstance(response, httpx.Response): if isinstance(response, httpx.Response):
# requests compatibility (response is not streamed) # requests compatibility (response is not streamed)
# see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses # see also https://www.python-httpx.org/compatibility/#checking-for-4xx5xx-responses
@ -242,8 +241,11 @@ class Network:
# raise an exception # raise an exception
if do_raise_for_httperror: if do_raise_for_httperror:
raise_for_httperror(response) try:
raise_for_httperror(response)
except:
self._logger.warning(f"HTTP Request failed: {response.request.method} {response.request.url}")
raise
return response return response
def is_valid_response(self, response): def is_valid_response(self, response):
@ -269,7 +271,7 @@ class Network:
else: else:
response = await client.request(method, url, **kwargs) response = await client.request(method, url, **kwargs)
if self.is_valid_response(response) or retries <= 0: if self.is_valid_response(response) or retries <= 0:
return Network.patch_response(response, do_raise_for_httperror) return self.patch_response(response, do_raise_for_httperror)
except httpx.RemoteProtocolError as e: except httpx.RemoteProtocolError as e:
if not was_disconnected: if not was_disconnected:
# the server has closed the connection: # the server has closed the connection:

View File

@ -137,9 +137,6 @@ class OnlineProcessor(EngineProcessor):
self.engine.request(query, params) self.engine.request(query, params)
# ignoring empty urls # ignoring empty urls
if params['url'] is None:
return None
if not params['url']: if not params['url']:
return None return None