Fix relative URLs that do not start with '/'

This commit is contained in:
xywei
2020-07-23 11:12:19 -05:00
parent 9f2446c444
commit 1d4657b714

View File

@@ -61,6 +61,10 @@ def extract_url(xpath_results, search_url):
# fix relative url to the search engine
url = urljoin(search_url, url)
# fix relative urls that fall through the cracks
if '://' not in url:
url = urljoin(search_url, url)
# normalize url
url = normalize_url(url)