Bing news engine corrections

XPath *never* returns None, so the `is not None` checks on XPath results are unnecessary and have been removed.

(I found the HTML coverage report)
This commit is contained in:
Cqoicebordel 2015-01-29 21:19:59 +01:00
parent efde2c21c8
commit 5761d6f0ab
1 changed file with 2 additions and 4 deletions

View File

@@ -59,16 +59,14 @@ def response(resp):
url = link.attrib.get('href') url = link.attrib.get('href')
title = extract_text(link) title = extract_text(link)
contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]') contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]')
if contentXPath is not None: content = escape(extract_text(contentXPath))
content = escape(extract_text(contentXPath))
# parse publishedDate # parse publishedDate
publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div' publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div'
'//span[contains(@class,"sn_ST")]' '//span[contains(@class,"sn_ST")]'
'//span[contains(@class,"sn_tm")]') '//span[contains(@class,"sn_tm")]')
if publishedDateXPath is not None: publishedDate = escape(extract_text(publishedDateXPath))
publishedDate = escape(extract_text(publishedDateXPath))
if re.match("^[0-9]+ minute(s|) ago$", publishedDate): if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
timeNumbers = re.findall(r'\d+', publishedDate) timeNumbers = re.findall(r'\d+', publishedDate)