Suggest Title decodes HTML entities

This commit is contained in:
Andre D
2011-11-11 14:02:23 -05:00
committed by Neil Williams
parent fea1cf81a2
commit 8eb63214ae

View File

@@ -251,7 +251,7 @@ def get_title(url):
opener = urlopen(url, timeout=15)
text = opener.read(1024)
opener.close()
bs = BeautifulSoup(text)
bs = BeautifulSoup(text, convertEntities=BeautifulSoup.HTML_ENTITIES)
if not bs:
return
@@ -260,7 +260,7 @@ def get_title(url):
if not title_bs or not title_bs.string:
return
return title_bs.string.encode('utf-8')
return title_bs.string.encode('utf-8').strip()
except:
return None