sanitize_url: Convert unicode domain names to ascii.

Previously, unicode domain names were only checked for validity, and the
conversion was dropped. This can cause problems down the line where we expect
URLs to be ASCII.
This commit is contained in:
Ricky Ramirez
2013-09-11 16:48:11 -07:00
parent b87d87f9a5
commit c46309c338

View File

@@ -349,21 +349,21 @@ def sanitize_url(url, require_scheme = False):
return
if u.username is not None or u.password is not None:
return
labels = u.hostname.split('.')
for label in labels:
try:
#if this succeeds, this portion of the dns is almost
#valid and converted to ascii
label = label.encode('idna')
except TypeError:
print "label sucks: [%r]" % label
raise
except UnicodeError:
try:
idna_hostname = u.hostname.encode('idna')
except TypeError as e:
g.log.warning("Bad hostname given [%r]: %s", u.hostname, e)
raise
except UnicodeError:
return
for label in idna_hostname.split('.'):
if not re.match(valid_dns, label):
return
else:
#then if this success, this portion of the dns is really valid
if not re.match(valid_dns, label):
return
if idna_hostname != u.hostname:
url = urlunparse((u[0], idna_hostname, u[2], u[3], u[4], u[5]))
return url
def trunc_string(text, length):