mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-27 03:00:12 -04:00
Fix some issues with unicode in URLs
First, `UrlParser.update_query` didn't like 7-bit unclean values. `unicode()` should work everywhere `str()` did. Second, the check for embedded NBSPs in `UrlParser.is_web_safe_url` could be bypassed since `b'\xa0'` couldn't automatically be promoted to unicode (thus `u'\xa0' != b'\xa0'`). The check was fixed to handle the NBSP char in either unicode or byte strings.
This commit is contained in:
@@ -522,7 +522,7 @@ class UrlParser(object):
|
||||
# Since in HTTP everything's a string, coercing values to strings now
|
||||
# makes equality testing easier. Python will throw an error if you try
|
||||
# to pass in a non-string key, so that's already taken care of for us.
|
||||
updates = {k: str(v) for k, v in updates.iteritems()}
|
||||
updates = {k: _force_unicode(v) for k, v in updates.iteritems()}
|
||||
self.query_dict.update(updates)
|
||||
|
||||
@property
|
||||
@@ -715,7 +715,8 @@ class UrlParser(object):
|
||||
# should be safe enough to allow after three slashes. Opera 12's the
|
||||
# only browser that trips over them, and it doesn't fall for
|
||||
# `http:///foo.com/`.
|
||||
if match.group(0) == '\xa0':
|
||||
# Check both in case unicode promotion fails
|
||||
if match.group(0) in {u'\xa0', '\xa0'}:
|
||||
if match.string[0:match.start(0)].count('/') < 3:
|
||||
return False
|
||||
else:
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
# The contents of this file are subject to the Common Public Attribution
|
||||
# License Version 1.0. (the "License"); you may not use this file except in
|
||||
# compliance with the License. You may obtain a copy of the License at
|
||||
@@ -138,6 +139,12 @@ class TestIsRedditURL(unittest.TestCase):
|
||||
self.assertIsNotSafeRedditUrl("\xa0http://%s/" % g.domain)
|
||||
self.assertIsSafeRedditUrl("http://%s/\xa0" % g.domain)
|
||||
self.assertIsSafeRedditUrl("/foo/bar/\xa0baz")
|
||||
# Make sure this works if the URL is unicode
|
||||
self.assertIsNotSafeRedditUrl(u"http://\xa0.%s/" % g.domain)
|
||||
self.assertIsNotSafeRedditUrl(u"\xa0http://%s/" % g.domain)
|
||||
self.assertIsSafeRedditUrl(u"http://%s/\xa0" % g.domain)
|
||||
self.assertIsSafeRedditUrl(u"/foo/bar/\xa0baz")
|
||||
|
||||
|
||||
|
||||
class TestSwitchSubdomainByExtension(unittest.TestCase):
|
||||
@@ -281,3 +288,9 @@ class TestEquality(unittest.TestCase):
|
||||
u2 = UrlParser('http://example.com/')
|
||||
u2.update_query(page=1234)
|
||||
self.assertEquals(u, u2)
|
||||
|
||||
def test_unicode_query_params(self):
|
||||
u = UrlParser(u'http://example.com/?page=unicode:(')
|
||||
u2 = UrlParser('http://example.com/')
|
||||
u2.update_query(page=u'unicode:(')
|
||||
self.assertEquals(u, u2)
|
||||
|
||||
Reference in New Issue
Block a user