diff --git a/r2/r2/lib/utils/utils.py b/r2/r2/lib/utils/utils.py index 80ee91305..2d9e36f85 100644 --- a/r2/r2/lib/utils/utils.py +++ b/r2/r2/lib/utils/utils.py @@ -1488,3 +1488,27 @@ class GoldPrice(object): def config_gold_price(v, key=None, data=None): return GoldPrice(v) + +def canonicalize_email(email): + """Return the given email address without various localpart manglings. + + a.s.d.f+something@gmail.com --> asdf@gmail.com + + This is not at all RFC-compliant or correct. It's only intended to be a + quick heuristic to remove commonly used mangling techniques. + + """ + + if not email: + return "" + + email = _force_utf8(email.lower()) + + localpart, at, domain = email.partition("@") + if not at or "@" in domain: + return "" + + localpart = localpart.replace(".", "") + localpart = localpart.partition("+")[0] + + return localpart + "@" + domain diff --git a/r2/r2/models/account.py b/r2/r2/models/account.py index 8cefb2f87..f3ac77432 100644 --- a/r2/r2/models/account.py +++ b/r2/r2/models/account.py @@ -27,7 +27,7 @@ from r2.lib.db import tdb_cassandra from r2.lib.memoize import memoize from r2.lib.utils import modhash, valid_hash, randstr, timefromnow from r2.lib.utils import UrlParser -from r2.lib.utils import constant_time_compare +from r2.lib.utils import constant_time_compare, canonicalize_email from r2.lib.cache import sgm from r2.lib import filters from r2.lib.log import log_text @@ -570,19 +570,7 @@ class Account(Thing): return which.get(canon, None) def canonical_email(self): - email = self.email.lower().encode('utf-8') - if email.count("@") != 1: - return "invalid@invalid.invalid" - - localpart, domain = email.split("@") - - # a.s.d.f+something@gmail.com --> asdf@gmail.com - localpart.replace(".", "") - plus = localpart.find("+") - if plus > 0: - localpart = localpart[:plus] - - return localpart + "@" + domain + return canonicalize_email(self.email) def cromulent(self): """Return whether the user has validated their email address and diff --git a/r2/r2/models/admintools.py b/r2/r2/models/admintools.py index 20c34ade9..385747076 100644 --- a/r2/r2/models/admintools.py +++ b/r2/r2/models/admintools.py @@ -142,47 +142,6 @@ class AdminTools(object): author = authors[aid] author._incr('spammer', len(author_things) if spam else -len(author_things)) - - def email_attrs(self, account_ids, return_dict=True): - account_ids, single = tup(account_ids, True) - - accounts = Account._byID(account_ids, data=True, return_dict=False) - - rv = {} - canons = {} - aids_by_canon = {} # sounds terrifying - - for a in accounts: - attrs = [] - rv[a._id] = attrs - - if not getattr(a, "email", None): - attrs.append(("gray", "no email specified", "X")) - else: - canon = a.canonical_email() - aids_by_canon.setdefault(canon, []) - aids_by_canon[canon].append(a._id) - - verify_str = "verified email: " + canon - if getattr(a, "email_verified", None): - attrs.append(("green", verify_str, "V")) - else: - attrs.append(("gray", "un" + verify_str, "@")) - - ban_reasons = Account.which_emails_are_banned(aids_by_canon.keys()) - - for canon, ban_reason in ban_reasons.iteritems(): - if ban_reason: - for aid in aids_by_canon[canon]: - rv[aid].append(("wrong", "banned email " + ban_reason, "B")) - - if single: - return rv[account_ids[0]] - elif return_dict: - return rv - else: - return filter(None, (rv.get(i) for i in account_ids)) - def set_last_sr_ban(self, things): by_srid = {} for thing in things: diff --git a/r2/r2/models/builder.py b/r2/r2/models/builder.py index cddf2b266..2f2462cd5 100755 --- a/r2/r2/models/builder.py +++ b/r2/r2/models/builder.py @@ -74,13 +74,10 @@ class Builder(object): authors = {} cup_infos = {} - email_attrses = {} friend_rels = None if aids: authors = Account._byID(aids, data=True, stale=self.stale) if aids else {} cup_infos = Account.cup_info_multi(aids) - if c.user_is_admin: - email_attrses = admintools.email_attrs(aids, return_dict=True) if user and user.gold: friend_rels = user.friend_rels() @@ -168,10 +165,6 @@ class Builder(object): args['kind'] = 'special' add_attr(w.attribs, **args) - if False and w.author and c.user_is_admin: - for attr in email_attrses[w.author._id]: - add_attr(w.attribs, attr[2], label=attr[1]) - if w.author and w.author._id in cup_infos and not c.profilepage: cup_info = cup_infos[w.author._id] label = _(cup_info["label_template"]) % \ diff --git a/r2/r2/tests/unit/lib/utils_test.py b/r2/r2/tests/unit/lib/utils_test.py index 6a4a90094..6288604ba 100755 --- a/r2/r2/tests/unit/lib/utils_test.py +++ b/r2/r2/tests/unit/lib/utils_test.py @@ -32,5 +32,38 @@ class UtilsTest(unittest.TestCase): expect('z', 5) self.assertRaises(ValueError, expect, None, 6) + +class TestCanonicalizeEmail(unittest.TestCase): + def test_empty_string(self): + canonical = utils.canonicalize_email("") + self.assertEquals(canonical, "") + + def test_unicode(self): + canonical = utils.canonicalize_email(u"\u2713@example.com") + self.assertEquals(canonical, "\xe2\x9c\x93@example.com") + + def test_localonly(self): + canonical = utils.canonicalize_email("invalid") + self.assertEquals(canonical, "") + + def test_multiple_ats(self): + canonical = utils.canonicalize_email("invalid@invalid@invalid") + self.assertEquals(canonical, "") + + def test_remove_dots(self): + canonical = utils.canonicalize_email("d.o.t.s@example.com") + self.assertEquals(canonical, "dots@example.com") + + def test_remove_plus_address(self): + canonical = utils.canonicalize_email("fork+nork@example.com") + self.assertEquals(canonical, "fork@example.com") + + def test_unicode_in_byte_str(self): + # this shouldn't ever happen, but some entries in postgres appear + # to be byte strings with non-ascii in 'em. + canonical = utils.canonicalize_email("\xe2\x9c\x93@example.com") + self.assertEquals(canonical, "\xe2\x9c\x93@example.com") + + if __name__ == '__main__': unittest.main()