mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-28 16:28:01 -05:00
Refactor and reorganize email canonicalization; add some tests.
The email_attrs thing wasn't actually being used (`if False`) and so I've removed it in the process.
This commit is contained in:
@@ -1488,3 +1488,27 @@ class GoldPrice(object):
|
||||
def config_gold_price(v, key=None, data=None):
|
||||
return GoldPrice(v)
|
||||
|
||||
|
||||
def canonicalize_email(email):
|
||||
"""Return the given email address without various localpart manglings.
|
||||
|
||||
a.s.d.f+something@gmail.com --> asdf@gmail.com
|
||||
|
||||
This is not at all RFC-compliant or correct. It's only intended to be a
|
||||
quick heuristic to remove commonly used mangling techniques.
|
||||
|
||||
"""
|
||||
|
||||
if not email:
|
||||
return ""
|
||||
|
||||
email = _force_utf8(email.lower())
|
||||
|
||||
localpart, at, domain = email.partition("@")
|
||||
if not at or "@" in domain:
|
||||
return ""
|
||||
|
||||
localpart = localpart.replace(".", "")
|
||||
localpart = localpart.partition("+")[0]
|
||||
|
||||
return localpart + "@" + domain
|
||||
|
||||
@@ -27,7 +27,7 @@ from r2.lib.db import tdb_cassandra
|
||||
from r2.lib.memoize import memoize
|
||||
from r2.lib.utils import modhash, valid_hash, randstr, timefromnow
|
||||
from r2.lib.utils import UrlParser
|
||||
from r2.lib.utils import constant_time_compare
|
||||
from r2.lib.utils import constant_time_compare, canonicalize_email
|
||||
from r2.lib.cache import sgm
|
||||
from r2.lib import filters
|
||||
from r2.lib.log import log_text
|
||||
@@ -570,19 +570,7 @@ class Account(Thing):
|
||||
return which.get(canon, None)
|
||||
|
||||
def canonical_email(self):
|
||||
email = self.email.lower().encode('utf-8')
|
||||
if email.count("@") != 1:
|
||||
return "invalid@invalid.invalid"
|
||||
|
||||
localpart, domain = email.split("@")
|
||||
|
||||
# a.s.d.f+something@gmail.com --> asdf@gmail.com
|
||||
localpart.replace(".", "")
|
||||
plus = localpart.find("+")
|
||||
if plus > 0:
|
||||
localpart = localpart[:plus]
|
||||
|
||||
return localpart + "@" + domain
|
||||
return canonicalize_email(self.email)
|
||||
|
||||
def cromulent(self):
|
||||
"""Return whether the user has validated their email address and
|
||||
|
||||
@@ -142,47 +142,6 @@ class AdminTools(object):
|
||||
author = authors[aid]
|
||||
author._incr('spammer', len(author_things) if spam else -len(author_things))
|
||||
|
||||
|
||||
def email_attrs(self, account_ids, return_dict=True):
|
||||
account_ids, single = tup(account_ids, True)
|
||||
|
||||
accounts = Account._byID(account_ids, data=True, return_dict=False)
|
||||
|
||||
rv = {}
|
||||
canons = {}
|
||||
aids_by_canon = {} # sounds terrifying
|
||||
|
||||
for a in accounts:
|
||||
attrs = []
|
||||
rv[a._id] = attrs
|
||||
|
||||
if not getattr(a, "email", None):
|
||||
attrs.append(("gray", "no email specified", "X"))
|
||||
else:
|
||||
canon = a.canonical_email()
|
||||
aids_by_canon.setdefault(canon, [])
|
||||
aids_by_canon[canon].append(a._id)
|
||||
|
||||
verify_str = "verified email: " + canon
|
||||
if getattr(a, "email_verified", None):
|
||||
attrs.append(("green", verify_str, "V"))
|
||||
else:
|
||||
attrs.append(("gray", "un" + verify_str, "@"))
|
||||
|
||||
ban_reasons = Account.which_emails_are_banned(aids_by_canon.keys())
|
||||
|
||||
for canon, ban_reason in ban_reasons.iteritems():
|
||||
if ban_reason:
|
||||
for aid in aids_by_canon[canon]:
|
||||
rv[aid].append(("wrong", "banned email " + ban_reason, "B"))
|
||||
|
||||
if single:
|
||||
return rv[account_ids[0]]
|
||||
elif return_dict:
|
||||
return rv
|
||||
else:
|
||||
return filter(None, (rv.get(i) for i in account_ids))
|
||||
|
||||
def set_last_sr_ban(self, things):
|
||||
by_srid = {}
|
||||
for thing in things:
|
||||
|
||||
@@ -74,13 +74,10 @@ class Builder(object):
|
||||
|
||||
authors = {}
|
||||
cup_infos = {}
|
||||
email_attrses = {}
|
||||
friend_rels = None
|
||||
if aids:
|
||||
authors = Account._byID(aids, data=True, stale=self.stale) if aids else {}
|
||||
cup_infos = Account.cup_info_multi(aids)
|
||||
if c.user_is_admin:
|
||||
email_attrses = admintools.email_attrs(aids, return_dict=True)
|
||||
if user and user.gold:
|
||||
friend_rels = user.friend_rels()
|
||||
|
||||
@@ -168,10 +165,6 @@ class Builder(object):
|
||||
args['kind'] = 'special'
|
||||
add_attr(w.attribs, **args)
|
||||
|
||||
if False and w.author and c.user_is_admin:
|
||||
for attr in email_attrses[w.author._id]:
|
||||
add_attr(w.attribs, attr[2], label=attr[1])
|
||||
|
||||
if w.author and w.author._id in cup_infos and not c.profilepage:
|
||||
cup_info = cup_infos[w.author._id]
|
||||
label = _(cup_info["label_template"]) % \
|
||||
|
||||
@@ -32,5 +32,38 @@ class UtilsTest(unittest.TestCase):
|
||||
expect('z', 5)
|
||||
self.assertRaises(ValueError, expect, None, 6)
|
||||
|
||||
|
||||
class TestCanonicalizeEmail(unittest.TestCase):
|
||||
def test_empty_string(self):
|
||||
canonical = utils.canonicalize_email("")
|
||||
self.assertEquals(canonical, "")
|
||||
|
||||
def test_unicode(self):
|
||||
canonical = utils.canonicalize_email(u"\u2713@example.com")
|
||||
self.assertEquals(canonical, "\xe2\x9c\x93@example.com")
|
||||
|
||||
def test_localonly(self):
|
||||
canonical = utils.canonicalize_email("invalid")
|
||||
self.assertEquals(canonical, "")
|
||||
|
||||
def test_multiple_ats(self):
|
||||
canonical = utils.canonicalize_email("invalid@invalid@invalid")
|
||||
self.assertEquals(canonical, "")
|
||||
|
||||
def test_remove_dots(self):
|
||||
canonical = utils.canonicalize_email("d.o.t.s@example.com")
|
||||
self.assertEquals(canonical, "dots@example.com")
|
||||
|
||||
def test_remove_plus_address(self):
|
||||
canonical = utils.canonicalize_email("fork+nork@example.com")
|
||||
self.assertEquals(canonical, "fork@example.com")
|
||||
|
||||
def test_unicode_in_byte_str(self):
|
||||
# this shouldn't ever happen, but some entries in postgres appear
|
||||
# to be byte strings with non-ascii in 'em.
|
||||
canonical = utils.canonicalize_email("\xe2\x9c\x93@example.com")
|
||||
self.assertEquals(canonical, "\xe2\x9c\x93@example.com")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user