From 16ad702fe01082c18f1d8e2fa20887ca16ccce7c Mon Sep 17 00:00:00 2001 From: Roger Ostrander Date: Mon, 23 Sep 2013 16:25:50 -0700 Subject: [PATCH] Builder: Do bulk domain-ban lookups --- r2/r2/models/builder.py | 2 -- r2/r2/models/link.py | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/r2/r2/models/builder.py b/r2/r2/models/builder.py index 2fa1b56fe..b98856651 100755 --- a/r2/r2/models/builder.py +++ b/r2/r2/models/builder.py @@ -215,8 +215,6 @@ class Builder(object): if getattr(item, "verdict", None): if not item.verdict.endswith("-approved"): w.link_notes.append(w.verdict) - if hasattr(item, 'url') and is_banned_domain(item.url): - w.link_notes.append("banned domain") if c.user_is_admin and getattr(item, 'ip', None): w.ip_span = ip_span(item.ip) diff --git a/r2/r2/models/link.py b/r2/r2/models/link.py index a6ef6a47e..c50aab84f 100755 --- a/r2/r2/models/link.py +++ b/r2/r2/models/link.py @@ -44,6 +44,7 @@ from mako.filters import url_escape from r2.lib.strings import strings, Score from r2.lib.db import tdb_cassandra from r2.lib.db.tdb_cassandra import NotFoundException, view_of +from r2.lib.utils import sanitize_url from r2.models.subreddit import MultiReddit from r2.models.query_cache import CachedQueryMutator from r2.models.promo import PROMOTE_STATUS, get_promote_srid @@ -374,6 +375,15 @@ class Link(Thing, Printable): cname = c.cname site = c.site + if user_is_admin: + # Checking if a domain's banned isn't even cheap + urls = [item.url for item in wrapped if hasattr(item, 'url')] + # bans_for_domain_parts is just a generator; convert to a set for + # easy use of 'intersection' + from r2.models.admintools import bans_for_domain_parts + banned_domains = {ban.domain + for ban in bans_for_domain_parts(urls)} + if user_is_loggedin: try: saved = LinkSavesByAccount.fast_query(user, wrapped) @@ -622,6 +632,22 @@ class Link(Thing, Printable): taglinetext = _("submitted %(when)s ago by %(author)s") item.taglinetext = taglinetext + if user_is_admin: + # Link notes + url = getattr(item, 'url') + # Pull just the relevant portions out of the url + urlf = sanitize_url(_force_unicode(url)) + if urlf: + urlp = UrlParser(urlf) + hostname = urlp.hostname + if hostname: + parts = (hostname.encode("utf-8").rstrip("."). + split(".")) + subparts = {".".join(parts[y:]) + for y in xrange(len(parts))} + if subparts.intersection(banned_domains): + item.link_notes.append('banned domain') + if user_is_loggedin: incr_counts(wrapped)