Clean up and reorganize traffic-tracking code.

This commit is contained in:
Neil Williams
2012-12-18 19:38:16 -08:00
parent efee207552
commit d253daa907
9 changed files with 201 additions and 203 deletions

View File

@@ -35,14 +35,13 @@ from r2.models import Thing
from r2.config import cache
from r2.config.extensions import is_api
from r2.lib.menus import CommentSortMenu
from r2.lib.tracking import AdframeInfo
from r2.lib.jsonresponse import json_respond
from pylons.i18n import _, ungettext
from pylons import c, request, g
from pylons.controllers.util import abort
from r2.lib import media
from r2.lib import promote
from r2.lib import promote, tracking
from r2.lib.captcha import get_iden
from r2.lib.filters import spaceCompress, _force_unicode, _force_utf8
from r2.lib.filters import unsafe, websafe, SC_ON, SC_OFF, websafe_json
@@ -3584,8 +3583,7 @@ class Dart_Ad(CachedTemplate):
def __init__(self, dartsite, tag, custom_keyword=None):
tag = tag or "homepage"
keyword = custom_keyword or tag
tracker_url = AdframeInfo.gen_url(fullname = "dart_" + tag,
ip = request.ip)
tracker_url = tracking.get_impression_pixel_url("dart_" + tag)
Templated.__init__(self, tag = tag, dartsite = dartsite,
tracker_url = tracker_url, keyword=keyword)

View File

@@ -20,206 +20,141 @@
# Inc. All Rights Reserved.
###############################################################################
from base64 import standard_b64decode as b64dec, \
standard_b64encode as b64enc
from pylons import request
from Crypto.Cipher import AES
from random import choice
from pylons import g, c
from urllib import quote_plus, unquote_plus
from Crypto.Random import get_random_bytes
import base64
import hashlib
import urllib
key_len = 16
pad_len = 32
from pylons import c, g, request
def pkcs5pad(text, padlen = 8):
'''Insures the string is an integer multiple of padlen by appending to its end
N characters which are chr(N).'''
l = (padlen - len(text) % padlen) or padlen
padding = ''.join([chr(l) for x in xrange(0,l)])
from r2.lib.filters import _force_utf8
KEY_SIZE = 16 # AES-128
SALT_SIZE = KEY_SIZE * 2 # backwards compatibility
def _pad_message(text):
    """Pad `text` with PKCS7 bytes up to the next AES block boundary.

    Every pad byte carries the value N, where N is the number of pad bytes
    appended. Input whose length is already a whole number of blocks still
    receives one complete extra block of padding, so the padding can always
    be identified and stripped unambiguously.

    """
    leftover = len(text) % AES.block_size
    # leftover is always smaller than the block size, so pad_count falls in
    # 1..block_size; a leftover of zero yields a full block of padding.
    pad_count = AES.block_size - leftover
    return text + chr(pad_count) * pad_count
def pkcs5unpad(text, padlen = 8):
'''Undoes padding of pkcs5pad'''
if text:
key = ord(text[-1])
if (key <= padlen and key > 0 and
all(ord(x) == key for x in text[-key:])):
text = text[:-key]
return text
def cipher(lv):
'''returns a pycrypto object used by encrypt and decrypt, with the key based on g.tracking_secret'''
key = g.tracking_secret
return AES.new(key[:key_len], AES.MODE_CBC, lv[:key_len])
def _unpad_message(text):
"""Return `text` with padding removed. The inverse of _pad_message."""
if not text:
return ""
def encrypt(text):
'''generates an encrypted version of text. The encryption is salted using the pad_len characters
that randomly make up the front of the resulting string. The string is base64 encoded, and url escaped
so as to be suitable to be used as a GET parameter'''
randstr = ''.join(choice('1234567890abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ+/')
for x in xrange(pad_len))
cip = cipher(randstr)
text = b64enc(cip.encrypt(pkcs5pad(text, key_len)))
return quote_plus(randstr + text, safe='')
padding_size = ord(text[-1])
if padding_size > AES.block_size:
return ""
def decrypt(text):
'''Inverts encrypt'''
# we can unquote even if text is not quoted.
text = unquote_plus(text)
# grab salt
randstr = text[:pad_len]
# grab message
text = text[pad_len:]
cip = cipher(randstr)
return pkcs5unpad(cip.decrypt(b64dec(text)), key_len)
unpadded, padding = text[:-padding_size], text[-padding_size:]
if any(ord(x) != padding_size for x in padding):
return ""
return unpadded
def safe_str(text):
'''That pesky function always needed to make sure nothing breaks if text is unicode. if it is,
it returns the utf8 transcode of it and returns a python str.'''
try:
if isinstance(text, unicode):
return text.encode('utf8')
except:
g.log.error("unicode encoding exception in safe_str")
return ''
return str(text)
class Info(object):
'''Class for generating and reading user tracker information.'''
_tracked = []
tracker_url = ""
def __init__(self, text = '', **kw):
for s in self._tracked:
setattr(self, s, '')
if text:
try:
data = decrypt(text).split('|')
except:
g.log.error("decryption failure on '%s'" % text)
data = []
for i, d in enumerate(data):
if i < len(self._tracked):
setattr(self, self._tracked[i], d)
else:
self.init_defaults(**kw)
def init_defaults(self, **kw):
raise NotImplementedError
def tracking_url(self):
data = '|'.join(getattr(self, s) for s in self._tracked)
data = encrypt(data)
return "%s?v=%s" % (self.tracker_url, data)
@classmethod
def gen_url(cls, **kw):
try:
return cls(**kw).tracking_url()
except Exception,e:
g.log.error(e)
try:
randstr = ''.join(choice('1234567890abcdefghijklmnopqrstuvwxyz' +
'ABCDEFGHIJKLMNOPQRSTUVWXYZ+')
for x in xrange(pad_len))
return "%s?v=%s" % (cls.tracker_url, randstr)
except:
g.log.error("fallback rendering failed as well")
return ""
class UserInfo(Info):
'''Class for generating and reading user tracker information.'''
_tracked = ['name', 'site', 'lang', 'cname']
tracker_url = g.tracker_url
@staticmethod
def get_site():
return safe_str(c.site.name if c.site else '')
@staticmethod
def get_srpath():
name = UserInfo.get_site()
action = None
if c.render_style in ("mobile", "compact"):
action = c.render_style
else:
try:
action = request.environ['pylons.routes_dict'].get('action')
except Exception,e:
g.log.error(e)
if not action:
return name
return '-'.join((name, action))
@staticmethod
def get_usertype():
return "loggedin" if c.user_is_loggedin else "guest"
def init_defaults(self):
self.name = safe_str(c.user.name if c.user_is_loggedin else '')
self.site = UserInfo.get_srpath()
self.lang = safe_str(c.lang if c.lang else '')
self.cname = safe_str(c.cname)
class PromotedLinkInfo(Info):
_tracked = []
tracker_url = g.adtracker_url
def __init__(self, text = "", ip = "0.0.0.0", **kw):
self.ip = ip
Info.__init__(self, text = text, **kw)
def init_defaults(self, fullname):
self.fullname = fullname
@classmethod
def make_hash(cls, ip, fullname):
return hashlib.sha1("%s%s%s" % (ip, fullname,
g.tracking_secret)).hexdigest()
def tracking_url(self):
return (self.tracker_url + "?hash=" +
self.make_hash(self.ip, self.fullname)
+ "&id=" + self.fullname)
class PromotedLinkClickInfo(PromotedLinkInfo):
_tracked = []
tracker_url = g.clicktracker_url
def init_defaults(self, dest, **kw):
self.dest = dest
return PromotedLinkInfo.init_defaults(self, **kw)
def tracking_url(self):
s = (PromotedLinkInfo.tracking_url(self) + '&url=' +
urllib.quote_plus(self.dest))
return s
class AdframeInfo(PromotedLinkInfo):
tracker_url = g.adframetracker_url
@classmethod
def make_hash(cls, ip, fullname):
return hashlib.sha1("%s%s" % (fullname,
g.tracking_secret)).hexdigest()
def _make_cipher(initialization_vector):
    """Build the AES-CBC cipher object shared by `encrypt` and `decrypt`.

    The key is the first KEY_SIZE characters of g.tracking_secret. Only the
    first AES.block_size characters of `initialization_vector` are used as
    the IV; anything beyond that is ignored.

    """
    key = g.tracking_secret[:KEY_SIZE]
    iv = initialization_vector[:AES.block_size]
    return AES.new(key, AES.MODE_CBC, iv)
def encrypt(plaintext):
"""Return the message `plaintext` encrypted.
def benchmark(n = 10000):
"""on my humble desktop machine, this gives ~150 microseconds per gen_url"""
import time
t = time.time()
for x in xrange(n):
gen_url()
t = time.time() - t
print ("%d generations in %5.3f seconds (%5.3f us/gen)" %
(n, t, 10**6 * t/n))
The encrypted message will have its salt prepended and will be URL encoded
to make it suitable for use in URLs and Cookies.
NOTE: this function is here for backwards compatibility. Please do not
use it for new code.
"""
# we want SALT_SIZE letters of salt text, but we're generating random bytes
# so we'll calculate how many bytes we need to get SALT_SIZE characters of
# base64 output. because of padding, this only works for SALT_SIZE % 4 == 0
assert SALT_SIZE % 4 == 0
salt_byte_count = (SALT_SIZE / 4) * 3
salt_bytes = get_random_bytes(salt_byte_count)
salt = base64.b64encode(salt_bytes)
cipher = _make_cipher(salt)
padded = _pad_message(plaintext)
ciphertext = cipher.encrypt(padded)
encoded = base64.b64encode(ciphertext)
return urllib.quote_plus(salt + encoded, safe="")
def decrypt(encrypted):
    """Return the plaintext recovered from `encrypted`.

    The input is URL-unquoted, its first SALT_SIZE characters are taken as
    the salt, and the remainder is base64-decoded, run through the cipher
    built from that salt, and stripped of its padding.

    NOTE: like `encrypt`, this exists for backwards compatibility only.
    Please do not use it for new code.

    """
    unquoted = urllib.unquote_plus(encrypted)
    salt = unquoted[:SALT_SIZE]
    ciphertext = base64.b64decode(unquoted[SALT_SIZE:])
    return _unpad_message(_make_cipher(salt).decrypt(ciphertext))
def get_site():
    """Return the name of the current "site" (subreddit).

    An empty string is returned when c.site is not set.

    """
    if c.site:
        return c.site.name
    return ""
def get_srpath():
    """Return the srpath of the current request.

    The srpath is Subredditname-Action, e.g. sophiepotamus-GET_listing. For
    the "mobile" and "compact" render styles the render style stands in for
    the action. When no action is available, just the site name is returned.

    """
    name = get_site()

    if c.render_style in ("mobile", "compact"):
        action = c.render_style
    else:
        action = request.environ['pylons.routes_dict'].get('action')

    return '-'.join((name, action)) if action else name
def get_pageview_pixel_url():
    """Return a URL to use for tracking pageviews for the current request.

    The URL is g.tracker_url with a `v` query parameter holding the
    encrypted, pipe-separated fields: user name (empty when logged out),
    srpath, language, and cname.

    """
    username = c.user.name if c.user_is_loggedin else ""
    fields = (username, get_srpath(), c.lang or "", c.cname)
    payload = "|".join([_force_utf8(field) for field in fields])
    token = encrypt(payload)
    return g.tracker_url + "?v=" + token
def get_impression_pixel_url(codename):
    """Return a URL to use for tracking impressions of the given advert.

    The URL is g.adframetracker_url with two query parameters: `id` is the
    codename itself and `hash` is the codename followed by the hex SHA-1
    digest of the codename concatenated with g.tracking_secret.

    """
    # TODO: use HMAC here
    digest = hashlib.sha1(codename + g.tracking_secret).hexdigest()
    params = {
        "hash": codename + digest,
        "id": codename,
    }
    return g.adframetracker_url + "?" + urllib.urlencode(params)

View File

@@ -22,7 +22,7 @@
<%!
from r2.lib.template_helpers import get_domain, static
from r2.lib.tracking import AdframeInfo
from r2.lib import tracking
import random
%>
@@ -32,7 +32,7 @@
<script type="text/javascript">
<%
tracker_url = AdframeInfo.gen_url(fullname = "adblock", ip = request.ip)
tracker_url = tracking.get_impression_pixel_url("adblock")
%>
$(function() {
var ad = $("#ad-frame");

View File

@@ -24,7 +24,7 @@
<%!
from r2.lib.template_helpers import static
from r2.models import Link, Comment, Subreddit
from r2.lib.tracking import UserInfo
from r2.lib import tracking
%>
<%namespace file="utils.html" import="js_setup, googleanalytics"/>
<html xmlns="http://www.w3.org/1999/xhtml" lang="${c.lang}"
@@ -79,7 +79,7 @@ reddit, reddit.com, vote, comment, submit
%if g.tracker_url and thing.site_tracking and not c.secure:
<script type="text/javascript">
(function() {
var url = '${UserInfo.gen_url()}';
var url = '${tracking.get_pageview_pixel_url()}';
var cachebuster = Math.round(Math.random() * 2147483647);
var cachebusted_url = url + "&r=" + cachebuster;
var img = new Image();

View File

@@ -51,7 +51,7 @@
%endif
%endif
%if g.tracker_url and thing.site_tracking and not c.secure:
<img alt="" src="${tracking.UserInfo.gen_url()}"/>
<img alt="" src="${tracking.get_pageview_pixel_url()}"/>
%endif
</body>
</%def>

View File

@@ -36,7 +36,7 @@
${thing.content and thing.content() or ''}
%if g.tracker_url and thing.site_tracking:
<img alt="" src="${tracking.UserInfo.gen_url()}"/>
<img alt="" src="${tracking.get_pageview_pixel_url()}"/>
%endif
<%def name="Title()">

View File

@@ -49,7 +49,7 @@
</div>
%if g.tracker_url and thing.site_tracking:
<img alt="" src="${tracking.UserInfo.gen_url()}"/>
<img alt="" src="${tracking.get_pageview_pixel_url()}"/>
%endif
</body>
</%def>

View File

@@ -26,7 +26,7 @@
from r2.lib.filters import spaceCompress, unsafe, safemarkdown
from r2.lib.template_helpers import add_sr, js_config, static, html_datetime
from r2.lib.utils import cols, long_datetime, timesince
from r2.lib.tracking import UserInfo
from r2.lib import tracking
from datetime import datetime
%>
<%def name="tags(**kw)">
@@ -420,9 +420,9 @@ ${unsafe(txt)}
var _gaq = _gaq || [];
_gaq.push(
['_setAccount', '${g.googleanalytics}'],
['_setCustomVar', 1, 'site', '${UserInfo.get_site()}', 3],
['_setCustomVar', 2, 'srpath', '${UserInfo.get_srpath()}', 3],
['_setCustomVar', 3, 'usertype', '${UserInfo.get_usertype()}', 2],
['_setCustomVar', 1, 'site', '${tracking.get_site()}', 3],
['_setCustomVar', 2, 'srpath', '${tracking.get_srpath()}', 3],
['_setCustomVar', 3, 'usertype', '${"loggedin" if c.user_is_loggedin else "guest"}', 2],
['_setCustomVar', 4, 'uitype', '${uitype}', 3],
['_trackPageview']
);

View File

@@ -0,0 +1,65 @@
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2012 reddit
# Inc. All Rights Reserved.
###############################################################################
import unittest
from r2.lib import tracking
# Local shorthand for the key size exported by the module under test.
KEY_SIZE = tracking.KEY_SIZE

# Sample plaintext used by the round-trip tests.
MESSAGE = "the quick brown fox jumped over..."

# Sixteen bytes of value 0x10: the padding the tests expect to be appended
# to an empty or exactly block-aligned input.
BLOCK_O_PADDING = "\x10" * 16
class TestPadding(unittest.TestCase):
    """Tests for tracking._pad_message and tracking._unpad_message."""

    def test_pad_empty_string(self):
        """An empty input is padded out to BLOCK_O_PADDING."""
        padded = tracking._pad_message("")
        self.assertEqual(padded, BLOCK_O_PADDING)

    def test_pad_round_string(self):
        """An input of exactly KEY_SIZE bytes gains BLOCK_O_PADDING."""
        padded = tracking._pad_message("x" * KEY_SIZE)
        self.assertEqual(len(padded), KEY_SIZE * 2)
        self.assertEqual(padded[KEY_SIZE:], BLOCK_O_PADDING)

    def test_unpad_empty_message(self):
        """Unpadding an empty string gives back an empty string."""
        unpadded = tracking._unpad_message("")
        self.assertEqual(unpadded, "")

    def test_unpad_evil_message(self):
        """A message ending in the byte value 57 unpads to an empty string."""
        evil = ("a" * 88) + chr(57)
        result = tracking._unpad_message(evil)
        self.assertEqual(result, "")

    def test_padding_roundtrip(self):
        """Padding and then unpadding returns the original message."""
        tested = tracking._unpad_message(tracking._pad_message(MESSAGE))
        self.assertEqual(MESSAGE, tested)
class TestEncryption(unittest.TestCase):
    """Tests for tracking.encrypt and tracking.decrypt."""

    def test_encryption_roundtrip(self):
        """Encrypting and then decrypting returns the original message."""
        tested = tracking.decrypt(tracking.encrypt(MESSAGE))
        self.assertEqual(MESSAGE, tested)
if __name__ == '__main__':
unittest.main()