#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2013 reddit
# Inc. All Rights Reserved.
###############################################################################
"""
This is a tiny Flask app used for a couple of self-serve ad tracking
mechanisms. The URLs it provides are:

/fetch-trackers

    Given a list of Ad IDs, generate tracking hashes specific to the user's
    IP address. This must run outside the original request because the HTML
    may be cached by the CDN.

/click

    Promoted links have their URL replaced with a /click URL by the JS
    (after a call to /fetch-trackers). Redirect to the actual URL after
    logging the click. This must be run in a place whose logs are stored for
    traffic analysis.

For convenience, the script can compile itself into a Zip archive suitable for
use on Amazon Elastic Beanstalk (and possibly other systems).

"""

import cStringIO
import hashlib
import re
import time

from ConfigParser import RawConfigParser
from wsgiref.handlers import format_date_time

from flask import Flask, request, json, make_response, abort, redirect


application = Flask(__name__)
MAX_FULLNAME_LENGTH = 128  # can include srname and codename, leave room
REQUIRED_PACKAGES = [
    "flask",
]

# A JSONP callback must be a plain, optionally dotted, JavaScript identifier
# (e.g. "cb" or "jQuery123_456").  Anything else is rejected so that request
# data can never be reflected as arbitrary script.  \Z (not $) is used so a
# trailing newline cannot sneak through.
_JSONP_CALLBACK_RE = re.compile(
    r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*\Z")


class ApplicationConfig(object):
    """A thin wrapper around ConfigParser that remembers what we read.

    The remembered settings can then be written out to a minimal config file
    when building the Elastic Beanstalk zipfile.

    """

    def __init__(self):
        """Load "production.ini" from the current working directory."""
        self.input = RawConfigParser()
        with open("production.ini") as f:
            self.input.readfp(f)
        # Collects only the settings that were actually requested via get().
        self.output = RawConfigParser()

    def get(self, section, key):
        """Return the value of `key` in `section` and record that it was used.

        Lookup errors raised by the underlying parser propagate unchanged.

        """
        value = self.input.get(section, key)

        # remember that we needed this configuration value
        # (the DEFAULT section is never created explicitly; values set on it
        # are stored as parser defaults)
        if (section.upper() != "DEFAULT" and
                not self.output.has_section(section)):
            self.output.add_section(section)
        self.output.set(section, key, value)

        return value

    def to_config(self):
        """Serialize every setting read so far as ini-formatted text."""
        io = cStringIO.StringIO()
        self.output.write(io)
        return io.getvalue()


# NOTE: reading the configuration happens at import time, so importing this
# module requires production.ini to be present.
config = ApplicationConfig()
tracking_secret = config.get('DEFAULT', 'tracking_secret')
adtracker_url = config.get('DEFAULT', 'adtracker_url')


def jsonpify(callback_name, data):
    """Wrap `data` in a JSONP response that invokes `callback_name`.

    `callback_name` comes straight from the query string, so it is validated
    as a dotted JavaScript identifier before being echoed into the response
    body; a request with any other callback is aborted with HTTP 400.

    Returns a response whose body is `callback_name(<json>)` and whose
    mimetype is text/javascript.

    """
    if not _JSONP_CALLBACK_RE.match(callback_name):
        abort(400)

    data = callback_name + '(' + json.dumps(data) + ')'
    response = make_response(data)
    response.mimetype = 'text/javascript'
    return response


def get_client_ip():
    """Figure out the IP address of the remote client.

    If the remote address is on the 10.* network, we'll assume that it is a
    trusted load balancer and that the last component of X-Forwarded-For is
    trustworthy.

    """
    if request.remote_addr.startswith("10."):
        # it's a load balancer, use x-forwarded-for
        return request.access_route[-1]
    else:
        # direct connection to someone outside
        return request.remote_addr


@application.route("/")
def healthcheck():
    """Trivial endpoint that always answers with a fixed success message."""
    return "I am healthy."

# Upper bound on the number of ids accepted by a single /fetch-trackers call.
MAX_TRACKER_IDS = 32


def _tracking_hash(ip, fullname):
    """Return the hex SHA-1 of ip + fullname + tracking_secret.

    Each component is UTF-8 encoded explicitly before hashing.  Query string
    values arrive as unicode, and letting them be coerced implicitly raises
    UnicodeEncodeError for any non-ASCII fullname.  For ASCII input the
    digest is identical to hashing the plain concatenation.

    """
    parts = (ip, fullname, tracking_secret)
    payload = b''.join(
        part if isinstance(part, bytes) else part.encode('utf-8')
        for part in parts
    )
    return hashlib.sha1(payload).hexdigest()


@application.route('/fetch-trackers')
def fetch_trackers():
    """Return a JSONP map of ad fullname -> tracking hash for this client.

    Query parameters:
        callback: name of the JSONP callback (required).
        ids[]: repeated ad fullnames; more than MAX_TRACKER_IDS is a 400.

    Fullnames longer than MAX_FULLNAME_LENGTH are silently left out of the
    result.

    """
    ip = get_client_ip()
    jsonp_callback = request.args['callback']
    ids = request.args.getlist('ids[]')

    if len(ids) > MAX_TRACKER_IDS:
        abort(400)

    hashed = {}
    for fullname in ids:
        if len(fullname) > MAX_FULLNAME_LENGTH:
            continue
        hashed[fullname] = _tracking_hash(ip, fullname)

    return jsonpify(jsonp_callback, hashed)


@application.route('/click')
def click_redirect():
    """Validate a click's tracking hash, then redirect to the destination.

    Query parameters (all required):
        url: destination to redirect to.
        id: ad fullname the click belongs to.
        hash: tracking hash previously handed out for this client and id.

    Responds with 403 when the supplied hash does not match the one computed
    for the client's IP address; otherwise with an uncacheable redirect.

    """
    ip = get_client_ip()
    destination = request.args['url'].encode('utf-8')
    fullname = request.args['id']
    observed_hash = request.args['hash']

    # NOTE(review): the hash covers only the client IP and the fullname, not
    # the destination URL, so `url` itself is not authenticated here.
    expected_hash = _tracking_hash(ip, fullname)
    if expected_hash != observed_hash:
        abort(403)

    # Date == Expires plus no-cache headers keep intermediaries from caching
    # the redirect.
    now = format_date_time(time.time())
    response = redirect(destination)
    response.headers['Cache-control'] = 'no-cache'
    response.headers['Pragma'] = 'no-cache'
    response.headers['Date'] = now
    response.headers['Expires'] = now
    return response


if __name__ == "__main__":
    # package up for elastic beanstalk
    import zipfile

    with zipfile.ZipFile("/tmp/tracker.zip", "w",
                         zipfile.ZIP_DEFLATED) as archive:
        # the script itself becomes the archive's application.py
        archive.write(__file__, "application.py")
        # only the settings that were actually read go into the config
        archive.writestr("production.ini", config.to_config())
        archive.writestr("requirements.txt",
                         "\n".join(REQUIRED_PACKAGES) + "\n")