From 157362fa6bc808da85b525ca45b8f39a91d0f3f2 Mon Sep 17 00:00:00 2001 From: bsimpson63 Date: Thu, 15 Nov 2012 17:53:19 -0500 Subject: [PATCH] Specify dates we want in traffic queries. --- r2/r2/models/traffic.py | 122 ++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/r2/r2/models/traffic.py b/r2/r2/models/traffic.py index f797826da..fe686d686 100644 --- a/r2/r2/models/traffic.py +++ b/r2/r2/models/traffic.py @@ -127,34 +127,24 @@ def decrement_month(date, amount=1): return date.replace(day=1) -def fill_gaps_generator(interval, start_time, stop_time, query, *columns): +def fill_gaps_generator(time_points, query, *columns): """Generate a timeseries sequence with a value for every sample expected. - Iterate backwards in steps specified by interval from the most recent date - (stop_time) to the oldest (start_time) and pull the columns listed out of - query. If the query doesn't have data for a time slice, fill the gap with + Iterate over specified time points and pull the columns listed out of + query. If the query doesn't have data for a time point, fill the gap with an appropriate number of zeroes. """ iterator = PeekableIterator(query) - step = timedelta_by_name(interval) - current_slice = stop_time - - while current_slice > start_time: + for t in time_points: row = iterator.peek() - if row and row.date == current_slice: - yield current_slice, tuple(getattr(row, c) for c in columns) + if row and row.date == t: + yield t, tuple(getattr(row, c) for c in columns) iterator.next() else: - yield current_slice, tuple(0 for c in columns) - - # moving backwards a month isn't a fixed timedelta -- special case it - if interval != "month": - current_slice -= step - else: - current_slice = decrement_month(current_slice) + yield t, tuple(0 for c in columns) def fill_gaps(*args, **kwargs): @@ -168,27 +158,42 @@ time_range_by_interval = dict(hour=datetime.timedelta(days=4), month=datetime.timedelta(weeks=52)) -def time_range(interval): - """Calculate the time range to view for a given level of precision. +def get_time_points(interval, start_time=None, stop_time=None): + """Return time points for given interval type. - The coarser our granularity, the more history we'll want to see. + Time points are in reverse chronological order to match the sort of + queries this will be used with. If start_time and stop_time are not + specified they will be picked based on the interval. """ - # the stop time is the most recent slice-time; get this by truncating - # the appropriate amount from the current time - stop_time = datetime.datetime.utcnow() - stop_time = stop_time.replace(minute=0, second=0, microsecond=0) - if interval in ("day", "month"): - stop_time = stop_time.replace(hour=0) - if interval == "month": - stop_time = stop_time.replace(day=1) + if start_time and stop_time: + start_time, stop_time = sorted([start_time, stop_time]) + else: + # the stop time is the most recent slice-time; get this by truncating + # the appropriate amount from the current time + stop_time = datetime.datetime.utcnow() + stop_time = stop_time.replace(minute=0, second=0, microsecond=0) + if interval in ("day", "month"): + stop_time = stop_time.replace(hour=0) + if interval == "month": + stop_time = stop_time.replace(day=1) - # then the start time is easy to work out - range = time_range_by_interval[interval] - start_time = stop_time - range + # then the start time is easy to work out + range = time_range_by_interval[interval] + start_time = stop_time - range - return start_time, stop_time + step = timedelta_by_name(interval) + current_time = stop_time + time_points = [] + + while current_time >= start_time: + time_points.append(current_time) + if interval != 'month': + current_time -= step + else: + current_time = decrement_month(current_time) + return time_points def points_for_interval(interval): @@ -200,10 +205,9 @@ def points_for_interval(interval): def make_history_query(cls, interval): """Build a generic query showing the history of a given aggregate.""" - - start_time, stop_time = time_range(interval) + time_points = get_time_points(interval) q = (Session.query(cls) - .filter(cls.date >= start_time)) + .filter(cls.date.in_(time_points))) # subscription stats doesn't have an interval (it's only daily) if hasattr(cls, "interval"): @@ -211,7 +215,7 @@ def make_history_query(cls, interval): q = q.order_by(desc(cls.date)) - return start_time, stop_time, q + return time_points, q def top_last_month(cls, key): @@ -241,14 +245,16 @@ def totals(cls, interval): effectively filters out all DART / 300x100 etc. traffic numbers. """ - start_time, stop_time = time_range(interval) + + time_points = get_time_points(interval) + q = (Session.query(cls.date, sum(cls.pageview_count).label("sum")) .filter(cls.interval == interval) - .filter(cls.date > start_time) + .filter(cls.date.in_(time_points)) .filter(cls.codename.startswith(Link._type_prefix)) .group_by(cls.date) .order_by(desc(cls.date))) - return fill_gaps(interval, start_time, stop_time, q, "sum") + return fill_gaps(time_points, q, "sum") def total_by_codename(cls, codenames): @@ -263,11 +269,11 @@ def total_by_codename(cls, codenames): def promotion_history(cls, codename, start, stop): """Get hourly traffic for a self-serve promotion across all campaigns.""" + time_points = get_time_points('hour', start, stop) q = (Session.query(cls) .filter(cls.interval == "hour") .filter(cls.codename == codename) - .filter(cls.date >= start) - .filter(cls.date <= stop) + .filter(cls.date.in_(time_points)) .order_by(cls.date)) return [(r.date, (r.unique_count, r.pageview_count)) for r in q.all()] @@ -292,9 +298,8 @@ class SitewidePageviews(Base): @classmethod @memoize_traffic(time=3600) def history(cls, interval): - start_time, stop_time, q = make_history_query(cls, interval) - return fill_gaps(interval, start_time, stop_time, q, - "unique_count", "pageview_count") + time_points, q = make_history_query(cls, interval) + return fill_gaps(time_points, q, "unique_count", "pageview_count") class PageviewsBySubreddit(Base): @@ -309,10 +314,9 @@ class PageviewsBySubreddit(Base): @classmethod @memoize_traffic(time=3600) def history(cls, interval, subreddit): - start_time, stop_time, q = make_history_query(cls, interval) + time_points, q = make_history_query(cls, interval) q = q.filter(cls.subreddit == subreddit) - return fill_gaps(interval, start_time, stop_time, q, - "unique_count", "pageview_count") + return fill_gaps(time_points, q, "unique_count", "pageview_count") @classmethod @memoize_traffic(time=3600 * 6) @@ -342,10 +346,9 @@ class PageviewsByLanguage(Base): @classmethod @memoize_traffic(time=3600) def history(cls, interval, lang): - start_time, stop_time, q = make_history_query(cls, interval) + time_points, q = make_history_query(cls, interval) q = q.filter(cls.lang == lang) - return fill_gaps(interval, start_time, stop_time, q, - "unique_count", "pageview_count") + return fill_gaps(time_points, q, "unique_count", "pageview_count") @classmethod @memoize_traffic(time=3600 * 6) @@ -365,10 +368,9 @@ class ClickthroughsByCodename(Base): @classmethod @memoize_traffic(time=3600) def history(cls, interval, codename): - start_time, stop_time, q = make_history_query(cls, interval) + time_points, q = make_history_query(cls, interval) q = q.filter(cls.codename == codename) - return fill_gaps(interval, start_time, stop_time, q, "unique_count", - "pageview_count") + return fill_gaps(time_points, q, "unique_count", "pageview_count") @classmethod @memoize_traffic(time=3600) @@ -414,10 +416,9 @@ class AdImpressionsByCodename(Base): @classmethod @memoize_traffic(time=3600) def history(cls, interval, codename): - start_time, stop_time, q = make_history_query(cls, interval) + time_points, q = make_history_query(cls, interval) q = q.filter(cls.codename == codename) - return fill_gaps(interval, start_time, stop_time, q, - "unique_count", "pageview_count") + return fill_gaps(time_points, q, "unique_count", "pageview_count") @classmethod @memoize_traffic(time=3600) @@ -442,9 +443,9 @@ class AdImpressionsByCodename(Base): The 300x100 ads get a codename that looks like "fullname_campaign". This function gets a list of recent campaigns. """ - start_time, stop_time = time_range("day") + time_points = get_time_points('day') query = (Session.query(distinct(cls.codename).label("codename")) - .filter(cls.date > start_time) + .filter(cls.date.in_(time_points)) .filter(cls.codename.startswith(fullname))) return [row.codename for row in query] @@ -480,10 +481,9 @@ class SubscriptionsBySubreddit(Base): @classmethod @memoize_traffic(time=3600 * 6) def history(cls, interval, subreddit): - start_time, stop_time, q = make_history_query(cls, interval) + time_points, q = make_history_query(cls, interval) q = q.filter(cls.subreddit == subreddit) - return fill_gaps(interval, start_time, stop_time, q, - "subscriber_count") + return fill_gaps(time_points, q, "subscriber_count") # create the tables if they don't exist if g.db_create_tables: