From 157362fa6bc808da85b525ca45b8f39a91d0f3f2 Mon Sep 17 00:00:00 2001
From: bsimpson63 <bsimpson63@gmail.com>
Date: Thu, 15 Nov 2012 17:53:19 -0500
Subject: [PATCH] Specify dates we want in traffic queries.

---
 r2/r2/models/traffic.py | 122 ++++++++++++++++++++--------------------
 1 file changed, 61 insertions(+), 61 deletions(-)

diff --git a/r2/r2/models/traffic.py b/r2/r2/models/traffic.py
index f797826da..fe686d686 100644
--- a/r2/r2/models/traffic.py
+++ b/r2/r2/models/traffic.py
@@ -127,34 +127,24 @@ def decrement_month(date, amount=1):
     return date.replace(day=1)
 
 
-def fill_gaps_generator(interval, start_time, stop_time, query, *columns):
+def fill_gaps_generator(time_points, query, *columns):
     """Generate a timeseries sequence with a value for every sample expected.
 
-    Iterate backwards in steps specified by interval from the most recent date
-    (stop_time) to the oldest (start_time) and pull the columns listed out of
-    query. If the query doesn't have data for a time slice, fill the gap with
+    Iterate over specified time points and pull the columns listed out of
+    query. If the query doesn't have data for a time point, fill the gap with
     an appropriate number of zeroes.
 
     """
 
     iterator = PeekableIterator(query)
-    step = timedelta_by_name(interval)
-    current_slice = stop_time
-
-    while current_slice > start_time:
+    for t in time_points:
         row = iterator.peek()
 
-        if row and row.date == current_slice:
-            yield current_slice, tuple(getattr(row, c) for c in columns)
+        if row and row.date == t:
+            yield t, tuple(getattr(row, c) for c in columns)
             iterator.next()
         else:
-            yield current_slice, tuple(0 for c in columns)
-
-        # moving backwards a month isn't a fixed timedelta -- special case it
-        if interval != "month":
-            current_slice -= step
-        else:
-            current_slice = decrement_month(current_slice)
+            yield t, tuple(0 for c in columns)
 
 
 def fill_gaps(*args, **kwargs):
@@ -168,27 +158,42 @@ time_range_by_interval = dict(hour=datetime.timedelta(days=4),
                               month=datetime.timedelta(weeks=52))
 
 
-def time_range(interval):
-    """Calculate the time range to view for a given level of precision.
+def get_time_points(interval, start_time=None, stop_time=None):
+    """Return time points for given interval type.
 
-    The coarser our granularity, the more history we'll want to see.
+    Time points are in reverse chronological order to match the sort of
+    queries this will be used with. If start_time and stop_time are not
+    specified they will be picked based on the interval.
 
     """
 
-    # the stop time is the most recent slice-time; get this by truncating
-    # the appropriate amount from the current time
-    stop_time = datetime.datetime.utcnow()
-    stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
-    if interval in ("day", "month"):
-        stop_time = stop_time.replace(hour=0)
-    if interval == "month":
-        stop_time = stop_time.replace(day=1)
+    if start_time and stop_time:
+        start_time, stop_time = sorted([start_time, stop_time])
+    else:
+        # the stop time is the most recent slice-time; get this by truncating
+        # the appropriate amount from the current time
+        stop_time = datetime.datetime.utcnow()
+        stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
+        if interval in ("day", "month"):
+            stop_time = stop_time.replace(hour=0)
+        if interval == "month":
+            stop_time = stop_time.replace(day=1)
 
-    # then the start time is easy to work out
-    range = time_range_by_interval[interval]
-    start_time = stop_time - range
+        # then the start time is easy to work out
+        range = time_range_by_interval[interval]
+        start_time = stop_time - range
 
-    return start_time, stop_time
+    step = timedelta_by_name(interval)
+    current_time = stop_time
+    time_points = []
+
+    while current_time >= start_time:
+        time_points.append(current_time)
+        if interval != 'month':
+            current_time -= step
+        else:
+            current_time = decrement_month(current_time)
+    return time_points
 
 
 def points_for_interval(interval):
@@ -200,10 +205,9 @@ def points_for_interval(interval):
 
 def make_history_query(cls, interval):
     """Build a generic query showing the history of a given aggregate."""
-
-    start_time, stop_time = time_range(interval)
+    time_points = get_time_points(interval)
     q = (Session.query(cls)
-                .filter(cls.date >= start_time))
+                .filter(cls.date.in_(time_points)))
 
     # subscription stats doesn't have an interval (it's only daily)
     if hasattr(cls, "interval"):
@@ -211,7 +215,7 @@ def make_history_query(cls, interval):
 
     q = q.order_by(desc(cls.date))
 
-    return start_time, stop_time, q
+    return time_points, q
 
 
 def top_last_month(cls, key):
@@ -241,14 +245,16 @@ def totals(cls, interval):
     effectively filters out all DART / 300x100 etc. traffic numbers.
 
     """
-    start_time, stop_time = time_range(interval)
+
+    time_points = get_time_points(interval)
+
     q = (Session.query(cls.date, sum(cls.pageview_count).label("sum"))
                 .filter(cls.interval == interval)
-                .filter(cls.date > start_time)
+                .filter(cls.date.in_(time_points))
                 .filter(cls.codename.startswith(Link._type_prefix))
                 .group_by(cls.date)
                 .order_by(desc(cls.date)))
-    return fill_gaps(interval, start_time, stop_time, q, "sum")
+    return fill_gaps(time_points, q, "sum")
 
 
 def total_by_codename(cls, codenames):
@@ -263,11 +269,11 @@ def total_by_codename(cls, codenames):
 
 def promotion_history(cls, codename, start, stop):
     """Get hourly traffic for a self-serve promotion across all campaigns."""
+    time_points = get_time_points('hour', start, stop)
     q = (Session.query(cls)
                 .filter(cls.interval == "hour")
                 .filter(cls.codename == codename)
-                .filter(cls.date >= start)
-                .filter(cls.date <= stop)
+                .filter(cls.date.in_(time_points))
                 .order_by(cls.date))
     return [(r.date, (r.unique_count, r.pageview_count)) for r in q.all()]
 
@@ -292,9 +298,8 @@ class SitewidePageviews(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval):
-        start_time, stop_time, q = make_history_query(cls, interval)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        time_points, q = make_history_query(cls, interval)
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
 
 class PageviewsBySubreddit(Base):
@@ -309,10 +314,9 @@ class PageviewsBySubreddit(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600 * 6)
@@ -342,10 +346,9 @@ class PageviewsByLanguage(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, lang):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.lang == lang)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600 * 6)
@@ -365,10 +368,9 @@ class ClickthroughsByCodename(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q, "unique_count",
-                                                             "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600)
@@ -414,10 +416,9 @@ class AdImpressionsByCodename(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600)
@@ -442,9 +443,9 @@ class AdImpressionsByCodename(Base):
         The 300x100 ads get a codename that looks like "fullname_campaign".
         This function gets a list of recent campaigns.
         """
-        start_time, stop_time = time_range("day")
+        time_points = get_time_points('day')
         query = (Session.query(distinct(cls.codename).label("codename"))
-                        .filter(cls.date > start_time)
+                        .filter(cls.date.in_(time_points))
                         .filter(cls.codename.startswith(fullname)))
         return [row.codename for row in query]
 
@@ -480,10 +481,9 @@ class SubscriptionsBySubreddit(Base):
     @classmethod
     @memoize_traffic(time=3600 * 6)
     def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "subscriber_count")
+        return fill_gaps(time_points, q, "subscriber_count")
 
 # create the tables if they don't exist
 if g.db_create_tables: