Specify dates we want in traffic queries.

2026-01-23 22:08:11 -05:00 · 2012-11-15 17:53:19 -05:00
parent fee89d18af
commit 157362fa6b
1 changed files with 61 additions and 61 deletions
--- a/r2/r2/models/traffic.py
+++ b/r2/r2/models/traffic.py
@@ -127,34 +127,24 @@ def decrement_month(date, amount=1):
    return date.replace(day=1)


-def fill_gaps_generator(interval, start_time, stop_time, query, *columns):
+def fill_gaps_generator(time_points, query, *columns):
    """Generate a timeseries sequence with a value for every sample expected.

-    Iterate backwards in steps specified by interval from the most recent date
-    (stop_time) to the oldest (start_time) and pull the columns listed out of
-    query. If the query doesn't have data for a time slice, fill the gap with
+    Iterate over specified time points and pull the columns listed out of
+    query. If the query doesn't have data for a time point, fill the gap with
    an appropriate number of zeroes.

    """

    iterator = PeekableIterator(query)
-    step = timedelta_by_name(interval)
-    current_slice = stop_time
-
-    while current_slice > start_time:
+    for t in time_points:
        row = iterator.peek()

-        if row and row.date == current_slice:
-            yield current_slice, tuple(getattr(row, c) for c in columns)
+        if row and row.date == t:
+            yield t, tuple(getattr(row, c) for c in columns)
            iterator.next()
        else:
-            yield current_slice, tuple(0 for c in columns)
-
-        # moving backwards a month isn't a fixed timedelta -- special case it
-        if interval != "month":
-            current_slice -= step
-        else:
-            current_slice = decrement_month(current_slice)
+            yield t, tuple(0 for c in columns)


 def fill_gaps(*args, **kwargs):
@@ -168,27 +158,42 @@ time_range_by_interval = dict(hour=datetime.timedelta(days=4),
                              month=datetime.timedelta(weeks=52))


-def time_range(interval):
-    """Calculate the time range to view for a given level of precision.
+def get_time_points(interval, start_time=None, stop_time=None):
+    """Return time points for given interval type.

-    The coarser our granularity, the more history we'll want to see.
+    Time points are in reverse chronological order to match the sort of
+    queries this will be used with. If start_time and stop_time are not
+    specified they will be picked based on the interval.

    """

-    # the stop time is the most recent slice-time; get this by truncating
-    # the appropriate amount from the current time
-    stop_time = datetime.datetime.utcnow()
-    stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
-    if interval in ("day", "month"):
-        stop_time = stop_time.replace(hour=0)
-    if interval == "month":
-        stop_time = stop_time.replace(day=1)
+    if start_time and stop_time:
+        start_time, stop_time = sorted([start_time, stop_time])
+    else:
+        # the stop time is the most recent slice-time; get this by truncating
+        # the appropriate amount from the current time
+        stop_time = datetime.datetime.utcnow()
+        stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
+        if interval in ("day", "month"):
+            stop_time = stop_time.replace(hour=0)
+        if interval == "month":
+            stop_time = stop_time.replace(day=1)

-    # then the start time is easy to work out
-    range = time_range_by_interval[interval]
-    start_time = stop_time - range
+        # then the start time is easy to work out
+        range = time_range_by_interval[interval]
+        start_time = stop_time - range

-    return start_time, stop_time
+    step = timedelta_by_name(interval)
+    current_time = stop_time
+    time_points = []
+
+    while current_time >= start_time:
+        time_points.append(current_time)
+        if interval != 'month':
+            current_time -= step
+        else:
+            current_time = decrement_month(current_time)
+    return time_points


 def points_for_interval(interval):
@@ -200,10 +205,9 @@ def points_for_interval(interval):

 def make_history_query(cls, interval):
    """Build a generic query showing the history of a given aggregate."""
-
-    start_time, stop_time = time_range(interval)
+    time_points = get_time_points(interval)
    q = (Session.query(cls)
-                .filter(cls.date >= start_time))
+                .filter(cls.date.in_(time_points)))

    # subscription stats doesn't have an interval (it's only daily)
    if hasattr(cls, "interval"):
@@ -211,7 +215,7 @@ def make_history_query(cls, interval):

    q = q.order_by(desc(cls.date))

-    return start_time, stop_time, q
+    return time_points, q


 def top_last_month(cls, key):
@@ -241,14 +245,16 @@ def totals(cls, interval):
    effectively filters out all DART / 300x100 etc. traffic numbers.

    """
-    start_time, stop_time = time_range(interval)
+
+    time_points = get_time_points(interval)
+
    q = (Session.query(cls.date, sum(cls.pageview_count).label("sum"))
                .filter(cls.interval == interval)
-                .filter(cls.date > start_time)
+                .filter(cls.date.in_(time_points))
                .filter(cls.codename.startswith(Link._type_prefix))
                .group_by(cls.date)
                .order_by(desc(cls.date)))
-    return fill_gaps(interval, start_time, stop_time, q, "sum")
+    return fill_gaps(time_points, q, "sum")


 def total_by_codename(cls, codenames):
@@ -263,11 +269,11 @@ def total_by_codename(cls, codenames):

 def promotion_history(cls, codename, start, stop):
    """Get hourly traffic for a self-serve promotion across all campaigns."""
+    time_points = get_time_points('hour', start, stop)
    q = (Session.query(cls)
                .filter(cls.interval == "hour")
                .filter(cls.codename == codename)
-                .filter(cls.date >= start)
-                .filter(cls.date <= stop)
+                .filter(cls.date.in_(time_points))
                .order_by(cls.date))
    return [(r.date, (r.unique_count, r.pageview_count)) for r in q.all()]

@@ -292,9 +298,8 @@ class SitewidePageviews(Base):
    @classmethod
    @memoize_traffic(time=3600)
    def history(cls, interval):
-        start_time, stop_time, q = make_history_query(cls, interval)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        time_points, q = make_history_query(cls, interval)
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")


 class PageviewsBySubreddit(Base):
@@ -309,10 +314,9 @@ class PageviewsBySubreddit(Base):
    @classmethod
    @memoize_traffic(time=3600)
    def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
        q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")

    @classmethod
    @memoize_traffic(time=3600 * 6)
@@ -342,10 +346,9 @@ class PageviewsByLanguage(Base):
    @classmethod
    @memoize_traffic(time=3600)
    def history(cls, interval, lang):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
        q = q.filter(cls.lang == lang)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")

    @classmethod
    @memoize_traffic(time=3600 * 6)
@@ -365,10 +368,9 @@ class ClickthroughsByCodename(Base):
    @classmethod
    @memoize_traffic(time=3600)
    def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
        q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q, "unique_count",
-                                                             "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")

    @classmethod
    @memoize_traffic(time=3600)
@@ -414,10 +416,9 @@ class AdImpressionsByCodename(Base):
    @classmethod
    @memoize_traffic(time=3600)
    def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
        q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")

    @classmethod
    @memoize_traffic(time=3600)
@@ -442,9 +443,9 @@ class AdImpressionsByCodename(Base):
        The 300x100 ads get a codename that looks like "fullname_campaign".
        This function gets a list of recent campaigns.
        """
-        start_time, stop_time = time_range("day")
+        time_points = get_time_points('day')
        query = (Session.query(distinct(cls.codename).label("codename"))
-                        .filter(cls.date > start_time)
+                        .filter(cls.date.in_(time_points))
                        .filter(cls.codename.startswith(fullname)))
        return [row.codename for row in query]

@@ -480,10 +481,9 @@ class SubscriptionsBySubreddit(Base):
    @classmethod
    @memoize_traffic(time=3600 * 6)
    def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
        q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "subscriber_count")
+        return fill_gaps(time_points, q, "subscriber_count")

 # create the tables if they don't exist
 if g.db_create_tables: