Specify dates we want in traffic queries.

This commit is contained in:
bsimpson63
2012-11-15 17:53:19 -05:00
committed by Neil Williams
parent fee89d18af
commit 157362fa6b

View File

@@ -127,34 +127,24 @@ def decrement_month(date, amount=1):
return date.replace(day=1)
def fill_gaps_generator(interval, start_time, stop_time, query, *columns):
def fill_gaps_generator(time_points, query, *columns):
"""Generate a timeseries sequence with a value for every sample expected.
Iterate backwards in steps specified by interval from the most recent date
(stop_time) to the oldest (start_time) and pull the columns listed out of
query. If the query doesn't have data for a time slice, fill the gap with
Iterate over specified time points and pull the columns listed out of
query. If the query doesn't have data for a time point, fill the gap with
an appropriate number of zeroes.
"""
iterator = PeekableIterator(query)
step = timedelta_by_name(interval)
current_slice = stop_time
while current_slice > start_time:
for t in time_points:
row = iterator.peek()
if row and row.date == current_slice:
yield current_slice, tuple(getattr(row, c) for c in columns)
if row and row.date == t:
yield t, tuple(getattr(row, c) for c in columns)
iterator.next()
else:
yield current_slice, tuple(0 for c in columns)
# moving backwards a month isn't a fixed timedelta -- special case it
if interval != "month":
current_slice -= step
else:
current_slice = decrement_month(current_slice)
yield t, tuple(0 for c in columns)
def fill_gaps(*args, **kwargs):
@@ -168,27 +158,42 @@ time_range_by_interval = dict(hour=datetime.timedelta(days=4),
month=datetime.timedelta(weeks=52))
def time_range(interval):
"""Calculate the time range to view for a given level of precision.
def get_time_points(interval, start_time=None, stop_time=None):
"""Return time points for given interval type.
The coarser our granularity, the more history we'll want to see.
Time points are in reverse chronological order to match the sort of
queries this will be used with. If start_time and stop_time are not
specified they will be picked based on the interval.
"""
# the stop time is the most recent slice-time; get this by truncating
# the appropriate amount from the current time
stop_time = datetime.datetime.utcnow()
stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
if interval in ("day", "month"):
stop_time = stop_time.replace(hour=0)
if interval == "month":
stop_time = stop_time.replace(day=1)
if start_time and stop_time:
start_time, stop_time = sorted([start_time, stop_time])
else:
# the stop time is the most recent slice-time; get this by truncating
# the appropriate amount from the current time
stop_time = datetime.datetime.utcnow()
stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
if interval in ("day", "month"):
stop_time = stop_time.replace(hour=0)
if interval == "month":
stop_time = stop_time.replace(day=1)
# then the start time is easy to work out
range = time_range_by_interval[interval]
start_time = stop_time - range
# then the start time is easy to work out
range = time_range_by_interval[interval]
start_time = stop_time - range
return start_time, stop_time
step = timedelta_by_name(interval)
current_time = stop_time
time_points = []
while current_time >= start_time:
time_points.append(current_time)
if interval != 'month':
current_time -= step
else:
current_time = decrement_month(current_time)
return time_points
def points_for_interval(interval):
@@ -200,10 +205,9 @@ def points_for_interval(interval):
def make_history_query(cls, interval):
"""Build a generic query showing the history of a given aggregate."""
start_time, stop_time = time_range(interval)
time_points = get_time_points(interval)
q = (Session.query(cls)
.filter(cls.date >= start_time))
.filter(cls.date.in_(time_points)))
# subscription stats doesn't have an interval (it's only daily)
if hasattr(cls, "interval"):
@@ -211,7 +215,7 @@ def make_history_query(cls, interval):
q = q.order_by(desc(cls.date))
return start_time, stop_time, q
return time_points, q
def top_last_month(cls, key):
@@ -241,14 +245,16 @@ def totals(cls, interval):
effectively filters out all DART / 300x100 etc. traffic numbers.
"""
start_time, stop_time = time_range(interval)
time_points = get_time_points(interval)
q = (Session.query(cls.date, sum(cls.pageview_count).label("sum"))
.filter(cls.interval == interval)
.filter(cls.date > start_time)
.filter(cls.date.in_(time_points))
.filter(cls.codename.startswith(Link._type_prefix))
.group_by(cls.date)
.order_by(desc(cls.date)))
return fill_gaps(interval, start_time, stop_time, q, "sum")
return fill_gaps(time_points, q, "sum")
def total_by_codename(cls, codenames):
@@ -263,11 +269,11 @@ def total_by_codename(cls, codenames):
def promotion_history(cls, codename, start, stop):
"""Get hourly traffic for a self-serve promotion across all campaigns."""
time_points = get_time_points('hour', start, stop)
q = (Session.query(cls)
.filter(cls.interval == "hour")
.filter(cls.codename == codename)
.filter(cls.date >= start)
.filter(cls.date <= stop)
.filter(cls.date.in_(time_points))
.order_by(cls.date))
return [(r.date, (r.unique_count, r.pageview_count)) for r in q.all()]
@@ -292,9 +298,8 @@ class SitewidePageviews(Base):
@classmethod
@memoize_traffic(time=3600)
def history(cls, interval):
start_time, stop_time, q = make_history_query(cls, interval)
return fill_gaps(interval, start_time, stop_time, q,
"unique_count", "pageview_count")
time_points, q = make_history_query(cls, interval)
return fill_gaps(time_points, q, "unique_count", "pageview_count")
class PageviewsBySubreddit(Base):
@@ -309,10 +314,9 @@ class PageviewsBySubreddit(Base):
@classmethod
@memoize_traffic(time=3600)
def history(cls, interval, subreddit):
start_time, stop_time, q = make_history_query(cls, interval)
time_points, q = make_history_query(cls, interval)
q = q.filter(cls.subreddit == subreddit)
return fill_gaps(interval, start_time, stop_time, q,
"unique_count", "pageview_count")
return fill_gaps(time_points, q, "unique_count", "pageview_count")
@classmethod
@memoize_traffic(time=3600 * 6)
@@ -342,10 +346,9 @@ class PageviewsByLanguage(Base):
@classmethod
@memoize_traffic(time=3600)
def history(cls, interval, lang):
start_time, stop_time, q = make_history_query(cls, interval)
time_points, q = make_history_query(cls, interval)
q = q.filter(cls.lang == lang)
return fill_gaps(interval, start_time, stop_time, q,
"unique_count", "pageview_count")
return fill_gaps(time_points, q, "unique_count", "pageview_count")
@classmethod
@memoize_traffic(time=3600 * 6)
@@ -365,10 +368,9 @@ class ClickthroughsByCodename(Base):
@classmethod
@memoize_traffic(time=3600)
def history(cls, interval, codename):
start_time, stop_time, q = make_history_query(cls, interval)
time_points, q = make_history_query(cls, interval)
q = q.filter(cls.codename == codename)
return fill_gaps(interval, start_time, stop_time, q, "unique_count",
"pageview_count")
return fill_gaps(time_points, q, "unique_count", "pageview_count")
@classmethod
@memoize_traffic(time=3600)
@@ -414,10 +416,9 @@ class AdImpressionsByCodename(Base):
@classmethod
@memoize_traffic(time=3600)
def history(cls, interval, codename):
start_time, stop_time, q = make_history_query(cls, interval)
time_points, q = make_history_query(cls, interval)
q = q.filter(cls.codename == codename)
return fill_gaps(interval, start_time, stop_time, q,
"unique_count", "pageview_count")
return fill_gaps(time_points, q, "unique_count", "pageview_count")
@classmethod
@memoize_traffic(time=3600)
@@ -442,9 +443,9 @@ class AdImpressionsByCodename(Base):
The 300x100 ads get a codename that looks like "fullname_campaign".
This function gets a list of recent campaigns.
"""
start_time, stop_time = time_range("day")
time_points = get_time_points('day')
query = (Session.query(distinct(cls.codename).label("codename"))
.filter(cls.date > start_time)
.filter(cls.date.in_(time_points))
.filter(cls.codename.startswith(fullname)))
return [row.codename for row in query]
@@ -480,10 +481,9 @@ class SubscriptionsBySubreddit(Base):
@classmethod
@memoize_traffic(time=3600 * 6)
def history(cls, interval, subreddit):
start_time, stop_time, q = make_history_query(cls, interval)
time_points, q = make_history_query(cls, interval)
q = q.filter(cls.subreddit == subreddit)
return fill_gaps(interval, start_time, stop_time, q,
"subscriber_count")
return fill_gaps(time_points, q, "subscriber_count")
# create the tables if they don't exist
if g.db_create_tables: