Mirror of https://github.com/reddit-archive/reddit.git
Specify dates we want in traffic queries.
commit 157362fa6b (parent fee89d18af)
committed by Neil Williams
@@ -127,34 +127,24 @@ def decrement_month(date, amount=1):
     return date.replace(day=1)
 
 
-def fill_gaps_generator(interval, start_time, stop_time, query, *columns):
+def fill_gaps_generator(time_points, query, *columns):
     """Generate a timeseries sequence with a value for every sample expected.
 
-    Iterate backwards in steps specified by interval from the most recent date
-    (stop_time) to the oldest (start_time) and pull the columns listed out of
-    query. If the query doesn't have data for a time slice, fill the gap with
+    Iterate over specified time points and pull the columns listed out of
+    query. If the query doesn't have data for a time point, fill the gap with
     an appropriate number of zeroes.
 
     """
 
     iterator = PeekableIterator(query)
-    step = timedelta_by_name(interval)
-    current_slice = stop_time
-
-    while current_slice > start_time:
+    for t in time_points:
         row = iterator.peek()
 
-        if row and row.date == current_slice:
-            yield current_slice, tuple(getattr(row, c) for c in columns)
+        if row and row.date == t:
+            yield t, tuple(getattr(row, c) for c in columns)
             iterator.next()
         else:
-            yield current_slice, tuple(0 for c in columns)
-
-        # moving backwards a month isn't a fixed timedelta -- special case it
-        if interval != "month":
-            current_slice -= step
-        else:
-            current_slice = decrement_month(current_slice)
+            yield t, tuple(0 for c in columns)
 
 
 def fill_gaps(*args, **kwargs):
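For readers skimming the diff: the generator no longer computes its own time
slices; it walks whatever list of points the caller supplies, in the same
reverse-chronological order as the query results. A minimal standalone sketch
of that contract, with illustrative names (Row and fill_gaps_sketch are not
from the codebase):

from collections import namedtuple
from datetime import datetime, timedelta

Row = namedtuple("Row", ["date", "pageview_count"])

def fill_gaps_sketch(time_points, rows, *columns):
    rows = iter(rows)
    row = next(rows, None)
    for t in time_points:
        if row is not None and row.date == t:
            # a row exists for this time point: emit its columns
            yield t, tuple(getattr(row, c) for c in columns)
            row = next(rows, None)
        else:
            # no data for this time point: fill the gap with zeroes
            yield t, tuple(0 for _ in columns)

stop = datetime(2012, 5, 1)
points = [stop - timedelta(hours=i) for i in range(4)]  # newest first
rows = [Row(points[0], 12), Row(points[2], 7)]          # two hours have no data
assert list(fill_gaps_sketch(points, rows, "pageview_count")) == [
    (points[0], (12,)), (points[1], (0,)),
    (points[2], (7,)), (points[3], (0,)),
]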
@@ -168,27 +158,42 @@ time_range_by_interval = dict(hour=datetime.timedelta(days=4),
                               month=datetime.timedelta(weeks=52))
 
 
-def time_range(interval):
-    """Calculate the time range to view for a given level of precision.
+def get_time_points(interval, start_time=None, stop_time=None):
+    """Return time points for given interval type.
 
-    The coarser our granularity, the more history we'll want to see.
+    Time points are in reverse chronological order to match the sort of
+    queries this will be used with. If start_time and stop_time are not
+    specified they will be picked based on the interval.
 
     """
 
-    # the stop time is the most recent slice-time; get this by truncating
-    # the appropriate amount from the current time
-    stop_time = datetime.datetime.utcnow()
-    stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
-    if interval in ("day", "month"):
-        stop_time = stop_time.replace(hour=0)
-        if interval == "month":
-            stop_time = stop_time.replace(day=1)
+    if start_time and stop_time:
+        start_time, stop_time = sorted([start_time, stop_time])
+    else:
+        # the stop time is the most recent slice-time; get this by truncating
+        # the appropriate amount from the current time
+        stop_time = datetime.datetime.utcnow()
+        stop_time = stop_time.replace(minute=0, second=0, microsecond=0)
+        if interval in ("day", "month"):
+            stop_time = stop_time.replace(hour=0)
+            if interval == "month":
+                stop_time = stop_time.replace(day=1)
 
-    # then the start time is easy to work out
-    range = time_range_by_interval[interval]
-    start_time = stop_time - range
+        # then the start time is easy to work out
+        range = time_range_by_interval[interval]
+        start_time = stop_time - range
 
-    return start_time, stop_time
+    step = timedelta_by_name(interval)
+    current_time = stop_time
+    time_points = []
+
+    while current_time >= start_time:
+        time_points.append(current_time)
+        if interval != 'month':
+            current_time -= step
+        else:
+            current_time = decrement_month(current_time)
+    return time_points
 
 
 def points_for_interval(interval):
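The month branch exists because a calendar month is not a fixed timedelta. A
hedged sketch of the stepping logic (decrement_month_sketch is an assumed
stand-in for the module's decrement_month helper; the real function also picks
default bounds from time_range_by_interval when none are given):

from datetime import datetime, timedelta

def decrement_month_sketch(date):
    # step back one calendar month; safe here because month points are
    # truncated to the first of the month
    if date.month == 1:
        return date.replace(year=date.year - 1, month=12)
    return date.replace(month=date.month - 1)

def time_points_sketch(interval, start_time, stop_time):
    step = {"hour": timedelta(hours=1), "day": timedelta(days=1)}.get(interval)
    points, current = [], stop_time
    while current >= start_time:
        points.append(current)  # reverse chronological: newest first
        if interval != "month":
            current -= step
        else:
            current = decrement_month_sketch(current)
    return points

points = time_points_sketch("month", datetime(2012, 1, 1), datetime(2012, 5, 1))
assert [(p.year, p.month) for p in points] == [
    (2012, 5), (2012, 4), (2012, 3), (2012, 2), (2012, 1)]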
@@ -200,10 +205,9 @@ def points_for_interval(interval):
 def make_history_query(cls, interval):
     """Build a generic query showing the history of a given aggregate."""
 
-    start_time, stop_time = time_range(interval)
-
+    time_points = get_time_points(interval)
     q = (Session.query(cls)
-         .filter(cls.date >= start_time))
+         .filter(cls.date.in_(time_points)))
 
     # subscription stats doesn't have an interval (it's only daily)
     if hasattr(cls, "interval"):
@@ -211,7 +215,7 @@ def make_history_query(cls, interval):
 
     q = q.order_by(desc(cls.date))
 
-    return start_time, stop_time, q
+    return time_points, q
 
 
 def top_last_month(cls, key):
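The recurring query change through the rest of the diff is this swap from an
open-ended date comparison to an explicit IN list over the expected slice
timestamps. A toy SQLAlchemy illustration (assumes SQLAlchemy 1.4+, and a
stand-in table rather than the real traffic schema):

from datetime import datetime
from sqlalchemy import Column, DateTime, Integer, desc, select
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class TrafficStat(Base):
    __tablename__ = "traffic_stat"  # stand-in for the real traffic tables
    id = Column(Integer, primary_key=True)
    date = Column(DateTime)

time_points = [datetime(2012, 5, 1, h) for h in (2, 1, 0)]  # newest first
stmt = (select(TrafficStat)
        .where(TrafficStat.date.in_(time_points))
        .order_by(desc(TrafficStat.date)))
print(stmt)  # renders a parameterized IN clause over the listed dates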
@@ -241,14 +245,16 @@ def totals(cls, interval):
     effectively filters out all DART / 300x100 etc. traffic numbers.
 
     """
-    start_time, stop_time = time_range(interval)
+    time_points = get_time_points(interval)
+
     q = (Session.query(cls.date, sum(cls.pageview_count).label("sum"))
          .filter(cls.interval == interval)
-         .filter(cls.date > start_time)
+         .filter(cls.date.in_(time_points))
          .filter(cls.codename.startswith(Link._type_prefix))
          .group_by(cls.date)
          .order_by(desc(cls.date)))
-    return fill_gaps(interval, start_time, stop_time, q, "sum")
+
+    return fill_gaps(time_points, q, "sum")
 
 
 def total_by_codename(cls, codenames):
@@ -263,11 +269,11 @@ def total_by_codename(cls, codenames):
 
 def promotion_history(cls, codename, start, stop):
     """Get hourly traffic for a self-serve promotion across all campaigns."""
+    time_points = get_time_points('hour', start, stop)
     q = (Session.query(cls)
          .filter(cls.interval == "hour")
          .filter(cls.codename == codename)
-         .filter(cls.date >= start)
-         .filter(cls.date <= stop)
+         .filter(cls.date.in_(time_points))
          .order_by(cls.date))
     return [(r.date, (r.unique_count, r.pageview_count)) for r in q.all()]
 
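promotion_history is the one call site that passes explicit bounds; because
get_time_points sorts them, the caller's argument order doesn't matter. A
trivial check of that guard (toy dates, illustrative only):

from datetime import datetime

start, stop = datetime(2012, 5, 3), datetime(2012, 5, 1)  # deliberately swapped
start, stop = sorted([start, stop])
assert (start, stop) == (datetime(2012, 5, 1), datetime(2012, 5, 3))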
@@ -292,9 +298,8 @@ class SitewidePageviews(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval):
-        start_time, stop_time, q = make_history_query(cls, interval)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        time_points, q = make_history_query(cls, interval)
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
 
 class PageviewsBySubreddit(Base):
@@ -309,10 +314,9 @@ class PageviewsBySubreddit(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600 * 6)
@@ -342,10 +346,9 @@ class PageviewsByLanguage(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, lang):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.lang == lang)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600 * 6)
@@ -365,10 +368,9 @@ class ClickthroughsByCodename(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q, "unique_count",
-                         "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600)
@@ -414,10 +416,9 @@ class AdImpressionsByCodename(Base):
     @classmethod
     @memoize_traffic(time=3600)
     def history(cls, interval, codename):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.codename == codename)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "unique_count", "pageview_count")
+        return fill_gaps(time_points, q, "unique_count", "pageview_count")
 
     @classmethod
     @memoize_traffic(time=3600)
@@ -442,9 +443,9 @@ class AdImpressionsByCodename(Base):
         The 300x100 ads get a codename that looks like "fullname_campaign".
         This function gets a list of recent campaigns.
         """
-        start_time, stop_time = time_range("day")
+        time_points = get_time_points('day')
         query = (Session.query(distinct(cls.codename).label("codename"))
-                  .filter(cls.date > start_time)
+                  .filter(cls.date.in_(time_points))
                   .filter(cls.codename.startswith(fullname)))
         return [row.codename for row in query]
 
@@ -480,10 +481,9 @@ class SubscriptionsBySubreddit(Base):
     @classmethod
     @memoize_traffic(time=3600 * 6)
     def history(cls, interval, subreddit):
-        start_time, stop_time, q = make_history_query(cls, interval)
+        time_points, q = make_history_query(cls, interval)
         q = q.filter(cls.subreddit == subreddit)
-        return fill_gaps(interval, start_time, stop_time, q,
-                         "subscriber_count")
+        return fill_gaps(time_points, q, "subscriber_count")
 
 # create the tables if they don't exist
 if g.db_create_tables: