From c0bff7498bc6f464c3fc591b7c780bae19f278dd Mon Sep 17 00:00:00 2001
From: umbrae
Date: Thu, 29 May 2014 04:32:34 -0700
Subject: [PATCH] Support 'all' in compute_time_listings

---
Review notes (below the "---" marker, so not part of the commit message):
  * _get_cutoffs() maps "all" to a cutoff of 0.0; every other interval still
    gets epoch_seconds(timeago("1 <interval>")).
  * compute_time_listings rewrites INTERVAL from "all" to "century" just before
    the MINID query; THING_DUMP is assigned earlier in the script, so it is
    still built from the original $INTERVAL value.
  * $INTERVAL is quoted in the [ ... ] test so that an empty or missing second
    argument compares as an empty string instead of breaking the test.

 r2/r2/lib/mr_top.py           | 14 ++++++++++++--
 scripts/compute_time_listings |  8 ++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/r2/r2/lib/mr_top.py b/r2/r2/lib/mr_top.py
index ec659a5e8..013e35535 100644
--- a/r2/r2/lib/mr_top.py
+++ b/r2/r2/lib/mr_top.py
@@ -45,9 +45,19 @@ def join_things():
     mr_tools.join_things(('url', 'sr_id', 'author_id'))
 
 
+def _get_cutoffs(intervals):
+    cutoffs = {}
+    for interval in intervals:
+        if interval == "all":
+            cutoffs["all"] = 0.0
+        else:
+            cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))
+
+    return cutoffs
+
+
 def time_listings(intervals):
-    cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
-                          for interval in intervals}
+    cutoff_by_interval = _get_cutoffs(intervals)
 
     @mr_tools.dataspec_m_thing(
         ("url", str),
diff --git a/scripts/compute_time_listings b/scripts/compute_time_listings
index ecd52c2b9..8699f6aac 100755
--- a/scripts/compute_time_listings
+++ b/scripts/compute_time_listings
@@ -36,10 +36,10 @@ export PGHOST=${PGHOST:-localhost}
 ## command line args
 # one of "link" or "comment"
 export THING_CLS="$1"
-# period of data to extract from postgres: e.g. "hour", "week", "year"
+# period of data to extract from postgres: e.g. "hour", "week", "year", "all"
 export INTERVAL="$2"
 # which period listings to update.
-# formatted as python tuple of strings: e.g. '("hour",)'
+# formatted as python tuple of strings: e.g. '("hour",)' or '("week", "all")' etc.
 export TIMES="$3"
 
 THING_DUMP=$TMPDIR/$THING_CLS-$INTERVAL-thing.dump
@@ -69,6 +69,10 @@ function reddit {
     paster --plugin=r2 run $REDDIT_INI $REDDIT_ROOT/r2/lib/mr_top.py -c "$1"
 }
 
+# Hack to let pg fetch all things with intervals
+if [ "$INTERVAL" = "all" ]; then
+    export INTERVAL="century"
+fi
 MINID=$(run_query "SELECT thing_id
                    FROM reddit_thing_$THING_CLS