Support 'all' in compute_time_listings

This commit is contained in:
umbrae
2014-05-29 04:32:34 -07:00
parent b840c6e197
commit c0bff7498b
2 changed files with 18 additions and 4 deletions

View File

@@ -45,9 +45,19 @@ def join_things():
mr_tools.join_things(('url', 'sr_id', 'author_id'))
def _get_cutoffs(intervals):
cutoffs = {}
for interval in intervals:
if interval == "all":
cutoffs["all"] = 0.0
else:
cutoffs[interval] = epoch_seconds(timeago("1 %s" % interval))
return cutoffs
def time_listings(intervals):
cutoff_by_interval = {interval: epoch_seconds(timeago("1 %s" % interval))
for interval in intervals}
cutoff_by_interval = _get_cutoffs(intervals)
@mr_tools.dataspec_m_thing(
("url", str),

View File

@@ -36,10 +36,10 @@ export PGHOST=${PGHOST:-localhost}
## command line args
# one of "link" or "comment"
export THING_CLS="$1"
# period of data to extract from postgres: e.g. "hour", "week", "year"
# period of data to extract from postgres: e.g. "hour", "week", "year", "all"
export INTERVAL="$2"
# which period listings to update.
# formatted as python tuple of strings: e.g. '("hour",)'
# formatted as python tuple of strings: e.g. '("hour",)' or '("week", "all")'
export TIMES="$3"
THING_DUMP=$TMPDIR/$THING_CLS-$INTERVAL-thing.dump
@@ -69,6 +69,10 @@ function reddit {
paster --plugin=r2 run $REDDIT_INI $REDDIT_ROOT/r2/lib/mr_top.py -c "$1"
}
# Hack to let pg fetch all things with intervals: "all" is rewritten to
# "century" before INTERVAL is used below.
# $INTERVAL is quoted so the test does not error out when the variable is
# empty ("unary operator expected") or contains whitespace ("too many
# arguments").
if [ "$INTERVAL" = "all" ]; then
export INTERVAL="century"
fi
MINID=$(run_query "SELECT thing_id
FROM reddit_thing_$THING_CLS