mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-30 09:18:20 -05:00
76 lines
1.9 KiB
Bash
Executable File
76 lines
1.9 KiB
Bash
Executable File
#!/bin/bash
#
# Dumps the recent rows of one thing table, plus the matching author_id data
# rows, from postgres into two files under /scratch. The rest of this script
# pipes those files through r2/lib/mr_account.py.
#
# usage: KIND LINKDBHOST INTERVAL TIMES   (positional arguments, see below)
#
# expects two environment variables
# REDDIT_ROOT = path to the root of the reddit public code; the directory with the Makefile
# REDDIT_CONFIG = path to the ini file to use

# Fail early instead of cd-ing to $HOME or running paster without a config.
: "${REDDIT_ROOT:?REDDIT_ROOT must be set}"
: "${REDDIT_CONFIG:?REDDIT_CONFIG must be set}"

# All four positional arguments end up in file names or SQL text, so a missing
# one would silently produce a malformed path or query.
if [ "$#" -lt 4 ]; then
    echo "usage: $0 KIND LINKDBHOST INTERVAL TIMES" >&2
    exit 2
fi

export USER=ri

# e.g. link or comment
export KIND="$1"
# e.g. prec01 for links, db02s8 for comments
export LINKDBHOST="$2"
# e.g. 5432 for default pg or 6543 for pgbouncer
export DB_PORT=6543

# e.g. hour
export INTERVAL="$3"

# e.g., '("hour",)'
export TIMES="$4"

export THING="/scratch/profile-thing-$KIND.$INTERVAL.dump"
export DTHING="/scratch/profile-data-$KIND.$INTERVAL.dump"

cd "$REDDIT_ROOT" || exit 1

if [ -e "$THING" ]; then
    echo "cannot start because $THING exists"
    ls -l "$THING"
    exit 1
fi

# make this exist immediately to act as a lock
# noclobber makes the redirection fail if the file already exists, so two runs
# that both passed the check above cannot both take the lock.
if ! ( set -o noclobber; : > "$THING" ) 2> /dev/null; then
    echo "cannot create lock file $THING" >&2
    exit 1
fi

# On interrupt: remove both dump files and stop, rather than carrying on with
# the remaining steps after the files are gone.
trap 'rm -f -- "$THING" "$DTHING"; exit 1' SIGINT SIGTERM

# Runs one psql command against the newreddit database on $LINKDBHOST.
#   $1 - command text passed to psql -c
run_psql() {
    psql -F"\t" -A -t -d newreddit -U "$USER" -h "$LINKDBHOST" -p "$DB_PORT" \
        -c "$1"
}

# Prints a message to stderr, removes both dump files (which also releases the
# lock) and exits with status 1.
#   $1 - message to print
abort_dump() {
    echo "$1" >&2
    rm -f -- "$THING" "$DTHING"
    exit 1
}

run_psql "\\copy (select t.thing_id, 'thing', '$KIND',
                 t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
            from reddit_thing_${KIND} t
           where not t.deleted
             and t.date > now() - interval '1 $INTERVAL'
         )
         to '$THING'" || abort_dump "dump of reddit_thing_${KIND} failed"

# get the min thing_id
# The query above has no ORDER BY, so scan every row instead of trusting the
# first line. The original field text is kept so large ids are printed as-is.
MINID=$(awk 'NR == 1 || $1 + 0 < min + 0 { min = $1 } END { if (NR) print min }' "$THING")

if [ -z "$MINID" ]; then
    # No rows were dumped: there is nothing to look up, and an empty MINID
    # would produce a malformed query. Leave an empty data dump instead.
    : > "$DTHING"
else
    run_psql "\\copy (select d.thing_id, 'data', '$KIND',
                     d.key, d.value
                from reddit_data_${KIND} d
               where d.thing_id >= $MINID
                 and d.key = 'author_id'
             )
             to '$DTHING'" || abort_dump "dump of reddit_data_${KIND} failed"
fi

# Sorts stdin to stdout, using /scratch for temporary files and a 200m
# sort buffer.
function mrsort {
    #psort -T/mnt/tmp -S50m
    sort -T/scratch -S200m
}

# Runs r2/lib/mr_account.py through paster with the ini file named by
# $REDDIT_CONFIG, passing the given text to its -c option.
#   $1 - text handed to -c, e.g. "join_links()"
# stdin and stdout are inherited, so calls can be chained in a pipeline.
function f {
    # Quoted so a config path containing whitespace stays a single argument.
    paster --plugin=r2 run "$REDDIT_CONFIG" r2/lib/mr_account.py -c "$1"
}

# Concatenate both dumps and run them through the stages in order:
# sort, join_links(), time_listings($TIMES), sort again, write_permacache().
# pipefail makes a failure in any stage show up in the pipeline's status
# instead of only the last stage's.
set -o pipefail
cat -- "$THING" "$DTHING" \
    | mrsort \
    | f "join_links()" \
    | f "time_listings($TIMES)" \
    | mrsort \
    | f "write_permacache()"
status=$?

# Always remove the dump files (this also releases the lock), then report the
# pipeline's result rather than rm's.
rm -f -- "$THING" "$DTHING"
exit "$status"