mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-27 03:00:12 -04:00
Make mr_top jobs more portable.
This commit is contained in:
@@ -1,12 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
# expects two environment variables to be set before running:
|
||||
# REDDIT_ROOT = path to the root of the reddit public code; the directory with the Makefile
|
||||
# REDDIT_CONFIG = path to the ini file to use
|
||||
|
||||
export USER=ri
|
||||
export INI=production_batch.ini
|
||||
|
||||
# e.g. link or comment
|
||||
export KIND="$1"
|
||||
# e.g. prec01 for links, db02s8 for comments
|
||||
export LINKDBHOST="$2"
|
||||
# e.g. 5432 for default pg or 6543 for pgbouncer
|
||||
export DB_PORT=6543
|
||||
|
||||
# e.g. hour
|
||||
export INTERVAL="$3"
|
||||
@@ -14,12 +19,10 @@ export INTERVAL="$3"
|
||||
# e.g., '("hour",)'
|
||||
export TIMES="$4"
|
||||
|
||||
export PATH=/usr/local/pgsql/bin:/usr/local/bin:$HOME/bin:$PATH
|
||||
|
||||
export THING=/scratch/profile-thing-$KIND.$INTERVAL.dump
|
||||
export DTHING=/scratch/profile-data-$KIND.$INTERVAL.dump
|
||||
|
||||
cd $HOME/reddit/r2
|
||||
cd $REDDIT_ROOT
|
||||
|
||||
if [ -e $THING ]; then
|
||||
echo cannot start because $THING exists
|
||||
@@ -32,7 +35,7 @@ trap "rm -f $THING $DTHING" SIGINT SIGTERM
|
||||
# make this exist immediately to act as a lock
|
||||
touch $THING
|
||||
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST -p $DB_PORT \
|
||||
-c "\\copy (select t.thing_id, 'thing', '$KIND',
|
||||
t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
|
||||
from reddit_thing_$KIND t
|
||||
@@ -44,7 +47,7 @@ psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
# get the min thing_id
|
||||
MINID=`head -n 1 $THING | awk '{print $1}'`
|
||||
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST -p $DB_PORT \
|
||||
-c "\\copy (select d.thing_id, 'data', '$KIND',
|
||||
d.key, d.value
|
||||
from reddit_data_$KIND d
|
||||
@@ -59,7 +62,7 @@ function mrsort {
|
||||
}
|
||||
|
||||
function f {
|
||||
paster --plugin=r2 run $INI r2/lib/mr_account.py -c "$1"
|
||||
paster --plugin=r2 run $REDDIT_CONFIG r2/lib/mr_account.py -c "$1"
|
||||
}
|
||||
|
||||
cat $THING $DTHING | \
|
||||
|
||||
@@ -1,21 +1,25 @@
|
||||
#!/bin/bash
|
||||
|
||||
# expects two environment variables to be set before running:
|
||||
# REDDIT_ROOT = path to the root of the reddit public code; the directory with the Makefile
|
||||
# REDDIT_CONFIG = path to the ini file to use
|
||||
|
||||
USER=ri
|
||||
LINKDBHOST=pg-05s0
|
||||
LINKDBHOST="$1"
|
||||
|
||||
# e.g. 'year'
|
||||
INTERVAL="$1"
|
||||
INTERVAL="$2"
|
||||
|
||||
# e.g. '("hour","day","week","month","year")'
|
||||
LISTINGS="$2"
|
||||
LISTINGS="$3"
|
||||
|
||||
INI=production_batch.ini
|
||||
# e.g. 5432 for default pg or 6543 for pgbouncer
|
||||
DB_PORT=6543
|
||||
|
||||
FNAME=/scratch/top-thing-links.$INTERVAL.dump
|
||||
DNAME=/scratch/top-data-links.$INTERVAL.dump
|
||||
export PATH=/usr/local/pgsql/bin:/usr/local/bin:$HOME/bin:$PATH
|
||||
|
||||
cd $HOME/reddit/r2
|
||||
cd $REDDIT_ROOT
|
||||
|
||||
if [ -e $FNAME ]; then
|
||||
echo cannot start because $FNAME existss
|
||||
@@ -28,7 +32,7 @@ trap "rm -f $FNAME $DNAME" SIGINT SIGTERM
|
||||
# make this exist immediately to act as a lock
|
||||
touch $FNAME
|
||||
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST -p $DB_PORT \
|
||||
-c "\\copy (select t.thing_id, 'thing', 'link',
|
||||
t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
|
||||
from reddit_thing_link t
|
||||
@@ -36,13 +40,13 @@ psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
and t.date > now() - interval '1 $INTERVAL'
|
||||
)
|
||||
to '$FNAME'"
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
|
||||
psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST -p $DB_PORT \
|
||||
-c "\\copy (select t.thing_id, 'data', 'link',
|
||||
d.key, d.value
|
||||
from reddit_data_link d, reddit_thing_link t
|
||||
where t.thing_id = d.thing_id
|
||||
and not t.spam and not t.deleted
|
||||
and (d.key = 'url' or d.key = 'sr_id')
|
||||
and d.key in ('url', 'sr_id')
|
||||
and t.date > now() - interval '1 $INTERVAL'
|
||||
) to '$DNAME'"
|
||||
|
||||
@@ -52,7 +56,7 @@ function mrsort {
|
||||
}
|
||||
|
||||
function f {
|
||||
paster --plugin=r2 run $INI r2/lib/mr_top.py -c "$1"
|
||||
paster --plugin=r2 run $REDDIT_CONFIG r2/lib/mr_top.py -c "$1"
|
||||
}
|
||||
|
||||
cat $FNAME $DNAME | \
|
||||
|
||||
Reference in New Issue
Block a user