mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-01-11 16:07:56 -05:00
28 lines
862 B
Bash
Executable File
28 lines
862 B
Bash
Executable File
#!/bin/bash
|
|
# This is an example of how regenerating the query cache
|
|
# works. You will need to configure several things first:
|
|
#
|
|
# * configure postgres environment, see run-query.sh
|
|
# * prepare pig:
|
|
# * make sure the environment is set up for pig
|
|
# * rearrange the values in TypeIDs.java to match your
|
|
# instance's IDs
|
|
# * compile the pig UDFs (ant build in udfs/)
|
|
# * ensure the Cassandra JARs are on the CLASSPATH
|
|
|
|
# dump the relevant postgres tables to input/
|
|
./dump-all.sh
|
|
|
|
# process the postgres dumps in hadoop and generate
|
|
# output data suitable for cassandra
|
|
pig regenerate-query-cache.py
|
|
|
|
# for each column family we'll be writing to, generate
|
|
# sstables from the map-reduce output
|
|
for $cf in $(ls output/); do
|
|
jython tuples_to_sstables.py $cf output/$cf/*/part*
|
|
done
|
|
|
|
# bulk-load the sstables into cassandra
|
|
sstableloader reddit/
|