Restore old runscripts for backwards compatibility.

These are still deprecated, but they are necessary to keep running if you installed before the Natty installer.
2026-04-05 03:00:15 -04:00 · 2011-09-20 12:26:22 -07:00
parent bf6c671551
commit de2102e975
12 changed files with 576 additions and 0 deletions
--- a/config/cassandra/cassandra.yaml
+++ b/config/cassandra/cassandra.yaml
@@ -0,0 +1,460 @@
+# Cassandra storage config YAML 
+
+# NOTE:
+#   See http://wiki.apache.org/cassandra/StorageConfiguration for
+#   full explanations of configuration directives
+# /NOTE
+
+# The name of the cluster. This is mainly used to prevent machines in
+# one logical cluster from joining another.
+cluster_name: 'reddit'
+
+# You should always specify InitialToken when setting up a production
+# cluster for the first time, and often when adding capacity later.
+# The principle is that each node should be given an equal slice of
+# the token ring; see http://wiki.apache.org/cassandra/Operations
+# for more details.
+#
+# If blank, Cassandra will request a token bisecting the range of
+# the heaviest-loaded existing node.  If there is no load information
+# available, such as is the case with a new cluster, it will pick
+# a random token, which will lead to hot spots.
+initial_token:
+
+# Set to true to make new [non-seed] nodes automatically migrate data
+# to themselves from the pre-existing nodes in the cluster.  Defaults
+# to false because you can only bootstrap N machines at a time from
+# an existing cluster of N, so if you are bringing up a cluster of
+# 10 machines with 3 seeds you would have to do it in stages.  Leaving
+# this off for the initial start simplifies that.
+auto_bootstrap: false
+
+# See http://wiki.apache.org/cassandra/HintedHandoff
+hinted_handoff_enabled: true
+
+# authentication backend, implementing IAuthenticator; used to identify users
+authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
+
+# authorization backend, implementing IAuthority; used to limit access/provide permissions
+authority: org.apache.cassandra.auth.AllowAllAuthority
+
+# The partitioner is responsible for distributing rows (by key) across
+# nodes in the cluster.  Any IPartitioner may be used, including your
+# own as long as it is on the classpath.  Out of the box, Cassandra
+# provides org.apache.cassandra.dht.RandomPartitioner
+# org.apache.cassandra.dht.ByteOrderedPartitioner,
+# org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
+# and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
+# (deprecated).
+# 
+# - RandomPartitioner distributes rows across the cluster evenly by md5.
+#   When in doubt, this is the best option.
+# - ByteOrderedPartitioner orders rows lexically by key bytes.  BOP allows
+#   scanning rows in key order, but the ordering can generate hot spots
+#   for sequential insertion workloads.
+# - OrderPreservingPartitioner is an obsolete form of BOP, that stores
+# - keys in a less-efficient format and only works with keys that are
+#   UTF8-encoded Strings.
+# - CollatingOPP colates according to EN,US rules rather than lexical byte
+#   ordering.  Use this as an example if you need custom collation.
+#
+# See http://wiki.apache.org/cassandra/Operations for more on
+# partitioners and token selection.
+partitioner: org.apache.cassandra.dht.RandomPartitioner
+
+# directories where Cassandra should store data on disk.
+data_file_directories:
+    - /cassandra/data
+
+# commit log
+commitlog_directory: /cassandra/commitlog
+
+# saved caches
+saved_caches_directory: /cassandra/saved_caches
+
+# Size to allow commitlog to grow to before creating a new segment 
+commitlog_rotation_threshold_in_mb: 128
+
+# commitlog_sync may be either "periodic" or "batch." 
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk.  It will wait up to
+# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
+# performing the sync.
+commitlog_sync: periodic
+
+# the other option is "timed," where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync_period_in_ms: 10000
+
+# Addresses of hosts that are deemed contact points.
+# Cassandra nodes use this list of hosts to find each other and learn
+# the topology of the ring.  You must change this if you are running
+# multiple nodes!
+seeds:
+    - 127.0.0.1
+
+# Access mode.  mmapped i/o is substantially faster, but only practical on
+# a 64bit machine (which notably does not include EC2 "small" instances)
+# or relatively small datasets.  "auto", the safe choice, will enable
+# mmapping on a 64bit JVM.  Other values are "mmap", "mmap_index_only"
+# (which may allow you to get part of the benefits of mmap on a 32bit
+# machine by mmapping only index files) and "standard".
+# (The buffer size settings that follow only apply to standard,
+# non-mmapped i/o.)
+disk_access_mode: mmap_index_only
+
+# Unlike most systems, in Cassandra writes are faster than reads, so
+# you can afford more of those in parallel.  A good rule of thumb is 2
+# concurrent reads per processor core.  Increase ConcurrentWrites to
+# the number of clients writing at once if you enable CommitLogSync +
+# CommitLogSyncDelay. -->
+concurrent_reads: 8
+concurrent_writes: 32
+
+# This sets the amount of memtable flush writer threads.  These will
+# be blocked by disk io, and each one will hold a memtable in memory
+# while blocked. If you have a large heap and many data directories,
+# you can increase this value for better flush performance.
+# By default this will be set to the amount of data directories defined.
+#memtable_flush_writers: 1
+
+# Buffer size to use when performing contiguous column slices. 
+# Increase this to the size of the column slices you typically perform
+sliced_buffer_size_in_kb: 64
+
+# TCP port, for commands and data
+storage_port: 7000
+
+# Address to bind to and tell other Cassandra nodes to connect to. You
+# _must_ change this if you want multiple nodes to be able to
+# communicate!
+# 
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing *if* the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting this to 0.0.0.0 is always wrong.
+listen_address: 127.0.0.1
+
+# The address to bind the Thrift RPC service to -- clients connect
+# here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
+# you want Thrift to listen on all interfaces.
+# 
+# Leaving this blank has the same effect it does for ListenAddress,
+# (i.e. it will be based on the configured hostname of the node).
+rpc_address: 0.0.0.0
+# port for Thrift to listen for clients on
+rpc_port: 9160
+
+# enable or disable keepalive on rpc connections
+rpc_keepalive: true
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum field length).
+# 0 disables TFramedTransport in favor of TSocket. This option
+# is deprecated; we strongly recommend using Framed mode.
+thrift_framed_transport_size_in_mb: 15
+
+# The max length of a thrift message, including all fields and
+# internal thrift overhead.
+thrift_max_message_length_in_mb: 16
+
+# Whether or not to take a snapshot before each compaction.  Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you.  Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# change this to increase the compaction thread's priority.  In java, 1 is the
+# lowest priority and that is our default.
+# compaction_thread_priority: 1
+
+# The threshold size in megabytes the binary memtable must grow to,
+# before it's submitted for flushing to disk.
+binary_memtable_throughput_in_mb: 256
+
+# Add column indexes to a row after its contents reach this size.
+# Increase if your column values are large, or if you have a very large
+# number of columns.  The competing causes are, Cassandra has to
+# deserialize this much of the row to read a single column, so you want
+# it to be small - at least if you do many partial-row reads - but all
+# the index data is read for each access, so you don't want to generate
+# that wastefully either.
+column_index_size_in_kb: 64
+
+# Size limit for rows being compacted in memory.  Larger rows will spill
+# over to disk and use a slower two-pass compaction process.  A message
+# will be logged specifying the row key.
+in_memory_compaction_limit_in_mb: 64
+
+# Time to wait for a reply from other nodes before failing the command 
+rpc_timeout_in_ms: 20000
+
+# phi value that must be reached for a host to be marked down.
+# most users should never need to adjust this.
+phi_convict_threshold: 10
+
+# endpoint_snitch -- Set this to a class that implements
+# IEndpointSnitch, which will let Cassandra know enough
+# about your network topology to route requests efficiently.
+# Out of the box, Cassandra provides
+#  - org.apache.cassandra.locator.SimpleSnitch:
+#    Treats Strategy order as proximity. This improves cache locality
+#    when disabling read repair, which can further improve throughput.
+#  - org.apache.cassandra.locator.RackInferringSnitch:
+#    Proximity is determined by rack and data center, which are
+#    assumed to correspond to the 3rd and 2nd octet of each node's
+#    IP address, respectively
+# org.apache.cassandra.locator.PropertyFileSnitch:
+#  - Proximity is determined by rack and data center, which are
+#    explicitly configured in cassandra-topology.properties.
+endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
+
+# dynamic_snitch -- This boolean controls whether the above snitch is
+# wrapped with a dynamic snitch, which will monitor read latencies
+# and avoid reading from hosts that have slowed (due to compaction,
+# for instance)
+dynamic_snitch: true
+# controls how often to perform the more expensive part of host score
+# calculation
+dynamic_snitch_update_interval_in_ms: 100 
+# controls how often to reset all host scores, allowing a bad host to
+# possibly recover
+dynamic_snitch_reset_interval_in_ms: 600000
+# if set greater than zero and read_repair_chance is < 1.0, this will allow
+# 'pinning' of replicas to hosts in order to increase cache capacity.
+# The badness threshold will control how much worse the pinned host has to be
+# before the dynamic snitch will prefer other replicas over it.  This is
+# expressed as a double which represents a percentage.  Thus, a value of
+# 0.2 means Cassandra would continue to prefer the static snitch values
+# until the pinned host was 20% worse than the fastest.
+dynamic_snitch_badness_threshold: 0.1
+
+# request_scheduler -- Set this to a class that implements
+# RequestScheduler, which will schedule incoming client requests
+# according to the specific policy. This is useful for multi-tenancy
+# with a single Cassandra cluster.
+# NOTE: This is specifically for requests from the client and does
+# not affect inter node communication.
+# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
+# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
+# client requests to a node with a separate queue for each
+# request_scheduler_id. The scheduler is further customized by
+# request_scheduler_options as described below.
+request_scheduler: org.apache.cassandra.scheduler.NoScheduler
+
+# Scheduler Options vary based on the type of scheduler
+# NoScheduler - Has no options
+# RoundRobin
+#  - throttle_limit -- The throttle_limit is the number of in-flight
+#                      requests per client.  Requests beyond 
+#                      that limit are queued up until
+#                      running requests can complete.
+#                      The value of 80 here is twice the number of
+#                      concurrent_reads + concurrent_writes.
+#  - default_weight -- default_weight is optional and allows for
+#                      overriding the default which is 1.
+#  - weights -- Weights are optional and will default to 1 or the
+#               overridden default_weight. The weight translates into how
+#               many requests are handled during each turn of the
+#               RoundRobin, based on the scheduler id.
+#
+# request_scheduler_options:
+#    throttle_limit: 80
+#    default_weight: 5
+#    weights:
+#      Keyspace1: 1
+#      Keyspace2: 5
+
+# request_scheduler_id -- An identifer based on which to perform
+# the request scheduling. Currently the only valid option is keyspace.
+# request_scheduler_id: keyspace
+
+# The Index Interval determines how large the sampling of row keys
+#  is for a given SSTable. The larger the sampling, the more effective
+#  the index is at the cost of space.
+index_interval: 128
+
+# Keyspaces have ColumnFamilies.        (Usually 1 KS per application.)
+# ColumnFamilies have Rows.             (Dozens of CFs per KS.)
+# Rows contain Columns.                 (Many per CF.)
+# Columns contain name:value:timestamp. (Many per Row.)
+#
+# A KS is most similar to a schema, and a CF is most similar to a relational table.
+#
+# Keyspaces, ColumnFamilies, and Columns may carry additional
+# metadata that change their behavior. These are as follows:
+#
+# Keyspace required parameters:
+# - name: name of the keyspace; "system" is
+#   reserved for Cassandra Internals.
+# - replica_placement_strategy: the class that determines how replicas
+#   are distributed among nodes. Contains both the class as well as
+#   configuration information.  Must extend AbstractReplicationStrategy.
+#   Out of the box, Cassandra provides 
+#     * org.apache.cassandra.locator.SimpleStrategy 
+#     * org.apache.cassandra.locator.NetworkTopologyStrategy
+#     * org.apache.cassandra.locator.OldNetworkTopologyStrategy
+#
+#   SimpleStrategy merely places the first
+#   replica at the node whose token is closest to the key (as determined
+#   by the Partitioner), and additional replicas on subsequent nodes
+#   along the ring in increasing Token order.
+# 
+#   With NetworkTopologyStrategy,
+#   for each datacenter, you can specify how many replicas you want
+#   on a per-keyspace basis.  Replicas are placed on different racks
+#   within each DC, if possible. This strategy also requires rack aware
+#   snitch, such as RackInferringSnitch or PropertyFileSnitch.
+#   An example:
+#    - name: Keyspace1
+#      replica_placement_strategy: org.apache.cassandra.locator.NetworkTopologyStrategy
+#      strategy_options:
+#        DC1 : 3
+#        DC2 : 2
+#        DC3 : 1
+# 
+#   OldNetworkToplogyStrategy [formerly RackAwareStrategy] 
+#   places one replica in each of two datacenters, and the third on a
+#   different rack in in the first.  Additional datacenters are not
+#   guaranteed to get a replica.  Additional replicas after three are placed
+#   in ring order after the third without regard to rack or datacenter.
+# - replication_factor: Number of replicas of each row
+# Keyspace optional paramaters:
+# - strategy_options: Additional information for the replication strategy.
+# - column_families:
+#     ColumnFamily required parameters:
+#     - name: name of the ColumnFamily.  Must not contain the character "-".
+#     - compare_with: tells Cassandra how to sort the columns for slicing
+#       operations. The default is BytesType, which is a straightforward
+#       lexical comparison of the bytes in each column.  Other options are
+#       AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, LongType,
+#       and IntegerType (a generic variable-length integer type).
+#       You can also specify the fully-qualified class name to a class of
+#       your choice extending org.apache.cassandra.db.marshal.AbstractType.
+#    
+#     ColumnFamily optional parameters:
+#     - keys_cached: specifies the number of keys per sstable whose
+#        locations we keep in memory in "mostly LRU" order.  (JUST the key
+#        locations, NOT any column values.) Specify a fraction (value less
+#        than 1) or an absolute number of keys to cache.  Defaults to 200000
+#        keys.
+#     - rows_cached: specifies the number of rows whose entire contents we
+#        cache in memory. Do not use this on ColumnFamilies with large rows,
+#        or ColumnFamilies with high write:read ratios. Specify a fraction
+#        (value less than 1) or an absolute number of rows to cache.
+#        Defaults to 0. (i.e. row caching is off by default)
+#     - comment: used to attach additional human-readable information about 
+#        the column family to its definition.
+#     - read_repair_chance: specifies the probability with which read
+#        repairs should be invoked on non-quorum reads.  must be between 0
+#        and 1. defaults to 1.0 (always read repair).
+#     - gc_grace_seconds: specifies the time to wait before garbage
+#        collecting tombstones (deletion markers). defaults to 864000 (10
+#        days). See http://wiki.apache.org/cassandra/DistributedDeletes
+#     - default_validation_class: specifies a validator class to use for
+#        validating all the column values in the CF.
+#     NOTE:
+#     min_ must be less than max_compaction_threshold!
+#     - min_compaction_threshold: the minimum number of SSTables needed
+#        to start a minor compaction.  increasing this will cause minor
+#        compactions to start less frequently and be more intensive. setting
+#        this to 0 disables minor compactions.  defaults to 4.
+#     - max_compaction_threshold: the maximum number of SSTables allowed
+#        before a minor compaction is forced.  decreasing this will cause
+#        minor compactions to start more frequently and be less intensive.
+#        setting this to 0 disables minor compactions.  defaults to 32.
+#     /NOTE
+#     - row_cache_save_period_in_seconds: number of seconds between saving
+#        row caches.  The row caches can be saved periodically and if one
+#        exists on startup it will be loaded.
+#     - key_cache_save_period_in_seconds: number of seconds between saving
+#        key caches.  The key caches can be saved periodically and if one 
+#        exists on startup it will be loaded.
+#     - memtable_flush_after_mins: The maximum time to leave a dirty table
+#        unflushed.  This should be large enough that it won't cause a flush
+#        storm of all memtables during periods of inactivity.
+#     - memtable_throughput_in_mb: The maximum size of the memtable before
+#        it is flushed.  If undefined, 1/8 * heapsize will be used.
+#     - memtable_operations_in_millions: Number of operations in millions
+#        before the memtable is flushed. If undefined, throughput / 64 * 0.3
+#        will be used.
+#     - column_metadata:
+#         Column required parameters:
+#         - name: binds a validator (and optionally an indexer) to columns 
+#            with this name in any row of the enclosing column family.
+#         - validator: like cf.compare_with, an AbstractType that checks
+#            that the value of the column is well-defined.
+#         Column optional parameters:
+#         NOTE:
+#         index_name cannot be set if index_type is not also set!
+#         - index_name: User-friendly name for the index.
+#         - index_type: The type of index to be created. Currently only
+#            KEYS is supported.
+#         /NOTE
+# 
+# NOTE:
+#   this keyspace definition is for demonstration purposes only.
+#   Cassandra will not load these definitions during startup. See
+#   http://wiki.apache.org/cassandra/FAQ#no_keyspaces for an explanation.
+# /NOTE
+keyspaces:
+    - name: reddit
+      replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy
+      replication_factor: 1
+      column_families:
+      - column_type: Standard
+        compare_with: BytesType
+        name: permacache
+        row_cache_save_period_in_seconds: 3600
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: BytesType
+        name: urls
+        row_cache_save_period_in_seconds: 3600
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: LinkVote
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: CommentVote
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: Friend
+        rows_cached: 10000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: Save
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: Hide
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: Click
+        rows_cached: 100000
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: VotesByLink
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: VotesByDay
+      - column_type: Standard
+        name: FriendsByAccount
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: SavesByAccount
+      - column_type: Standard
+        compare_with: UTF8Type
+        name: CommentSortsCache
+        row_cache_save_period_in_seconds: 3600
+        rows_cached: 200000