21 Jul 2010 merge

* Use Flaptor's Indextank product for search, including santip's patch
* for their new API
* Add Cassandra ConsistencyLevels to the ini file, and storage-conf.xml
* to the public repo
* Patch contributed by umbrae in ticket #929: Add jumpToContent support
* for Keyboard Accessibility
* reddit gold
    - paypal/postcard support
    - friends with benefits
    - profile-page sorting for gold members
    - move domain listings into the permacache
This commit is contained in:
ketralnis
2010-07-21 17:48:12 -07:00
parent 52da322156
commit 0ae8f2fb96
70 changed files with 2426 additions and 2551 deletions

View File

@@ -0,0 +1,216 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Cassandra storage configuration for the reddit cluster.
  ~ Three keyspaces are defined: permacache, urls, and reddit.
-->
<Storage>
  <!--======================================================================-->
  <!-- Basic Configuration                                                  -->
  <!--======================================================================-->
  <ClusterName>reddit</ClusterName>
  <AutoBootstrap>false</AutoBootstrap>
  <HintedHandoffEnabled>true</HintedHandoffEnabled>
  <Keyspaces>
    <!-- permacache: single BytesType column family with a large row cache -->
    <Keyspace Name="permacache">
      <ColumnFamily CompareWith="BytesType" Name="permacache" RowsCached="3000000" />
      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>3</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>
    <!-- urls: single UTF8Type column family, no row cache configured -->
    <Keyspace Name="urls">
      <ColumnFamily CompareWith="UTF8Type" Name="urls" />
      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>3</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>
    <!-- reddit: vote relations plus a view column family -->
    <Keyspace Name="reddit">
      <!-- Relations -->
      <ColumnFamily CompareWith="UTF8Type" Name="LinkVote" />
      <ColumnFamily CompareWith="UTF8Type" Name="CommentVote" />
      <!-- Views -->
      <ColumnFamily CompareWith="UTF8Type" Name="VotesByLink" />
      <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
      <ReplicationFactor>3</ReplicationFactor>
      <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
    </Keyspace>
  </Keyspaces>
  <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
  <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
  <!-- Left empty: each node picks / keeps its own token -->
  <InitialToken></InitialToken>
  <CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
  <DataFileDirectories>
    <DataFileDirectory>/cassandra/data</DataFileDirectory>
  </DataFileDirectories>
  <!-- Seed hosts contacted on startup to discover the ring -->
  <Seeds>
    <Seed>pmc01</Seed>
    <Seed>pmc02</Seed>
    <Seed>pmc03</Seed>
    <Seed>pmc06</Seed>
    <Seed>pmc07</Seed>
    <Seed>pmc08</Seed>
  </Seeds>
  <!-- Miscellaneous -->
  <!-- Time to wait for a reply from other nodes before failing the command -->
  <RpcTimeoutInMillis>30000</RpcTimeoutInMillis>
  <!-- phi value that must be reached before a host is marked as down.
       most users should never adjust this -->
  <PhiConvictThreshold>10</PhiConvictThreshold>
  <!-- Size to allow commitlog to grow to before creating a new segment -->
  <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
  <!-- Local hosts and ports -->
  <!-- Left empty: the node binds to the address resolved from its hostname -->
  <ListenAddress></ListenAddress>
  <!-- internal communications port -->
  <StoragePort>7000</StoragePort>
  <ThriftAddress></ThriftAddress>
  <!-- Thrift RPC port (the port clients connect to). -->
  <ThriftPort>9160</ThriftPort>
  <ThriftFramedTransport>false</ThriftFramedTransport>
  <!--======================================================================-->
  <!-- Memory, Disk, and Performance                                        -->
  <!--======================================================================-->
  <!--
    ~ Access mode. mmapped i/o is substantially faster, but only practical on
    ~ a 64bit machine (which notably does not include EC2 "small" instances)
    ~ or relatively small datasets. "auto", the safe choice, will enable
    ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
    ~ (which may allow you to get part of the benefits of mmap on a 32bit
    ~ machine by mmapping only index files) and "standard".
    ~ (The buffer size settings that follow only apply to standard,
    ~ non-mmapped i/o.)
  -->
  <DiskAccessMode>mmap_index_only</DiskAccessMode>
  <!--
    ~ Size of compacted row above which to log a warning. (If compacted
    ~ rows do not fit in memory, Cassandra will crash. This is explained
    ~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
    ~ scheduled to be fixed in 0.7.)
  -->
  <RowWarningThresholdInMB>512</RowWarningThresholdInMB>
  <!--
    ~ Buffer size to use when performing contiguous column slices. Increase
    ~ this to the size of the column slices you typically perform.
    ~ (Name-based queries are performed with a buffer size of
    ~ ColumnIndexSizeInKB.)
  -->
  <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
  <!--
    ~ Buffer size to use when flushing memtables to disk. (Only one
    ~ memtable is ever flushed at a time.) Increase (decrease) the index
    ~ buffer size relative to the data buffer if you have few (many)
    ~ columns per key. Bigger is only better _if_ your memtables get large
    ~ enough to use the space. (Check in your data directory after your
    ~ app has been running long enough.) -->
  <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
  <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
  <!--
    ~ Add column indexes to a row after its contents reach this size.
    ~ Increase if your column values are large, or if you have a very large
    ~ number of columns. The competing causes are: Cassandra has to
    ~ deserialize this much of the row to read a single column, so you want
    ~ it to be small - at least if you do many partial-row reads - but all
    ~ the index data is read for each access, so you don't want to generate
    ~ that wastefully either.
  -->
  <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
  <!--
    ~ Flush memtable after this much data has been inserted, including
    ~ overwritten data. There is one memtable per column family, and
    ~ this threshold is based solely on the amount of data stored, not
    ~ actual heap memory usage (there is some overhead in indexing the
    ~ columns).
  -->
  <MemtableThroughputInMB>64</MemtableThroughputInMB>
  <!--
    ~ Throughput setting for Binary Memtables. Typically these are
    ~ used for bulk load so you want them to be larger.
  -->
  <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
  <!--
    ~ The maximum number of columns in millions to store in memory per
    ~ ColumnFamily before flushing to disk. This is also a per-memtable
    ~ setting. Use with MemtableThroughputInMB to tune memory usage.
  -->
  <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
  <!--
    ~ The maximum time to leave a dirty memtable unflushed.
    ~ (While any affected columnfamilies have unflushed data from a
    ~ commit log segment, that segment cannot be deleted.)
    ~ This needs to be large enough that it won't cause a flush storm
    ~ of all your memtables flushing at once because none has hit
    ~ the size or count thresholds yet. For production, a larger
    ~ value such as 1440 is recommended.
  -->
  <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
  <!--
    ~ Unlike most systems, in Cassandra writes are faster than reads, so
    ~ you can afford more of those in parallel. A good rule of thumb is 2
    ~ concurrent reads per processor core. Increase ConcurrentWrites to
    ~ the number of clients writing at once if you enable CommitLogSync +
    ~ CommitLogSyncDelay. -->
  <ConcurrentReads>8</ConcurrentReads>
  <ConcurrentWrites>32</ConcurrentWrites>
  <!--
    ~ CommitLogSync may be either "periodic" or "batch." When in batch
    ~ mode, Cassandra won't ack writes until the commit log has been
    ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
    ~ milliseconds for other writes, before performing the sync.
    ~ This is less necessary in Cassandra than in traditional databases
    ~ since replication reduces the odds of losing data from a failure
    ~ after writing the log entry but before it actually reaches the disk.
    ~ So the other option is "periodic," where writes may be acked immediately
    ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
    ~ milliseconds.
  -->
  <CommitLogSync>periodic</CommitLogSync>
  <!--
    ~ Interval at which to perform syncs of the CommitLog in periodic mode.
    ~ Usually the default of 10000ms is fine; increase it if your i/o
    ~ load is such that syncs are taking excessively long times.
  -->
  <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
  <!--
    ~ Delay (in milliseconds) during which additional commit log entries
    ~ may be written before fsync in batch mode. This will increase
    ~ latency slightly, but can vastly improve throughput where there are
    ~ many writers. Set to zero to disable (each entry will be synced
    ~ individually). Reasonable values range from a minimal 0.1 to 10 or
    ~ even more if throughput matters more than latency.
  -->
  <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
  <!--
    ~ Time to wait before garbage-collection deletion markers. Set this to
    ~ a large enough value that you are confident that the deletion marker
    ~ will be propagated to all replicas by the time this many seconds has
    ~ elapsed, even in the face of hardware failures. The default value is
    ~ ten days.
  -->
  <GCGraceSeconds>864000</GCGraceSeconds>
</Storage>

View File

@@ -59,6 +59,11 @@ def draw_load(row_size = 12, width = 200, out_file = "/tmp/load.png"):
draw_box(" %s load: %s" % (host.host, host.load()),
get_load_level(host))
draw_box(" ==== MEDIA ==== ", "#BBBBBB", center = True)
for host in hosts:
if host.host.startswith('media'):
draw_box(" %s load: %s" % (host.host, host.load()),
get_load_level(host))
draw_box(" ==== SEARCH ==== ", "#BBBBBB", center = True)
for host in hosts:
if host.host.startswith('search'):

View File

@@ -92,6 +92,9 @@ servicecaches = 127.0.0.1:11211
permacache_memcaches = 127.0.0.1:11211
# cassandra hosts. one of these will be chosen at random by pycassa
cassandra_seeds = 127.0.0.1:9160
# read/write consistency levels for Cassandra
cassandra_rcl = ONE
cassandra_wcl = QUORUM
# -- url cache options --
url_caches = 127.0.0.1:11211
@@ -285,6 +288,8 @@ MIN_UP_KARMA = 1
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
QUOTA_THRESHOLD = 5
# Links and comments older than this many days qualify for historic preservation
REPLY_AGE_LIMIT = 180
# min amount of karma to edit
WIKI_KARMA = 100
@@ -302,6 +307,8 @@ num_comments = 200
max_comments = 500
# list of reddits to auto-subscribe users to
automatic_reddits =
# special reddit that only reddit gold subscribers can use
lounge_reddit =
# cutoff number of reddits to show unsubscribed users
num_default_reddits = 10
# how deep do we go into the top listing when fetching /random
@@ -338,5 +345,5 @@ beaker.session_secret = somesecret
# WARNING: *THE LINE BELOW MUST BE UNCOMMENTED ON A PRODUCTION ENVIRONMENT*
# Debug mode will enable the interactive debugging tool, allowing ANYONE to
# execute malicious code after an exception is raised.
set debug = true
#set debug = false

View File

@@ -169,6 +169,9 @@ def make_map(global_conf={}, app_conf={}):
mc('/message/moderator/:subwhere', controller='message', action='listing',
where = 'moderator')
mc('/thanks', controller='forms', action="thanks", secret = '')
mc('/thanks/:secret', controller='forms', action="thanks")
mc('/password', controller='forms', action="password")
mc('/:action', controller='front',
requirements=dict(action="random|framebuster|selfserviceoatmeal"))
@@ -202,6 +205,7 @@ def make_map(global_conf={}, app_conf={}):
requirements=dict(action="options|over18|unlogged_options|optout|optin|login|reg"))
mc('/api/distinguish/:how', controller='api', action="distinguish")
mc('/api/ipn/:secret', controller='api', action='ipn')
mc('/api/:action/:url_user', controller='api',
requirements=dict(action="login|register"))
mc('/api/gadget/click/:ids', controller = 'api', action='gadget', type='click')

View File

@@ -30,7 +30,7 @@ from r2.models import *
from r2.models.subreddit import Default as DefaultSR
from r2.lib.utils import get_title, sanitize_url, timeuntil, set_last_modified
from r2.lib.utils import query_string, timefromnow
from r2.lib.utils import query_string, timefromnow, randstr
from r2.lib.utils import timeago, tup, filter_links
from r2.lib.pages import FriendList, ContributorList, ModList, \
BannedList, BoringPage, FormPage, CssError, UploadedImage, \
@@ -55,6 +55,8 @@ from r2.lib.filters import safemarkdown
from datetime import datetime, timedelta
from md5 import md5
import urllib
import urllib2
def reject_vote(thing):
voteword = request.params.get('dir')
@@ -244,8 +246,6 @@ class ApiController(RedditController):
banmsg = None
banmsg = None
if kind == 'link':
check_domain = True
@@ -339,6 +339,7 @@ class ApiController(RedditController):
#update the queries
queries.new_link(l)
changed(l)
if then == 'comments':
path = add_sr(l.make_permalink_slow())
@@ -507,6 +508,9 @@ class ApiController(RedditController):
fn = getattr(container, 'remove_' + type)
fn(victim)
if type == "friend" and c.user.gold:
c.user.friend_rels_cache(_update=True)
if type in ("moderator", "contributor"):
Subreddit.special_reddits(victim, type, _update=True)
@@ -518,9 +522,10 @@ class ApiController(RedditController):
friend = VExistingUname('name'),
container = VByName('container'),
type = VOneOf('type', ('friend', 'moderator',
'contributor', 'banned')))
def POST_friend(self, form, jquery, ip, friend,
container, type):
'contributor', 'banned')),
note = VLength('note', 300))
def POST_friend(self, form, jquery, ip, friend,
container, type, note):
"""
Complement to POST_unfriend: handles friending as well as
privilege changes on subreddits.
@@ -544,6 +549,13 @@ class ApiController(RedditController):
new = fn(friend)
if type == "friend" and c.user.gold:
# Yes, the order of the next two lines is correct.
# First you recalculate the rel_ids, then you find
# the right one and update its data.
c.user.friend_rels_cache(_update=True)
c.user.add_friend_note(friend, note or '')
if type in ("moderator", "contributor"):
Subreddit.special_reddits(friend, type, _update=True)
@@ -574,6 +586,13 @@ class ApiController(RedditController):
queries.new_message(item, inbox_rel)
@validatedForm(VGold(),
friend = VExistingUname('name'),
note = VLength('note', 300))
def POST_friendnote(self, form, jquery, friend, note):
c.user.add_friend_note(friend, note)
form.set_html('.status', _("saved"))
@validatedForm(VUser('curpass', default = ''),
VModhash(),
email = ValidEmails("email", num = 1),
@@ -762,7 +781,9 @@ class ApiController(RedditController):
if ((link.is_self and link.author_id == c.user._id)
or not sr.should_ratelimit(c.user, 'comment')):
should_ratelimit = False
parent_age = c.start_time - parent._date
if parent_age.days > g.REPLY_AGE_LIMIT:
c.errors.add(errors.TOO_OLD, field = "parent")
#remove the ratelimit error if the user's karma is high
if not should_ratelimit:
c.errors.remove((errors.RATELIMIT, 'ratelimit'))
@@ -774,7 +795,8 @@ class ApiController(RedditController):
errors.RATELIMIT) and
not commentform.has_errors("parent",
errors.DELETED_COMMENT,
errors.DELETED_LINK)):
errors.DELETED_LINK,
errors.TOO_OLD)):
if is_message:
to = Account._byID(parent.author_id)
@@ -950,9 +972,6 @@ class ApiController(RedditController):
set_last_modified(c.user, 'liked')
set_last_modified(c.user, 'disliked')
# flag search indexer that something has changed
changed(thing)
@validatedForm(VUser(),
VModhash(),
# nop is safe: handled after auth checks below
@@ -991,7 +1010,7 @@ class ApiController(RedditController):
c.site.stylesheet_hash = md5(stylesheet_contents_parsed).hexdigest()
set_last_modified(c.site,'stylesheet_contents')
changed(c.site)
c.site._commit()
form.set_html(".status", _('saved'))
@@ -1221,6 +1240,7 @@ class ApiController(RedditController):
prefix = "create_reddit_")
queries.new_subreddit(sr)
changed(sr)
#editing an existing reddit
elif sr.is_moderator(c.user) or c.user_is_admin:
@@ -1268,7 +1288,7 @@ class ApiController(RedditController):
username = None
d = dict(username=username, q=q, sort=sort, t=t)
hex = md5(repr(d)).hexdigest()
key = "searchfeedback-%s-%s-%s" % (timestamp[:10], request.ip, hex)
key = "indextankfeedback-%s-%s-%s" % (timestamp[:10], request.ip, hex)
d['timestamp'] = timestamp
d['approval'] = approval
g.hardcache.set(key, d, time=86400 * 7)
@@ -1303,6 +1323,123 @@ class ApiController(RedditController):
jquery(".content").replace_things(w, True, True)
jquery(".content .link .rank").hide()
@noresponse(paypal_secret = VPrintable('secret', 50),
payment_status = VPrintable('payment_status', 20),
txn_id = VPrintable('txn_id', 20),
paying_id = VPrintable('payer_id', 50),
payer_email = VPrintable('payer_email', 250),
item_number = VPrintable('item_number', 20),
mc_currency = VPrintable('mc_currency', 20),
mc_gross = VFloat('mc_gross'))
def POST_ipn(self, paypal_secret, payment_status, txn_id,
paying_id, payer_email, item_number, mc_currency, mc_gross):
if paypal_secret != g.PAYPAL_SECRET:
log_text("invalid IPN secret",
"%s guessed the wrong IPN secret" % request.ip,
"warning")
raise ValueError
if request.POST:
parameters = request.POST.copy()
else:
parameters = request.GET.copy()
if payment_status is None:
payment_status = ''
psl = payment_status.lower()
if psl == '' and parameters['txn_type'] == 'subscr_signup':
return "Ok"
elif psl == '' and parameters['txn_type'] == 'subscr_cancel':
return "Ok"
elif parameters.get('txn_type', '') == 'send_money' and mc_gross < 3.95:
# Temporary block while the last of the "legacy" PWYW subscriptions
# roll in
for k, v in parameters.iteritems():
g.log.info("IPN: %r = %r" % (k, v))
return "Ok"
elif psl == 'completed':
pass
elif psl == 'refunded':
log_text("refund", "Just got notice of a refund.", "info")
# TODO: something useful when this happens -- and don't
# forget to verify first
return "Ok"
elif psl == 'pending':
log_text("pending",
"Just got notice of a Pending, whatever that is.", "info")
# TODO: something useful when this happens -- and don't
# forget to verify first
return "Ok"
else:
for k, v in parameters.iteritems():
g.log.info("IPN: %r = %r" % (k, v))
raise ValueError("Unknown IPN status: %r" % payment_status)
if mc_currency != 'USD':
raise ValueError("Somehow got non-USD IPN %r" % mc_currency)
if g.cache.get("ipn-debug"):
g.cache.delete("ipn-debug")
for k, v in parameters.iteritems():
g.log.info("IPN: %r = %r" % (k, v))
parameters['cmd']='_notify-validate'
try:
safer = dict([k, v.encode('utf-8')] for k, v in parameters.items())
params = urllib.urlencode(safer)
except UnicodeEncodeError:
g.log.error("problem urlencoding %r" % (parameters,))
raise
req = urllib2.Request(g.PAYPAL_URL, params)
req.add_header("Content-type", "application/x-www-form-urlencoded")
response = urllib2.urlopen(req)
status = response.read()
# TODO: stop not doing this
# if status != "VERIFIED":
# raise ValueError("Invalid IPN response: %r" % status)
pennies = int(mc_gross * 100)
if item_number and item_number == 'rgsub':
if pennies == 2999:
secret_prefix = "ys_"
elif pennies == 399:
secret_prefix = "m_"
else:
log_text("weird IPN subscription",
"Got %d pennies via PayPal?" % pennies, "error")
secret_prefix = "w_"
else:
secret_prefix = "o_"
gold_secret = secret_prefix + randstr(10)
create_unclaimed_gold("P" + txn_id, payer_email, paying_id,
pennies, gold_secret, c.start_time)
url = "http://www.reddit.com/thanks/" + gold_secret
# No point in i18n, since we don't have access to the user's
# language info (or name) at this point
body = """
Thanks for subscribing to reddit gold! We have received your PayPal
transaction, number %s.
Your secret subscription code is %s. You can use it to associate this
subscription with your reddit account -- just visit
%s
""" % (txn_id, gold_secret, url)
emailer.gold_email(body, payer_email, "reddit gold subscriptions")
g.log.info("Just got IPN for %d, secret=%s" % (pennies, gold_secret))
return "Ok"
@noresponse(VUser(),
VModhash(),
thing = VByName('id'))
@@ -1496,6 +1633,60 @@ class ApiController(RedditController):
return self.redirect("/static/css_submit.png")
@validatedForm(VUser(),
code = VPrintable("code", 30),
postcard_okay = VOneOf("postcard", ("yes", "no")),)
def POST_claimgold(self, form, jquery, code, postcard_okay):
if not code:
c.errors.add(errors.NO_TEXT, field = "code")
form.has_errors("code", errors.NO_TEXT)
return
if code.startswith("pc_"):
gold_type = 'postcard'
if postcard_okay is None:
jquery(".postcard").show()
form.set_html(".status", _("just one more question"))
return
else:
d = dict(user=c.user.name, okay=postcard_okay)
g.hardcache.set("postcard-" + code, d, 86400 * 30)
elif code.startswith("ys_"):
gold_type = 'yearly special'
elif code.startswith("m_"):
gold_type = 'monthly'
else:
gold_type = 'old'
pennies = claim_gold(code, c.user._id)
if pennies is None:
c.errors.add(errors.INVALID_CODE, field = "code")
log_text ("invalid gold claim",
"%s just tried to claim %s" % (c.user.name, code),
"info")
elif pennies == 0:
c.errors.add(errors.CLAIMED_CODE, field = "code")
log_text ("invalid gold reclaim",
"%s just tried to reclaim %s" % (c.user.name, code),
"info")
elif pennies > 0:
log_text ("valid gold claim",
"%s just claimed %s" % (c.user.name, code),
"info")
g.cache.set("recent-gold-" + c.user.name, True, 600)
c.user.creddits += pennies
c.user.gold_type = gold_type
admintools.engolden(c.user, postcard_okay)
form.set_html(".status", _("claimed!"))
jquery(".lounge").show()
else:
raise ValueError("pennies = %r?" % pennies)
# Activate any errors we just manually set
form.has_errors("code", errors.INVALID_CODE, errors.CLAIMED_CODE,
errors.NO_TEXT)
@validatedForm(user = VUserWithEmail('name'))
def POST_password(self, form, jquery, user):
if form.has_errors('name', errors.USER_DOESNT_EXIST):
@@ -1568,7 +1759,8 @@ class ApiController(RedditController):
if action != 'sub' or sr.can_comment(c.user):
self._subscribe(sr, action == 'sub')
def _subscribe(self, sr, sub):
@classmethod
def _subscribe(cls, sr, sub):
try:
Subreddit.subscribe_defaults(c.user)
@@ -1578,7 +1770,7 @@ class ApiController(RedditController):
else:
if sr.remove_subscriber(c.user):
sr._incr('_ups', -1)
changed(sr)
changed(sr, True)
except CreationError:
# This only seems to happen when someone is pounding on the
# subscribe button or the DBs are really lagged; either way,

View File

@@ -76,9 +76,11 @@ error_list = dict((
('BAD_CARD', _('card problem: %(message)s')),
('TOO_LONG', _("this is too long (max: %(max_length)s)")),
('NO_TEXT', _('we need something here')),
('INVALID_CODE', _("we've never seen that code before")),
('CLAIMED_CODE', _("that code has already been claimed -- perhaps by you?")),
('NO_SELFS', _("that reddit doesn't allow text posts")),
('NO_LINKS', _("that reddit only allows text posts")),
('TOO_OLD', _("that's a piece of history now; it's too late to reply to it")),
))
errors = Storage([(e, e) for e in error_list.keys()])
@@ -136,3 +138,4 @@ class ErrorSet(object):
class UserRequiredException(Exception): pass
class VerifiedUserRequiredException(Exception): pass
class GoldRequiredException(Exception): pass

View File

@@ -36,7 +36,8 @@ from r2.lib.emailer import has_opted_out, Email
from r2.lib.db.operators import desc
from r2.lib.db import queries
from r2.lib.strings import strings
from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery, LinkSearchQuery
from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery
from r2.lib.indextank import IndextankQuery
from r2.lib.contrib.pysolr import SolrError
from r2.lib import jsontemplates
from r2.lib import sup
@@ -196,7 +197,11 @@ class FrontController(RedditController):
# insert reply box only for logged in user
if c.user_is_loggedin and can_comment_link(article) and not is_api():
#no comment box for permalinks
display = not bool(comment)
display = False
if not comment:
age = c.start_time - article._date
if age.days < g.REPLY_AGE_LIMIT:
display = True
displayPane.append(UserText(item = article, creating = True,
post_form = 'comment',
display = display,
@@ -463,33 +468,15 @@ class FrontController(RedditController):
verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$")
@base_listing
@validate(query = nop('q'),
time = VMenu('action', TimeMenu),
sort = VMenu('sort', SearchSortMenu),
langs = nop('langs'))
def GET_search(self, query, num, time, reverse, after, count, langs, sort):
sort = VMenu('sort', SearchSortMenu, remember=False))
def GET_search(self, query, num, reverse, after, count, sort):
"""Search links page."""
if query and '.' in query:
url = sanitize_url(query, require_scheme = True)
if url:
return self.redirect("/submit" + query_string({'url':url}))
if langs and self.verify_langs_regex.match(langs):
langs = langs.split(',')
else:
langs = c.content_langs
subreddits = None
authors = None
if c.site == subreddit.Friends and c.user_is_loggedin and c.user.friends:
authors = c.user.friends
elif isinstance(c.site, MultiReddit):
subreddits = c.site.sr_ids
elif not isinstance(c.site, FakeSubreddit):
subreddits = [c.site._id]
q = LinkSearchQuery(q = query, timerange = time, langs = langs,
subreddits = subreddits, authors = authors,
sort = SearchSortMenu.operator(sort))
q = IndextankQuery(query, c.site, sort)
num, t, spane = self._search(q, num = num, after = after, reverse = reverse,
count = count)
@@ -505,9 +492,8 @@ class FrontController(RedditController):
infotext = None
res = SearchPage(_('search results'), query, t, num, content=spane,
nav_menus = [TimeMenu(default = time),
SearchSortMenu(default=sort)],
search_params = dict(sort = sort, t = time),
nav_menus = [SearchSortMenu(default=sort)],
search_params = dict(sort = sort),
infotext = infotext).render()
return res
@@ -912,3 +898,9 @@ class FormsController(RedditController):
def GET_try_compact(self, dest):
c.render_style = "compact"
return TryCompact(dest = dest).render()
@validate(VUser(),
secret=VPrintable("secret", 50))
def GET_thanks(self, secret):
"""The page to claim reddit gold trophies"""
return BoringPage(_("thanks"), content=Thanks(secret)).render()

View File

@@ -25,7 +25,7 @@ from validator import *
from r2.models import *
from r2.lib.pages import *
from r2.lib.pages.things import wrap_links
from r2.lib.menus import NewMenu, TimeMenu, SortMenu, RecSortMenu
from r2.lib.menus import NewMenu, TimeMenu, SortMenu, RecSortMenu, ProfileSortMenu
from r2.lib.menus import ControversyTimeMenu
from r2.lib.rising import get_rising
from r2.lib.wrapped import Wrapped
@@ -36,6 +36,7 @@ from r2.lib.strings import Score
from r2.lib import organic
from r2.lib.jsontemplates import is_api
from r2.lib.solrsearch import SearchQuery
from r2.lib.indextank import IndextankQuery
from r2.lib.utils import iters, check_cheating, timeago
from r2.lib.utils.trial_utils import populate_spotlight
from r2.lib import sup
@@ -85,7 +86,7 @@ class ListingController(RedditController):
"""list of menus underneat the header (e.g., sort, time, kind,
etc) to be displayed on this listing page"""
return []
@base_listing
def build_listing(self, num, after, reverse, count):
"""uses the query() method to define the contents of the
@@ -123,7 +124,7 @@ class ListingController(RedditController):
builder_cls = self.builder_cls
elif isinstance(self.query_obj, Query):
builder_cls = QueryBuilder
elif isinstance(self.query_obj, SearchQuery):
elif isinstance(self.query_obj, (SearchQuery,IndextankQuery)):
builder_cls = SearchBuilder
elif isinstance(self.query_obj, iters):
builder_cls = IDBuilder
@@ -235,7 +236,8 @@ class HotController(FixListing, ListingController):
elif pos != 0:
pos = pos % len(spotlight_links)
spotlight_links, pos = promote.insert_promoted(spotlight_links, pos)
if c.user.pref_show_sponsors or not c.user.gold:
spotlight_links, pos = promote.insert_promoted(spotlight_links, pos)
trial = populate_spotlight()
# Need to do this again, because if there was a duplicate removed,
@@ -313,10 +315,16 @@ class HotController(FixListing, ListingController):
self.fix_listing = False
if c.site == Default:
if c.user_is_loggedin:
srlimit = Subreddit.sr_limit
over18 = c.user.has_subscribed and c.over18
else:
srlimit = g.num_default_reddits
over18 = False
sr_ids = Subreddit.user_subreddits(c.user,
limit=(Subreddit.sr_limit
if c.user_is_loggedin
else g.num_default_reddits))
limit=srlimit,
over18=over18)
return normalized_hot(sr_ids)
#if not using the query_cache we still want cached front pages
elif (not g.use_query_cache
@@ -385,10 +393,10 @@ class NewController(ListingController):
return keep
def query(self):
res = None
if self.sort == 'rising':
res = get_rising(c.site)
return res or c.site.get_links('new', 'all')
return get_rising(c.site)
else:
return c.site.get_links('new', 'all')
@validate(sort = VMenu('controller', NewMenu))
def GET_listing(self, sort, **env):
@@ -485,6 +493,16 @@ class UserController(ListingController):
render_cls = ProfilePage
show_nums = False
@property
def menus(self):
res = []
if (self.vuser.gold and
self.where in ('overview', 'submitted', 'comments')):
res.append(ProfileSortMenu(default = self.sort))
if self.sort not in ("hot", "new"):
res.append(TimeMenu(default = self.time))
return res
def title(self):
titles = {'overview': _("overview for %(user)s"),
'comments': _("comments by %(user)s"),
@@ -501,7 +519,12 @@ class UserController(ListingController):
def keep_fn(self):
# keep promotions off of profile pages.
def keep(item):
return (getattr(item, "promoted", None) is None and
wouldkeep = True
if item._deleted:
return False
if self.time != 'all':
wouldkeep = (item._date > utils.timeago('1 %s' % str(self.time)))
return wouldkeep and (getattr(item, "promoted", None) is None and
(self.where == "deleted" or
not getattr(item, "deleted", False)))
return keep
@@ -510,17 +533,17 @@ class UserController(ListingController):
q = None
if self.where == 'overview':
self.check_modified(self.vuser, 'overview')
q = queries.get_overview(self.vuser, 'new', 'all')
q = queries.get_overview(self.vuser, self.sort, self.time)
elif self.where == 'comments':
sup.set_sup_header(self.vuser, 'commented')
self.check_modified(self.vuser, 'commented')
q = queries.get_comments(self.vuser, 'new', 'all')
q = queries.get_comments(self.vuser, self.sort, self.time)
elif self.where == 'submitted':
sup.set_sup_header(self.vuser, 'submitted')
self.check_modified(self.vuser, 'submitted')
q = queries.get_submitted(self.vuser, 'new', 'all')
q = queries.get_submitted(self.vuser, self.sort, self.time)
elif self.where in ('liked', 'disliked'):
sup.set_sup_header(self.vuser, self.where)
@@ -541,14 +564,25 @@ class UserController(ListingController):
return q
@validate(vuser = VExistingUname('username'))
def GET_listing(self, where, vuser, **env):
@validate(vuser = VExistingUname('username'),
sort = VMenu('t', ProfileSortMenu),
time = VMenu('t', TimeMenu))
def GET_listing(self, where, vuser, sort, time, **env):
self.where = where
self.sort = sort
self.time = time
# the validator will ensure that vuser is a valid account
if not vuser:
return self.abort404()
if not vuser.gold:
self.sort = 'new'
self.time = 'all'
if self.sort in ('hot', 'new'):
self.time = 'all'
# hide spammers profile pages
if (not c.user_is_loggedin or
(c.user._id != vuser._id and not c.user_is_admin)) \

View File

@@ -94,6 +94,8 @@ class PostController(ApiController):
pref_threaded_messages = VBoolean("threaded_messages"),
pref_collapse_read_messages = VBoolean("collapse_read_messages"),
pref_private_feeds = VBoolean("private_feeds"),
pref_show_adbox = VBoolean("show_adbox"),
pref_show_sponsors = VBoolean("show_sponsors"),
all_langs = nop('all-langs', default = 'all'))
def POST_options(self, all_langs, pref_lang, **kw):
#temporary. eventually we'll change pref_clickgadget to an
@@ -110,6 +112,10 @@ class PostController(ApiController):
if kw.get("pref_no_profanity") or c.user.pref_no_profanity:
kw['pref_label_nsfw'] = True
if not c.user.gold:
kw['pref_show_adbox'] = True
kw['pref_show_sponsors'] = True
self.set_options(all_langs, pref_lang, **kw)
u = UrlParser(c.site.path + "prefs")
u.update_query(done = 'true')

View File

@@ -35,6 +35,7 @@ from r2.lib.authorize import Address, CreditCard
from r2.controllers.errors import errors, UserRequiredException
from r2.controllers.errors import VerifiedUserRequiredException
from r2.controllers.errors import GoldRequiredException
from copy import copy
from datetime import datetime, timedelta
@@ -565,6 +566,12 @@ class VVerifiedUser(VUser):
if not c.user.email_verified:
raise VerifiedUserRequiredException
class VGold(VUser):
    """Validator: runs VUser's checks, then additionally requires that
    the logged-in user is a reddit gold subscriber."""
    def run(self):
        VUser.run(self)
        if not c.user.gold:
            raise GoldRequiredException
class VSponsorAdmin(VVerifiedUser):
"""
Validator which checks c.user_is_sponsor
@@ -1028,8 +1035,6 @@ class VRatelimit(Validator):
to_set = {}
if seconds is None:
seconds = g.RATELIMIT*60
if not seconds:
return
expire_time = datetime.now(g.tz) + timedelta(seconds = seconds)
if rate_user and c.user_is_loggedin:
to_set['user' + str(c.user._id36)] = expire_time

View File

@@ -287,6 +287,54 @@ def black_hole(queue):
consume_items(queue, _ignore)
def dedup_queue(queue, rk = None, limit=None,
delivery_mode = DELIVERY_DURABLE):
"""Hackily try to reduce the size of a queue by removing duplicate
messages. The consumers of the target queue must consider
identical messages to be idempotent. Preserves only message
bodies"""
# NOTE(review): indentation was lost in this view of the diff; the
# comments below follow the apparent control flow — confirm against
# the committed file
chan = connection_manager.get_channel()
if rk is None:
# republish under the queue's own name by default
rk = queue
bodies = set()
# drain the queue, remembering each distinct message body
while True:
msg = chan.basic_get(queue)
if msg is None:
# queue exhausted
break
if msg.body not in bodies:
bodies.add(msg.body)
if limit is None:
# first message: size the drain budget from the broker's count
limit = msg.delivery_info.get('message_count')
if limit is None:
default_max = 100*1000
print ("Message count was unavailable, defaulting to %d"
% (default_max,))
limit = default_max
else:
print "Grabbing %d messages" % (limit,)
else:
# body already seen; spend budget without adding an entry
limit -= 1
if limit <= 0:
break
elif limit % 1000 == 0:
# progress indicator
print limit
print "Grabbed %d unique bodies" % (len(bodies),)
if bodies:
# republish only the unique bodies, then ack everything consumed
for body in bodies:
_add_item(rk, body, delivery_mode = delivery_mode)
# presumably blocks until the amqp worker thread has flushed the
# republished items — TODO confirm
worker.join()
chan.basic_ack(0, multiple=True)
def _test_setup(test_q = 'test_q'):
from r2.lib.queues import RedditQueueMap

View File

@@ -27,7 +27,7 @@ import pycassa
from r2.lib.cache import LocalCache, SelfEmptyingCache
from r2.lib.cache import CMemcache
from r2.lib.cache import HardCache, MemcacheChain, MemcacheChain, HardcacheChain
from r2.lib.cache import CassandraCache, CassandraCacheChain, CacheChain, CL_ONE, CL_QUORUM
from r2.lib.cache import CassandraCache, CassandraCacheChain, CacheChain, CL_ONE, CL_QUORUM, CL_ZERO
from r2.lib.db.stats import QueryStats
from r2.lib.translation import get_active_langs
from r2.lib.lock import make_lock_factory
@@ -43,6 +43,7 @@ class Globals(object):
'MIN_DOWN_KARMA',
'MIN_RATE_LIMIT_KARMA',
'MIN_RATE_LIMIT_COMMENT_KARMA',
'REPLY_AGE_LIMIT',
'WIKI_KARMA',
'HOT_PAGE_AGE',
'MODWINDOW',
@@ -93,6 +94,15 @@ class Globals(object):
'allowed_css_linked_domains',
'authorized_cnames']
choice_props = {'cassandra_rcl': {'ZERO': CL_ZERO,
'ONE': CL_ONE,
'QUORUM': CL_QUORUM},
'cassandra_wcl': {'ZERO': CL_ZERO,
'ONE': CL_ONE,
'QUORUM': CL_QUORUM},
}
def __init__(self, global_conf, app_conf, paths, **extra):
"""
Globals acts as a container for objects available throughout
@@ -130,6 +140,11 @@ class Globals(object):
v = self.to_bool(v)
elif k in self.tuple_props:
v = tuple(self.to_iter(v))
elif k in self.choice_props:
if v not in self.choice_props[k]:
raise ValueError("Unknown option for %r: %r not in %r"
% (k, v, self.choice_props[k]))
v = self.choice_props[k][v]
setattr(self, k, v)
self.running_as_script = global_conf.get('running_as_script', False)
@@ -149,6 +164,8 @@ class Globals(object):
if not self.cassandra_seeds:
raise ValueError("cassandra_seeds not set in the .ini")
if not self.url_seeds:
raise ValueError("url_seeds not set in the .ini")
self.cassandra_seeds = list(self.cassandra_seeds)
random.shuffle(self.cassandra_seeds)
self.cassandra = pycassa.connect_thread_local(self.cassandra_seeds)
@@ -159,6 +176,8 @@ class Globals(object):
self.cassandra,
self.make_lock,
memcache = perma_memcache,
read_consistency_level = self.cassandra_rcl,
write_consistency_level = self.cassandra_wcl,
localcache_cls = localcache_cls)
self.cache_chains.append(self.permacache)
@@ -168,6 +187,11 @@ class Globals(object):
self.urlcache = self.init_cass_cache('urls', 'urls',
self.url_cassandra,
self.make_lock,
# until we've merged this
# with the regular
# cluster, this will
# always be CL_ONE
read_consistency_level = CL_ONE,
write_consistency_level = CL_ONE,
localcache_cls = localcache_cls)
self.cache_chains.append(self.urlcache)
@@ -309,6 +333,11 @@ class Globals(object):
if self.write_query_queue and not self.amqp_host:
raise Exception("amqp_host must be defined to use the query queue")
# This requirement doesn't *have* to be a requirement, but there are
# bugs at the moment that will pop up if you violate it
if self.write_query_queue and not self.use_query_cache:
raise Exception("write_query_queue requires use_query_cache")
# try to set the source control revision number
try:
popen = subprocess.Popen(["git", "log", "--date=short",
@@ -331,7 +360,7 @@ class Globals(object):
lock_factory,
memcache = None,
read_consistency_level = CL_ONE,
write_consistency_level = CL_QUORUM,
write_consistency_level = CL_ONE,
localcache_cls = LocalCache):
return CassandraCacheChain(localcache_cls(),
CassandraCache(keyspace, column_family,

View File

@@ -496,17 +496,20 @@ class CassandraCacheChain(CacheChain):
# chain, which means that changing the chain will probably
# require changing this function. (This has an edge-case
# where memcached was populated by a ONE read rather than
# a QUROUM one just before running this. We could avoid
# a QUORUM one just before running this. We could avoid
# this by not using memcached at all for these mutations,
# which would require some more row-cache performace
# testing)
rcl = wcl = self.cassa.write_consistency_level
if rcl == CL_ZERO:
rcl = CL_ONE
try:
value = None
if self.memcache:
value = self.memcache.get(key)
if value is None:
value = self.cassa.get(key,
read_consistency_level = CL_QUORUM)
read_consistency_level = rcl)
except cassandra.ttypes.NotFoundException:
value = default
@@ -521,13 +524,31 @@ class CassandraCacheChain(CacheChain):
if value != new_value:
self.cassa.set(key, new_value,
write_consistency_level = CL_QUORUM)
write_consistency_level = wcl)
for ca in self.caches[:-1]:
# and update the rest of the chain; assumes that
# Cassandra is always the last entry
ca.set(key, new_value)
return new_value
def bulk_load(self, start='', end='', chunk_size = 100):
"""Try to load everything out of Cassandra and put it into
memcached"""
cf = self.cassa.cf
# walk the whole row range in chunks to bound memory use
for rows in in_chunks(cf.get_range(start=start,
finish=end,
columns=['value']),
chunk_size):
# progress indicator: first key of the current chunk
print rows[0][0]
# unpickle each row's 'value' column; rows with no columns are
# skipped, as are keys too long for memcached (limit is 250 bytes)
rows = dict((key, pickle.loads(cols['value']))
for (key, cols)
in rows
if (cols
# hack
and len(key) < 250))
self.memcache.set_multi(rows)
class CassandraCache(CacheUtils):
"""A cache that uses a Cassandra cluster. Uses a single keyspace
and column family and only the column-name 'value'"""
@@ -537,6 +558,8 @@ class CassandraCache(CacheUtils):
self.keyspace = keyspace
self.column_family = column_family
self.client = client
self.read_consistency_level = read_consistency_level
self.write_consistency_level = write_consistency_level
self.cf = pycassa.ColumnFamily(self.client, self.keyspace,
self.column_family,
read_consistency_level = read_consistency_level,

View File

@@ -115,19 +115,22 @@ def add_comment_nolock(comment):
(cids, comment_tree, depth, num_children))
def update_comment_vote(comment):
link_id = comment.link_id
# refresh this comment's cached sort values on its parent link,
# holding the link's comment-tree lock
with g.make_lock(lock_key(link_id)):
for sort in ("_controversy", "_hot", "_confidence", "_score"):
key = sort_comments_key(link_id, sort)
r = g.permacache.get(key)
# don't bother recomputing a non-existent sort dict, as
# we'll catch it next time we have to render something
if r:
r[comment._id] = _get_sort_value(comment, sort)
g.permacache.set(key, r)
def update_comment_votes(comments):
# batched version of update_comment_vote: accepts one comment or a list
comments = tup(comments)
# group comments by parent link so each link's lock is taken only once
link_map = {}
for com in comments:
link_map.setdefault(com.link_id, []).append(com)
for link_id, coms in link_map.iteritems():
with g.make_lock(lock_key(link_id)):
for sort in ("_controversy", "_hot", "_confidence", "_score"):
key = sort_comments_key(link_id, sort)
r = g.permacache.get(key)
# don't bother recomputing a non-existent sort dict, as
# we'll catch it next time we have to render something
if r:
for comment in coms:
r[comment._id] = _get_sort_value(comment, sort)
g.permacache.set(key, r)
def delete_comment(comment):

View File

@@ -0,0 +1,266 @@
import json
import httplib
import urllib
import urlparse
import base64
import datetime
class ApiClient:
    """
    Basic client for an account.

    Constructed from an API url; exposes methods to manage and access
    the account's indexes.  The objects returned by these methods
    implement the IndexClient class.
    """

    def __init__(self, api_url):
        self.__api_url = api_url.rstrip('/')

    def get_index(self, index_name):
        # a client for the named index (which may not exist yet)
        return IndexClient(self.__index_url(index_name))

    def create_index(self, index_name):
        # build a client and ask the server to create the index
        new_index = self.get_index(index_name)
        new_index.create_index()
        return new_index

    def delete_index(self, index_name):
        self.get_index(index_name).delete_index()

    def list_indexes(self):
        # server returns a map of index name -> metadata
        _, metadata_by_name = _request('GET', self.__indexes_url())
        return [IndexClient(name, meta)
                for name, meta in metadata_by_name.iteritems()]

    """ Api urls """
    def __indexes_url(self):
        return '%s/%s/indexes' % (self.__api_url, 'v1')

    def __index_url(self, name):
        return '%s/%s' % (self.__indexes_url(), name)
class IndexClient:
"""
Client for a specific index.
It allows to inspect the status of the index.
It also provides methods for indexing and searching said index.
"""
def __init__(self, index_url, metadata=None):
self.__index_url = index_url
self.__metadata = metadata
def exists(self):
"""
Returns whether an index for the name of this instance
exists, if it doesn't it can be created by calling
self.create_index()
"""
try:
self.refresh_metadata()
return True
except HttpException, e:
if e.status == 404:
return False
else:
raise
def has_started(self):
"""
Returns whether this index is responsive. Newly created
indexes can take a little while to get started.
If this method returns False most methods in this class
will raise an HttpException with a status of 503.
"""
return self.refresh_metadata()['started']
def get_code(self):
return self.get_metadata()['code']
def get_creation_time(self):
"""
Returns a datetime of when this index was created
"""
return _isoparse(self.get_metadata()['creation_time'])
def create_index(self):
"""
Creates this index.
If it already existed a IndexAlreadyExists exception is raised.
If the account has reached the limit a TooManyIndexes exception is raised
"""
try:
status, _ = _request('PUT', self.__index_url)
if status == 204:
raise IndexAlreadyExists('An index for the given name already exists')
except HttpException, e:
if e.status == 409:
raise TooManyIndexes(e.msg)
raise e
def delete_index(self):
_request('DELETE', self.__index_url)
def add_document(self, docid, fields, variables=None):
"""
Indexes a document for the given docid and fields.
Arguments:
docid: unique document identifier
field: map with the document fields
variables (optional): map integer -> float with values for variables that can
later be used in scoring functions during searches.
"""
data = {'docid': docid, 'fields': fields}
if variables is not None:
data['variables'] = variables
_request('PUT', self.__docs_url(), data=data)
def delete_document(self, docid):
"""
Deletes the given docid from the index if it existed. otherwise, does nothing.
Arguments:
docid: unique document identifier
"""
_request('DELETE', self.__docs_url(), data={'docid': docid})
def update_variables(self, docid, variables):
"""
Updates the variables of the document for the given docid.
Arguments:
docid: unique document identifier
variables: map integer -> float with values for variables that can
later be used in scoring functions during searches.
"""
_request('PUT', self.__variables_url(), data={'docid': docid, 'variables': variables})
def promote(self, docid, query):
"""
Makes the given docid the top result of the given query.
Arguments:
docid: unique document identifier
query: the query for which to promote the document
"""
_request('PUT', self.__promote_url(), data={'docid': docid, 'query': query})
def add_function(self, function_index, definition):
try:
_request('PUT', self.__function_url(function_index), data={'definition': definition})
except HttpException, e:
if e.status == 400:
raise InvalidDefinition(e.msg)
def delete_function(self, function_index):
_request('DELETE', self.__function_url(function_index))
def list_functions(self):
_, functions = _request('GET', self.__functions_url())
return functions
def search(self, query, start=None, len=None, scoring_function=None, snippet_fields=None, fetch_fields=None):
params = { 'q': query }
if start is not None: params['start'] = start
if len is not None: params['len'] = len
if scoring_function is not None: params['function'] = scoring_function
if snippet_fields is not None: params['snippet'] = snippet_fields
if fetch_fields is not None: params['fetch'] = fetch_fields
try:
_, result = _request('GET', self.__search_url(), params=params)
return result
except HttpException, e:
if e.status == 400:
raise InvalidQuery(e.msg)
raise
""" metadata management """
def get_metadata(self):
if self.__metadata is None:
return self.refresh_metadata()
return self.__metadata
def refresh_metadata(self):
_, self.__metadata = _request('GET', self.__index_url)
return self.__metadata
""" Index urls """
def __docs_url(self): return '%s/docs' % (self.__index_url)
def __variables_url(self): return '%s/docs/variables' % (self.__index_url)
def __promote_url(self): return '%s/promote' % (self.__index_url)
def __search_url(self): return '%s/search' % (self.__index_url)
def __functions_url(self): return '%s/functions' % (self.__index_url)
def __function_url(self,n): return '%s/functions/%d' % (self.__index_url, n)
class InvalidResponseFromServer(Exception):
    """A 2xx response whose body could not be parsed as JSON."""
class TooManyIndexes(Exception):
    """The account has reached its index limit (HTTP 409 on create)."""
class IndexAlreadyExists(Exception):
    """create_index() was called for a name that already has an index."""
class InvalidQuery(Exception):
    """The server rejected a search query (HTTP 400)."""
class InvalidDefinition(Exception):
    """The server rejected a scoring-function definition (HTTP 400)."""
class Unauthorized(Exception):
    """The request lacked valid credentials (HTTP 401)."""
class HttpException(Exception):
    """HTTP error response from the API; carries the status code and
    the raw response body."""
    def __init__(self, status, msg):
        self.status, self.msg = status, msg
        Exception.__init__(self, 'HTTP %d: %s' % (status, msg))
__USER_AGENT = 'IndexTank.PythonClient.v1'
def _is_ok(status):
return status / 100 == 2
def _request(method, url, params={}, data={}, headers={}):
splits = urlparse.urlsplit(url)
hostname = splits.hostname
port = splits.port
username = splits.username
password = splits.password
# drop the auth from the url
netloc = splits.hostname + (':%s' % splits.port if splits.port else '')
url = urlparse.urlunsplit((splits.scheme, netloc, splits.path, splits.query, splits.fragment))
if method == 'GET':
params = urllib.urlencode(params)
if params:
if '?' not in url:
url += '?' + params
else:
url += '&' + params
connection = httplib.HTTPConnection(hostname, port)
if username or password:
credentials = "%s:%s" % (username, password)
base64_credentials = base64.encodestring(credentials)
authorization = "Basic %s" % base64_credentials[:-1]
headers['Authorization'] = authorization
if data:
body = json.dumps(data, ensure_ascii=True)
else:
body = ''
connection.request(method, url, body, headers)
response = connection.getresponse()
response.body = response.read()
if _is_ok(response.status):
if response.body:
try:
response.body = json.loads(response.body)
except ValueError, e:
raise InvalidResponseFromServer('The JSON response could not be parsed: %s.\n%s' % (e, response.body))
ret = response.status, response.body
else:
ret = response.status, None
elif response.status == 401:
raise Unauthorized('Authorization required. Use your private api_url.')
else:
raise HttpException(response.status, response.body)
connection.close()
return ret
def _isoparse(s):
try:
return datetime.datetime(int(s[0:4]),int(s[5:7]),int(s[8:10]), int(s[11:13]), int(s[14:16]), int(s[17:19]))
except:
return None

View File

@@ -97,6 +97,7 @@ class query_func(Slot): pass
class lower(query_func): pass
class ip_network(query_func): pass
class base_url(query_func): pass
class domain(query_func): pass
class timeago(object):
def __init__(self, interval):

View File

@@ -7,7 +7,7 @@ from r2.lib.utils import fetch_things2, tup, UniqueIterator, set_last_modified
from r2.lib import utils
from r2.lib.solrsearch import DomainSearchQuery
from r2.lib import amqp, sup
from r2.lib.comment_tree import add_comment, link_comments, update_comment_vote
from r2.lib.comment_tree import add_comment, link_comments, update_comment_votes
import cPickle as pickle
@@ -59,20 +59,6 @@ def filter_thing2(x):
the object of the relationship."""
return x._thing2
def make_batched_time_query(sr, sort, time, preflight_check = True):
q = get_links(sr, sort, time, merge_batched=False)
if (g.use_query_cache
and sort in batched_time_sorts
and time in batched_time_times):
if not preflight_check:
q.force_run = True
q.batched_time_srid = sr._id
return q
class CachedResults(object):
"""Given a query returns a list-like object that will lazily look up
the query from the persistent cache. """
@@ -271,13 +257,13 @@ def merge_results(*results):
return MergedCachedResults(results)
else:
assert all((results[0]._sort == r._sort
and results[0] == r.prewrap_fn)
and results[0].prewrap_fn == r.prewrap_fn)
for r in results)
m = Merge(results, sort = results[0]._sort)
m.prewrap_fn = results[0].prewrap_fn
return m
def get_links(sr, sort, time, merge_batched=True):
def get_links(sr, sort, time):
"""General link query for a subreddit."""
q = Link._query(Link.c.sr_id == sr._id,
sort = db_sort(sort),
@@ -400,9 +386,19 @@ def get_modqueue(sr):
return merge_results(*results)
def get_domain_links(domain, sort, time):
def get_domain_links_old(domain, sort, time):
# legacy solr-backed domain listing, kept during the permacache migration
return DomainSearchQuery(domain, sort=search_sort[sort], timerange=time)
def get_domain_links(domain, sort, time):
# query-cache-backed domain listing: filters Links by url domain
from r2.lib.db import operators
q = Link._query(operators.domain(Link.c.url) == domain,
sort = db_sort(sort),
data = True)
if time != "all":
# restrict to the requested time window
q._filter(db_times[time])
return make_results(q)
def user_query(kind, user, sort, time):
"""General profile-page query."""
q = kind._query(kind.c.author_id == user._id,
@@ -554,11 +550,14 @@ def new_link(link):
# that
results.append(get_submitted(author, 'new', 'all'))
for domain in utils.UrlParser(link.url).domain_permutations():
results.append(get_domain_links(domain, 'new', "all"))
if link._spam:
results.append(get_spam_links(sr))
else:
add_queries(results, insert_items = link)
add_queries(results, insert_items = link)
amqp.add_item('new_link', link._fullname)
@@ -607,13 +606,27 @@ def new_vote(vote):
if vote.valid_thing and not item._spam and not item._deleted:
sr = item.subreddit_slow
results = []
author = Account._byID(item.author_id)
if author.gold:
for sort in ('hot', 'top', 'controversial', 'new'):
if isinstance(item, Link):
results.append(get_submitted(author, sort, 'all'))
if isinstance(item, Comment):
results.append(get_comments(author, sort, 'all'))
# don't do 'new', because that was done by new_link, and the
# time-filtered versions of top/controversial will be done by
# mr_top
results = [get_links(sr, 'hot', 'all'),
get_links(sr, 'top', 'all'),
get_links(sr, 'controversial', 'all'),
]
results.extend([get_links(sr, 'hot', 'all'),
get_links(sr, 'top', 'all'),
get_links(sr, 'controversial', 'all'),
])
for domain in utils.UrlParser(item.url).domain_permutations():
for sort in ("hot", "top", "controversial"):
results.append(get_domain_links(domain, sort, "all"))
add_queries(results, insert_items = item)
@@ -679,10 +692,14 @@ def new_savehide(rel):
elif name == 'unhide':
add_queries([get_hidden(user)], delete_items = rel)
def changed(things):
def changed(things, boost_only=False):
"""Indicate to search that a given item should be updated in the index"""
for thing in tup(things):
amqp.add_item('search_changes', thing._fullname,
msg = {'fullname': thing._fullname}
if boost_only:
msg['boost_only'] = True
amqp.add_item('search_changes', pickle.dumps(msg),
message_id = thing._fullname,
delivery_mode = amqp.DELIVERY_TRANSIENT)
@@ -974,7 +991,6 @@ def handle_vote(user, thing, dir, ip, organic, cheater = False):
elif isinstance(thing, Comment):
#update last modified
update_comment_vote(thing)
if user._id == thing.author_id:
set_last_modified(user, 'overview')
set_last_modified(user, 'commented')
@@ -982,24 +998,28 @@ def handle_vote(user, thing, dir, ip, organic, cheater = False):
sup.add_update(user, 'commented')
def process_votes(limit=None):
def process_votes(limit=1000):
# limit is taken but ignored for backwards compatibility
def _handle_vote(msgs, chan):
assert(len(msgs) == 1)
msg = msgs[0]
#assert(len(msgs) == 1)
comments = []
for msg in msgs:
r = pickle.loads(msg.body)
r = pickle.loads(msg.body)
uid, tid, dir, ip, organic, cheater = r
voter = Account._byID(uid, data=True)
votee = Thing._by_fullname(tid, data = True)
if isinstance(votee, Comment):
comments.append(votee)
uid, tid, dir, ip, organic, cheater = r
voter = Account._byID(uid, data=True)
votee = Thing._by_fullname(tid, data = True)
print (voter, votee, dir, ip, organic, cheater)
handle_vote(voter, votee, dir, ip, organic,
cheater = cheater)
print (voter, votee, dir, ip, organic, cheater)
handle_vote(voter, votee, dir, ip, organic,
cheater = cheater)
update_comment_votes(comments)
amqp.handle_items('register_vote_q', _handle_vote)
amqp.handle_items('register_vote_q', _handle_vote, limit = limit)
try:
from r2admin.lib.admin_queries import *

View File

@@ -37,6 +37,8 @@ thing_cache = g.thing_cache
keyspace = 'reddit'
disallow_db_writes = g.disallow_db_writes
tz = g.tz
read_consistency_level = g.cassandra_rcl
write_consistency_level = g.cassandra_wcl
# descriptions of the CFs available on boot.
boot_cfs = cassandra.describe_keyspace(keyspace)
@@ -111,8 +113,8 @@ class ThingMeta(type):
cls.cf = pycassa.ColumnFamily(cassandra, keyspace,
cf_name,
read_consistency_level = CL.ONE,
write_consistency_level = CL.QUORUM)
read_consistency_level = read_consistency_level,
write_consistency_level = write_consistency_level)
cls._kind = name

View File

@@ -59,8 +59,12 @@ def create_table(table, index_commands=None):
for i in index_commands:
t.bind.execute(i)
def index_str(table, name, on, where = None):
index_str = 'create index idx_%s_' % name
def index_str(table, name, on, where = None, unique = False):
if unique:
index_str = 'create unique index'
else:
index_str = 'create index'
index_str += ' idx_%s_' % name
index_str += table.name
index_str += ' on '+ table.name + ' (%s)' % on
if where:

View File

@@ -51,6 +51,13 @@ def _nerds_email(body, from_name, kind):
Email.handler.add_to_queue(None, g.nerds_email, from_name, g.nerds_email,
kind, body = body)
def _gold_email(body, to_address, from_name, kind):
"""
For sending email to reddit gold subscribers
"""
# NOTE(review): the fourth positional argument mirrors the slot that
# _nerds_email fills with g.nerds_email — presumably the from/reply
# address; confirm against Email.handler.add_to_queue's signature
Email.handler.add_to_queue(None, to_address, from_name, g.goldthanks_email,
kind, body = body)
def verify_email(user, dest):
"""
For verifying an email address
@@ -72,7 +79,7 @@ def verify_email(user, dest):
def password_email(user):
"""
For reseting a user's password.
For resetting a user's password.
"""
from r2.lib.pages import PasswordReset
key = passhash(random.randint(0, 1000), user.email)
@@ -100,6 +107,9 @@ def i18n_email(email, body, name='', reply_to = ''):
return _feedback_email(email, body, Email.Kind.HELP_TRANSLATE, name = name,
reply_to = reply_to)
def gold_email(body, to_address, from_name=g.domain):
"""Queues a GOLDMAIL-kind email to a reddit gold subscriber."""
return _gold_email(body, to_address, from_name, Email.Kind.GOLDMAIL)
def nerds_email(body, from_name=g.domain):
"""Queues a feedback email to the nerds running this site."""
# recipient is g.nerds_email (see _nerds_email)
return _nerds_email(body, from_name, Email.Kind.NERDMAIL)

View File

@@ -1,108 +1,295 @@
import urllib
import json
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is Reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of the
# Original Code is CondeNet, Inc.
#
# All portions of the code written by CondeNet are Copyright (c) 2006-2010
# CondeNet, Inc. All Rights Reserved.
################################################################################
"""
Module for reddit-level communication with IndexTank
"""
#BASE_URL = 'http://api.indextank.com/api/v0'
BASE_URL = 'http://api.reddit.indextank.com/api/v0'
#BASE_URL = 'http://api.it-test.flaptor.com/api/v0'
from pylons import g, config
import cPickle as pickle
from time import sleep
from r2.models import *
from r2.lib import amqp
from r2.lib.contrib import indextank_clientv1
from r2.lib.utils import in_chunks, progress, get_after, UrlParser
from r2.lib.utils import domain, strordict_fullname
indextank_indexed_types = (Link,)
sorts = dict(relevance = 1,
new = 2,
top = 3)
index = indextank_clientv1.ApiClient(g.INDEXTANK_API_URL).get_index('main')
class IndexTank:
api_key=None
def_index_code=None
def __init__(self, api_key, index_code=None):
self.api_key = api_key
self.def_index_code = index_code
def __api_call(self, method, index_code=None, params={}):
base_params = {
'api_key': self.api_key,
'index_code': index_code or self.def_index_code,
}
base_params.update(params)
params = urllib.urlencode(base_params)
url = "%s/%s"%(BASE_URL,method)
res = urllib.urlopen(url,params)
data = res.read()
if 200 != res.getcode():
return False, 'HttpResponse code %d\nResponse content is:\n%s' % (res.getcode(), data)
try:
result = json.loads(data)
except ValueError,e:
return False, 'Error decoding json response.\nResponse content is:\n%s' % (data)
ok = result.get('status') == 'OK'
return ok, result
def create_index(self, index_name=''):
data = { 'index_name': index_name}
return self.__api_call("admin/create",params = data)
def delete_index(self, index_code=None):
return self.__api_call("admin/delete",index_code=index_code)
def list_indexes(self):
return self.__api_call("admin/list")
def add(self, doc_id, content, boosts=None, index_code=None):
'''
doc_id: unique document identifier
content: map with the document fields
boosts (optional): map integer -> float with values for available boosts
index_code (optional): index code if not specified in construction
'''
if boosts:
dumped_boosts = json.dumps(boosts)
data = { 'document': json.dumps(content), 'document_id': doc_id, 'boosts': dumped_boosts}
class Results(object):
    """Container for one search response: the matched document ids
    (`docs`) plus the total hit count (`hits`)."""
    __slots__ = ['docs', 'hits']

    def __init__(self, docs, hits):
        self.docs, self.hits = docs, hits

    def __repr__(self):
        return '%s(%r,%r)' % (type(self).__name__,
                              self.docs,
                              self.hits)
class IndextankQuery(object):
def __init__(self, query, sr, sort):
self.query, self.sr, self.sort = query, sr, sort
def __repr__(self):
return '%s(%r,%r,%r)' % (self.__class__.__name__,
self.query, self.sr, self.sort)
def run(self, after=None, reverse=False, num=1000, _update=False):
results = self._run(_update=_update)
docs, hits = results.docs, results.hits
after_docs = get_after(docs,
after, num, reverse=reverse)
return Results(after_docs, hits)
def _req_fs(self, sr_ids, field='sr_id'):
if len(sr_ids) == 1:
return '+%s:%d' % (field, sr_ids[0])
else:
data = { 'document': json.dumps(content), 'document_id': doc_id}
return '+(%s)' % ' OR '.join(('%s:%s' % (field, sr_id))
for sr_id in sr_ids)
def _run(self, start=0, num=1000, _update=False):
q = []
q.append(self.query)
if self.sr == All or not self.sr or self.sr == Default:
pass
#elif self.sr == Default:
# q.append(self._req_fs(
# Subreddit.user_subreddits(c.user,over18=c.over18,
# ids=True, limit=None)))
elif isinstance(self.sr, MultiReddit):
q.append(self._req_fs(
self.sr.sr_ids))
elif self.sr == Friends and c.user_is_loggedin and c.user.friends:
friend_ids = c.user.friends[:100] # we're not about to
# look up more than 100
# of these
friends = Account._byID(friend_ids, data=True, return_dict=False)
friend_names = map(lambda x: x.name, friends)
q.append(self._req_fs(
friend_names, field='author'))
elif isinstance(self.sr, ModContribSR):
q.append(self._req_fs(
self.sr.sr_ids()))
elif not isinstance(self.sr, FakeSubreddit):
q.append(self._req_fs([self.sr._id]))
query = ' '.join(q)
return self._run_cached(query, sorts[self.sort], start=start, num=num,
_update=_update)
return self.__api_call("index/add",index_code=index_code,params=data)
def boost(self, doc_id, timestamp=None, boosts=None, index_code=None):
data = {'document_id': doc_id}
if timestamp:
data.update({ 'timestamp': str(timestamp)})
if boosts:
data.update({ 'boosts': json.dumps(boosts)})
return self.__api_call("index/boost",index_code=index_code,params=data)
@classmethod
def _run_cached(cls, query, sort, start=0, num=1000, _update=False):
# we take and ignore the _update parameter to make plugging in
# a @memoize later easy
def promote(self, doc_id, query):
data = { 'document_id' : doc_id, 'query' : query }
return self.__api_call("index/promote", index_code=index_code, params=data)
if g.sqlprinting:
g.log.info('%s: %r %r' % (cls.__name__, query, sort))
def add_function(self, function_index, definition, index_code=None):
data = { 'function_id': function_index, 'definition': definition }
return self.__api_call("index/add_function", index_code=index_code, params=data)
def del_function(self, function_index, index_code=None):
data = { 'function_id': function_index }
return self.__api_call("index/remove_function", index_code=index_code, params=data)
def list_functions(self, index_code=None):
return self.__api_call("index/list_functions", index_code=index_code, params={})
def update(self, doc_id, content, index_code=None):
data = { 'document': json.dumps(content), 'document_id':doc_id}
return self.__api_call("index/update", index_code=index_code, params=data)
def delete(self, doc_id, index_code=None):
data = { 'document_id':doc_id}
return self.__api_call("index/delete",index_code=index_code,params=data)
def search(self, query, index_code=None, start=0, len=10, relevance_function=None, snippet_fields=None, fetch_fields=None):
data = { 'query':query, 'start':start, 'len':len, 'snippet_fields':snippet_fields, 'fetch_fields':fetch_fields,}
if relevance_function is not None:
data['relevance_function'] = relevance_function
return self.__api_call("search/query",index_code=index_code,params=data)
resp = index.search(query.encode('utf-8'), start=start, len=num,
scoring_function=sort)
def complete(self, query, index_code=None):
data = { 'query':query }
return self.__api_call("search/complete", index_code=index_code, params=data)
def index_stats(self, index_code=None):
return self.__api_call("index/stats",index_code=index_code)
def search_stats(self, index_code=None):
return self.__api_call("search/stats",index_code=index_code)
docs = [t['docid'] for t in resp['results']]
hits = resp['matches']
return Results(docs, hits)
def yesno(b):
    """Map truthiness to the literal 'yes'/'no' strings the index expects."""
    if b:
        return 'yes'
    return 'no'
def maps_from_things(things, boost_only = False):
    """We only know how to do links for now"""
    # Build one flat field-dict per thing, ready for inject_maps().  With
    # boost_only=True only the ranking fields (fullname/ups/downs/num_comments)
    # are produced and the account/subreddit lookups are skipped entirely.
    maps = []

    if not boost_only:
        # we can avoid looking these up at all if only the boosts were
        # updated
        author_ids = [thing.author_id for thing in things
                      if hasattr(thing, 'author_id') ]
        accounts = Account._byID(author_ids, data = True, return_dict = True)
        sr_ids = [thing.sr_id for thing in things
                  if hasattr(thing, 'sr_id')]
        srs = Subreddit._byID(sr_ids, data=True, return_dict=True)

    for thing in things:
        try:
            d = dict(fullname = thing._fullname,
                     ups = thing._ups,
                     downs = thing._downs,
                     num_comments = getattr(thing, 'num_comments', 0))
            if not boost_only:
                a = accounts[thing.author_id]
                sr = srs[thing.sr_id]

                if a._deleted:
                    # if the author was deleted, we won't update it in
                    # indextank at all
                    continue

                d.update(dict(fullname = thing._fullname,
                              subreddit = sr.name,
                              reddit = sr.name,
                              text = ' '.join([thing.title, a.name, sr.name]),
                              author = a.name,
                              timestamp = thing._date.strftime("%s"),
                              sr_id = str(thing.sr_id),
                              over18 = yesno(sr.over_18),
                              is_self = yesno(thing.is_self),
                              ))

                # self posts search under our own domain; external links
                # under every permutation of their URL's domain
                if thing.is_self:
                    d['site'] = g.domain
                    if thing.selftext:
                        d['selftext'] = thing.selftext
                else:
                    d['url'] = thing.url
                    d['site'] = ' '.join(UrlParser(thing.url).domain_permutations())

            maps.append(d)
        except AttributeError:
            # non-link things (or partially loaded ones) lack some of the
            # attributes above and are deliberately skipped
            pass
    return maps
def to_variables(ups, downs, num_comments):
    """Pack the three ranking signals into the numbered-variable dict
    (slot 0 = ups, slot 1 = downs, slot 2 = comment count)."""
    signals = (ups, downs, num_comments)
    return dict(enumerate(signals))
def inject_maps(maps, boost_only=False):
    """Push prepared document dicts into the IndexTank index.

    Each dict carries 'fullname', 'ups', 'downs' and 'num_comments' plus
    the searchable fields; the first four are stripped off and sent as
    ranking variables.  With boost_only=True only the variables are
    refreshed and the document body is left untouched.
    """
    for fields in maps:
        docid = fields.pop("fullname")
        variables = to_variables(fields.pop("ups"),
                                 fields.pop("downs"),
                                 fields.pop("num_comments"))
        if boost_only:
            # cheap path: only the ranking variables change
            index.update_variables(docid=docid, variables=variables)
        else:
            index.add_document(docid=docid, fields=fields, variables=variables)
def delete_thing(thing):
    # drop the thing's document from the index entirely
    index.delete_document(docid=thing._fullname)
def inject(things, boost_only=False):
    """Split incoming things into index updates vs. deletions and apply both.

    Only types in indextank_indexed_types are considered.  Spam/deleted
    things are removed from the index; live, non-promoted things with a
    real subreddit get (re)indexed.
    """
    indexable = [t for t in things if isinstance(t, indextank_indexed_types)]

    doomed = [t for t in indexable if t._spam or t._deleted]
    live = [t for t in indexable
            if not t._spam and not t._deleted
            and t.promoted is None
            and getattr(t, 'sr_id', None) != -1]

    if live:
        inject_maps(maps_from_things(live, boost_only=boost_only),
                    boost_only=boost_only)

    for t in doomed:
        delete_thing(t)
def rebuild_index(after_id=None, estimate=10000000):
    """Reindex every Link, newest first, optionally resuming after `after_id`.

    Spam/deleted items are filtered out downstream by inject(); `estimate`
    only feeds the progress meter.
    """
    q = Link._query(sort=desc('_date'), data=True)
    if after_id:
        q._after(Link._byID(after_id))
    items = fetch_things2(q)

    def progress_key(link):
        # we're walking backwards in time, so id/date shows how deep we are
        return "%s/%s" % (link._id, link._date)

    items = progress(items, verbosity=1000, estimate=estimate, persec=True,
                     key=progress_key)
    for batch in in_chunks(items):
        inject(batch)
def run_changed(drain=False, limit=1000):
    """
    Run by `cron` (through `paster run`) on a schedule to send Things to
    IndexTank
    """
    def _run_changed(msgs, chan):
        # strordict_fullname normalises each message body (bare fullname
        # string or dict) into a dict with at least a 'fullname' key
        changed = map(lambda x: strordict_fullname(x.body), msgs)

        boost = set()
        add = set()

        # an item can request that only its boost fields be updated,
        # so we need to separate those out
        for item in changed:
            fname = item['fullname']
            boost_only = item.get('boost_only', False)

            if fname in add:
                # we're already going to do all of the work
                continue

            if boost_only:
                boost.add(fname)
            else:
                if fname in boost:
                    # we've previously seen an instance of this fname
                    # that requested that only its boosts be updated,
                    # but now we have to update the whole thing
                    boost.remove(fname)
                add.add(fname)

        things = Thing._by_fullname(boost | add, data=True, return_dict=True)

        print ("%d messages: %d docs, %d boosts (%d duplicates, %s remaining)"
               % (len(changed),
                  len(add),
                  len(boost),
                  len(changed) - len(things),
                  msgs[-1].delivery_info.get('message_count', 'unknown'),
                  ))

        # boost-only items take the cheap update_variables path
        if boost:
            inject([things[fname] for fname in boost], boost_only=True)
        if add:
            inject([things[fname] for fname in add])

    amqp.handle_items('indextank_changes', _run_changed, limit=limit,
                      drain=drain, verbose=False)

View File

@@ -1,132 +0,0 @@
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is Reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of the
# Original Code is CondeNet, Inc.
#
# All portions of the code written by CondeNet are Copyright (c) 2006-2010
# CondeNet, Inc. All Rights Reserved.
################################################################################
"""
Module for communication reddit-level communication with IndexTank
"""
from pylons import g, config
from r2.models import *
from r2.lib import amqp, indextank
from r2.lib.utils import in_chunks, progress
indextank_indexed_types = (Link,)
index = indextank.IndexTank(api_key = g.INDEXTANK_API_KEY,
index_code = g.INDEXTANK_IDX_CODE)
def maps_from_things(things):
    """We only know how to do links for now"""
    # Build one flat field-dict per thing, ready for inject_maps().
    maps = []
    author_ids = [ thing.author_id for thing in things ]
    accounts = Account._byID(author_ids, data = True, return_dict = True)
    for thing in things:
        a = accounts[thing.author_id]
        if a._deleted:
            # deleted authors are never sent to the index
            continue
        d = dict(fullname = thing._fullname,
                 text = thing.title,
                 author = a.name,
                 timestamp = thing._date.strftime("%s"),
                 ups = thing._ups,
                 downs = thing._downs,
                 num_comments = getattr(thing, "num_comments", 0),
                 sr_id = str(thing.sr_id))
        # self posts contribute their body text; external links their URL
        if thing.is_self and thing.selftext:
            d['selftext'] = thing.selftext
        elif not thing.is_self:
            d['url'] = thing.url
        maps.append(d)
    return maps
def to_boosts(ups, downs, num_comments):
    """Build the document-boost mapping: slot 0 = ups, slot 1 = downs,
    slot 2 = comment count."""
    return {0: ups,
            1: downs,
            2: num_comments}
def inject_maps(maps):
    """Send prepared document dicts to the index, raising on any API failure."""
    for d in maps:
        fullname = d.pop("fullname")
        ups = d.pop("ups")
        downs = d.pop("downs")
        num_comments = d.pop("num_comments")

        boosts = to_boosts(ups, downs, num_comments)

        # skip the separate boost call for fresh-looking items (at most one
        # up, no downs, no comments); add() below carries boosts anyway
        if ups not in (0, 1) or downs != 0 or num_comments > 0:
            ok, result = index.boost(fullname, boosts=boosts)
            if not ok:
                raise Exception(result)

        ok, result = index.add(fullname, d, boosts)
        if not ok:
            raise Exception(result)
def delete_thing(thing):
    """Remove a thing's document from the index, raising on API failure."""
    ok, result = index.delete(thing._fullname)
    if not ok:
        raise Exception(result)
def inject(things):
    """Update or delete the given things in the IndexTank index.

    Only types listed in indextank_indexed_types are considered.  Live,
    non-promoted things get (re)indexed; spammed or deleted things are
    removed from the index.
    """
    things = [x for x in things if isinstance(x, indextank_indexed_types)]

    update_things = [x for x in things if not x._spam and not x._deleted
                     and x.promoted is None
                     # bug fix: getattr() without a default raises
                     # AttributeError for things lacking sr_id entirely,
                     # defeating the point of using getattr here
                     and getattr(x, 'sr_id', None) != -1]
    delete_things = [x for x in things if x._spam or x._deleted]

    if update_things:
        maps = maps_from_things(update_things)
        inject_maps(maps)
    if delete_things:
        for thing in delete_things:
            delete_thing(thing)
def rebuild_index(after_id = None):
    """Walk all Links newest-first and reindex them in chunks,
    optionally resuming after `after_id`."""
    cls = Link

    # spam/deleted aren't filtered here; inject() handles them downstream
    q = cls._query(sort=desc('_date'), data=True)
    if after_id:
        q._after(cls._byID(after_id))
    q = fetch_things2(q)

    q = progress(q, verbosity=1000, estimate=10000000, persec=True)

    for chunk in in_chunks(q):
        inject(chunk)
def run_changed(drain=False):
    """
    Run by `cron` (through `paster run`) on a schedule to send Things to
    IndexTank
    """
    def _run_changed(msgs, chan):
        # dedupe the fullnames delivered in this batch, then reindex them
        fullnames = set(msg.body for msg in msgs)
        inject(Thing._by_fullname(fullnames, data=True, return_dict=False))

    amqp.handle_items('indextank_changes', _run_changed, limit=1000,
                      drain=drain)

View File

@@ -24,6 +24,7 @@ from pylons import c, request, g
from utils import query_string, timeago
from strings import StringHandler, plurals
from r2.lib.db import operators
from r2.lib.indextank import sorts as indextank_sorts
from r2.lib.filters import _force_unicode
from pylons.i18n import _
@@ -422,6 +423,10 @@ class SortMenu(SimpleGetMenu):
elif sort == 'confidence':
return operators.desc('_confidence')
class ProfileSortMenu(SortMenu):
    # sort menu for user profile listings, defaulting to newest-first
    default = 'new'
    options = ('hot', 'new', 'top', 'controversial')
class CommentSortMenu(SortMenu):
"""Sort menu for comments pages"""
default = 'confidence'
@@ -430,11 +435,7 @@ class CommentSortMenu(SortMenu):
class SearchSortMenu(SortMenu):
"""Sort menu for search pages."""
default = 'relevance'
mapping = dict(relevance = 'score desc',
hot = 'hot desc',
new = 'date desc',
old = 'date asc',
top = 'points desc')
mapping = indextank_sorts
options = mapping.keys()
@classmethod

View File

@@ -392,7 +392,7 @@ def prime_url_cache(f, verbosity = 10000):
tid, key, url, kind = line.split('|')
tid = int(tid)
if url.lower() != "self":
key = Link.by_url_key(url)
key = Link.by_url_key_new(url)
link_ids = g.urlcache.get(key) or []
if tid not in link_ids:
link_ids.append(tid)

View File

@@ -0,0 +1,134 @@
"""
Generate the data for the listings for the time-based Subreddit
queries. The format is eventually that of the CachedResults objects
used by r2.lib.db.queries (with some intermediate steps), so changes
there may warrant changes here
"""
# to run:
"""
export LINKDBHOST=prec01
export USER=ri
export INI=production.ini
cd ~/reddit/r2
time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
-c "\\copy (select t.thing_id, 'thing', 'link',
t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
from reddit_thing_link t
where not t.spam and not t.deleted
)
to 'reddit_thing_link.dump'"
time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
-c "\\copy (select d.thing_id, 'data', 'link',
d.key, d.value
from reddit_data_link d
where d.key = 'url' ) to 'reddit_data_link.dump'"
cat reddit_data_link.dump reddit_thing_link.dump | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/migrate/mr_domains.py -c "join_links()" > links.joined
cat links.joined | paster --plugin=r2 run $INI r2/lib/migrate/mr_domains.py -c "time_listings()" | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/migrate/mr_domains.py -c "write_permacache()"
"""
import sys
from r2.models import Account, Subreddit, Link
from r2.lib.db.sorts import epoch_seconds, score, controversy, _hot
from r2.lib.db import queries
from r2.lib import mr_tools
from r2.lib.utils import timeago, UrlParser
from r2.lib.jsontemplates import make_fullname # what a strange place
# for this function
def join_links():
    # join the thing- and data-table dumps on thing_id, keeping 'url'
    mr_tools.join_things(('url',))
def time_listings(times = ('all',)):
    """mr_tools mapper: for each live link, emit
    (listing-key, sort-value, timestamp, fullname) tuples for every
    domain permutation of its URL."""
    # cutoff epoch for each requested time window
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != "all")
    # 'all' is approximated by a ten-year window
    oldests['all'] = epoch_seconds(timeago('10 years'))

    @mr_tools.dataspec_m_thing(("url", str),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            # self posts have no external URL, so no domain keys
            if link.url:
                domains = UrlParser(link.url).domain_permutations()
            else:
                domains = []
            ups, downs = link.ups, link.downs

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    sc = score(ups, downs)
                    contr = controversy(ups, downs)
                    h = _hot(ups, downs, timestamp)
                    for domain in domains:
                        yield ('domain/top/%s/%s' % (tkey, domain),
                               sc, timestamp, fname)
                        yield ('domain/controversial/%s/%s' % (tkey, domain),
                               contr, timestamp, fname)
                        # hot/new listings are only generated for 'all'
                        if tkey == "all":
                            yield ('domain/hot/%s/%s' % (tkey, domain),
                                   h, timestamp, fname)
                            yield ('domain/new/%s/%s' % (tkey, domain),
                                   timestamp, timestamp, fname)

    mr_tools.mr_map(process)
def store_keys(key, maxes):
    # we're building queries using queries.py, but we could make the
    # queries ourselves if we wanted to avoid the individual lookups
    # for accounts and subreddits.

    # Note that we're only generating the 'sr-' type queries here, but
    # we're also able to process the other listings generated by the
    # old migrate.mr_permacache for convenience

    # mapping from userrel key prefix to the CachedResults query builder
    userrel_fns = dict(liked = queries.get_liked,
                       disliked = queries.get_disliked,
                       saved = queries.get_saved,
                       hidden = queries.get_hidden)

    if key.startswith('user-'):
        # e.g. "user-submitted-<account_id>": per-user listings are
        # always 'new'/'all' and store (fullname, timestamp) only
        acc_str, keytype, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted if keytype == 'submitted' else queries.get_comments
        q = fn(Account._byID(account_id), 'new', 'all')
        q._insert_tuples([(fname, float(timestamp))
                          for (timestamp, fname)
                          in maxes])
    elif key.startswith('sr-'):
        # e.g. "sr-top-year-<sr_id>"
        sr_str, sort, time, sr_id = key.split('-')
        sr_id = int(sr_id)

        if sort == 'controversy':
            # I screwed this up in the mapper and it's too late to fix
            # it
            sort = 'controversial'

        q = queries.get_links(Subreddit._byID(sr_id), sort, time)
        # tuples are stored (fullname, sort-value..., timestamp)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
    elif key.startswith('domain/'):
        # e.g. "domain/top/all/reddit.com"
        d_str, sort, time, domain = key.split('/')
        q = queries.get_domain_links(domain, sort, time)
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])

    elif key.split('-')[0] in userrel_fns:
        # e.g. "liked-<account_id>"
        key_type, account_id = key.split('-')
        account_id = int(account_id)
        fn = userrel_fns[key_type]
        q = fn(Account._byID(account_id))
        q._insert_tuples([tuple([item[-1]] + map(float, item[:-1]))
                          for item in maxes])
def write_permacache(fd = sys.stdin):
    """Reducer: keep the top 1000 tuples per key (ranked by their numeric
    prefix) and hand each key's winners to store_keys."""
    mr_tools.mr_reduce_max_per_key(lambda x: map(float, x[:-1]), num=1000,
                                   post=store_keys,
                                   fd = fd)

91
r2/r2/lib/mr_gold.py Normal file
View File

@@ -0,0 +1,91 @@
"""
psql -F"\t" -A -t -d newreddit -U ri -h $LINKDBHOST \
-c "\\copy (select t.thing_id,
'link',
t.ups,
t.downs,
t.deleted,
t.spam,
extract(epoch from t.date),
d.value
from reddit_thing_link t,
reddit_data_link d,
reddit_data_account a
where t.thing_id = d.thing_id
and not t.deleted
and d.key = 'author_id'
and a.thing_id = cast(d.value as int)
and a.key = 'gold'
and t.date > now() - interval '1 year'
) to 'gold.joined'"
cat gold.joined | paster --plugin=r2 run $INI r2/lib/mr_gold.py -c "time_listings()" | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/mr_gold.py -c "write_permacache()"
"""
import sys
from r2.models import Account, Subreddit, Link
from r2.lib.db.sorts import epoch_seconds, score, controversy, _hot
from r2.lib.db import queries
from r2.lib import mr_tools
from r2.lib.utils import timeago, UrlParser
from r2.lib.jsontemplates import make_fullname # what a strange place
# for this function
def time_listings(times = ('year','month','week','day','hour', 'all')):
    """mr_tools mapper: emit per-author
    (listing-key, sort-value, timestamp, fullname) tuples for each live link."""
    # cutoff epoch for each requested time window
    oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
                   for t in times if t != 'all')
    # 'all' has no cutoff
    oldests['all'] = 0

    @mr_tools.dataspec_m_thing(('author_id', int),)
    def process(link):
        assert link.thing_type == 'link'

        timestamp = link.timestamp
        fname = make_fullname(Link, link.thing_id)

        if not link.spam and not link.deleted:
            author_id = link.author_id
            ups, downs = link.ups, link.downs

            sc = score(ups, downs)
            contr = controversy(ups, downs)
            h = _hot(ups, downs, timestamp)

            for tkey, oldest in oldests.iteritems():
                if timestamp > oldest:
                    yield ('user-top-%s-%d' % (tkey, author_id),
                           sc, timestamp, fname)
                    yield ('user-controversial-%s-%d' % (tkey, author_id),
                           contr, timestamp, fname)
                    # new/hot listings are only generated for 'all'
                    if tkey == 'all':
                        yield ('user-new-%s-%d' % (tkey, author_id),
                               timestamp, timestamp, fname)
                        yield ('user-hot-%s-%d' % (tkey, author_id),
                               h, timestamp, fname)

    mr_tools.mr_map(process)
def store_keys(key, maxes):
    # Rebuild per-user CachedResults from the reduced maxes.  This job's
    # mapper only emits 'user-<sort>-<time>-<account_id>' keys, so that is
    # the only prefix handled here.
    if key.startswith('user-'):
        acc_str, sort, time, account_id = key.split('-')
        account_id = int(account_id)
        fn = queries.get_submitted
        q = fn(Account._byID(account_id), sort, time)
        # stored tuples are (fullname, sort-value..., timestamp)
        q._replace([tuple([item[-1]] + map(float, item[:-1]))
                    for item in maxes])
def write_permacache(fd = sys.stdin):
    """Reducer: keep the top 1000 tuples per key (ranked by their numeric
    prefix) and hand each key's winners to store_keys."""
    mr_tools.mr_reduce_max_per_key(lambda x: map(float, x[:-1]), num=1000,
                                   post=store_keys,
                                   fd = fd)

View File

@@ -8,27 +8,52 @@ there may warrant changes here
# to run:
"""
export LINKDBHOST=prec01
export USER=ri
export INI=production.ini
cd ~/reddit/r2
psql -F"\t" -A -t -d newreddit -U ri -h $LINKDBHOST \
-c "\\copy (select t.thing_id,
'link',
t.ups,
t.downs,
t.deleted,
t.spam,
extract(epoch from t.date),
d.value
from reddit_thing_link t,
reddit_data_link d
where t.thing_id = d.thing_id
and not t.spam and not t.deleted
and d.key = 'sr_id'
and t.date > now() - interval '1 year'
) to 'links.year.joined'"
cat links.year.joined | paster --plugin=r2 run production.ini r2/lib/mr_top.py -c "time_listings()" \
| sort -T. -S200m \
| paster --plugin=r2 run production.ini r2/lib/mr_top.py -c "write_permacache()"
time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
-c "\\copy (select t.thing_id, 'thing', 'link',
t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
from reddit_thing_link t
where not t.spam and not t.deleted
and t.date > now() - interval '1 year'
)
to 'reddit_thing_link.dump'"
time psql -F"\t" -A -t -d newreddit -U $USER -h $LINKDBHOST \
-c "\\copy (select d.thing_id, 'data', 'link',
d.key, d.value
from reddit_data_link d, reddit_thing_link t
where t.thing_id = d.thing_id
and not t.spam and not t.deleted
and (d.key = 'url' or d.key = 'sr_id')
and t.date > now() - interval '1 year'
)
to 'reddit_data_link.dump'"
cat reddit_data_link.dump reddit_thing_link.dump | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/mr_top.py -c "join_links()" > links.joined
cat links.joined | paster --plugin=r2 run $INI r2/lib/mr_top.py -c "time_listings()" | sort -T. -S200m | paster --plugin=r2 run $INI r2/lib/mr_top.py -c "write_permacache()"
"""
## """
## psql -F"\t" -A -t -d newreddit -U ri -h $LINKDBHOST \
## -c "\\copy (select t.thing_id,
## 'link',
## t.ups,
## t.downs,
## t.deleted,
## t.spam,
## extract(epoch from t.date),
## d.value
## from reddit_thing_link t,
## reddit_data_link d
## where t.thing_id = d.thing_id
## and not t.spam and not t.deleted
## and d.key = 'sr_id'
## and t.date > now() - interval '1 year'
## ) to 'links.year.joined'"
## cat links.year.joined | paster --plugin=r2 run production.ini r2/lib/mr_top.py -c "time_listings()" \
## | sort -T. -S200m \
## | paster --plugin=r2 run production.ini r2/lib/mr_top.py -c "write_permacache()"
## """
# that can be run with s/year/hour/g and
# s/time_listings/time_listings(('hour',))/ for a much faster version
# that just does the hour listings. Usually these jobs dump the thing
@@ -48,32 +73,46 @@ from r2.models import Account, Subreddit, Link
from r2.lib.db.sorts import epoch_seconds, score, controversy
from r2.lib.db import queries
from r2.lib import mr_tools
from r2.lib.utils import timeago
from r2.lib.utils import timeago, UrlParser
from r2.lib.jsontemplates import make_fullname # what a strange place
# for this function
def join_links():
    # join the thing- and data-table dumps on thing_id, keeping both the
    # 'url' and 'sr_id' data keys for the mapper
    mr_tools.join_things(('url', 'sr_id'))
def time_listings(times = ('year','month','week','day','hour')):
oldests = dict((t, epoch_seconds(timeago('1 %s' % t)))
for t in times)
@mr_tools.dataspec_m_thing(('sr_id', int),)
@mr_tools.dataspec_m_thing(("url", str),('sr_id', int),)
def process(link):
assert link.thing_type == 'link'
timestamp = link.timestamp
fname = make_fullname(Link, link.thing_id)
if not link.spam:
if not link.spam and not link.deleted:
sr_id = link.sr_id
if link.url:
domains = UrlParser(link.url).domain_permutations()
else:
domains = []
ups, downs = link.ups, link.downs
for tkey, oldest in oldests.iteritems():
if timestamp > oldest:
sc = score(ups, downs)
contr = controversy(ups, downs)
yield ('sr-top-%s-%d' % (tkey, sr_id),
score(ups, downs), timestamp, fname)
sc, timestamp, fname)
yield ('sr-controversial-%s-%d' % (tkey, sr_id),
controversy(ups, downs),
timestamp, fname)
contr, timestamp, fname)
for domain in domains:
yield ('domain/top/%s/%s' % (tkey, domain),
sc, timestamp, fname)
yield ('domain/controversial/%s/%s' % (tkey, domain),
contr, timestamp, fname)
mr_tools.mr_map(process)
@@ -112,6 +151,12 @@ def store_keys(key, maxes):
q = queries.get_links(Subreddit._byID(sr_id), sort, time)
q._replace([tuple([item[-1]] + map(float, item[:-1]))
for item in maxes])
elif key.startswith('domain/'):
d_str, sort, time, domain = key.split('/')
q = queries.get_domain_links(domain, sort, time)
q._replace([tuple([item[-1]] + map(float, item[:-1]))
for item in maxes])
elif key.split('-')[0] in userrel_fns:
key_type, account_id = key.split('-')

View File

@@ -206,7 +206,8 @@ class Reddit(Templated):
#don't show the subreddit info bar on cnames
if not isinstance(c.site, FakeSubreddit) and not c.cname:
ps.append(SubredditInfoBar())
ps.append(Ads())
if c.user.pref_show_adbox or not c.user.gold:
ps.append(Ads())
no_ads_yet = False
if self.submit_box:
@@ -222,6 +223,15 @@ class Reddit(Templated):
subtitles = rand_strings.get("create_reddit", 2),
show_cover = True, nocname=True))
if not c.user.gold and self.submit_box:
ps.append(SideBox(_('New subscriber features'),
'http://blog.reddit.com/2010/07/three-new-features-for-reddit-gold.html',
'gold',
sr_path = False,
subtitles = ["reddit gold just got better!",
"(read all about it on the blog)"],
show_cover = False, nocname = True))
if not isinstance(c.site, FakeSubreddit) and not c.cname:
moderators = self.sr_moderators()
if moderators:
@@ -243,7 +253,8 @@ class Reddit(Templated):
if no_ads_yet:
ps.append(Ads())
if c.user.pref_show_adbox or not c.user.gold:
ps.append(Ads())
if c.user_is_admin:
ps.append(Admin_Rightbox())
@@ -1277,6 +1288,26 @@ class UploadedImage(Templated):
Templated.__init__(self, status=status, img_src=img_src, name = name,
form_id = form_id)
class Thanks(Templated):
    """The page to claim reddit gold trophies"""
    def __init__(self, secret=None):
        # NOTE(review): the "recent-gold-<user>" cache entry is presumably
        # set when a purchase just completed -- confirm against the
        # payment-handling code
        if g.cache.get("recent-gold-" + c.user.name):
            status = "recent"
        elif c.user.gold:
            status = "gold"
        else:
            status = "mundane"

        # gold members get pointed at the private lounge reddit, when one
        # is configured
        if g.lounge_reddit:
            lounge_url = "/r/" + g.lounge_reddit
            lounge_html = (SC_OFF +
                           markdown(strings.lounge_msg % dict(link=lounge_url))
                           + SC_ON)
        else:
            lounge_html = None
        Templated.__init__(self, status=status, secret=secret,
                           lounge_html=lounge_html)
class Password(Templated):
"""Form encountered when 'recover password' is clicked in the LoginFormWide."""
def __init__(self, success=False):
@@ -1994,10 +2025,14 @@ class WrappedUser(CachedTemplate):
attribs.sort()
author_cls = 'author'
author_title = None
if gray:
author_cls += ' gray'
for tup in attribs:
author_cls += " " + tup[2]
# Hack: '(' should be in tup[3] iff this friend has a note
if tup[1] == 'F' and '(' in tup[3]:
author_title = tup[3]
target = None
ip_span = None
@@ -2014,6 +2049,7 @@ class WrappedUser(CachedTemplate):
CachedTemplate.__init__(self,
name = user.name,
author_cls = author_cls,
author_title = author_title,
attribs = attribs,
context_thing = context_thing,
karma = karma,
@@ -2028,12 +2064,13 @@ class WrappedUser(CachedTemplate):
class UserTableItem(Templated):
"""A single row in a UserList of type 'type' and of name
'container_name' for a given user. The provided list of 'cells'
will determine what order the different columns are rendered in.
Currently, this list can consist of 'user', 'sendmessage' and
'remove'."""
will determine what order the different columns are rendered in."""
def __init__(self, user, type, cellnames, container_name, editable,
remove_action):
self.user, self.type, self.cells = user, type, cellnames
remove_action, rel=None):
self.user = user
self.type = type
self.cells = cellnames
self.rel = rel
self.container_name = container_name
self.editable = editable
self.remove_action = remove_action
@@ -2094,6 +2131,15 @@ class FriendList(UserList):
"""Friend list on /pref/friends"""
type = 'friend'
def __init__(self, editable = True):
if c.user.gold:
self.friend_rels = c.user.friend_rels()
self.cells = ('user', 'sendmessage', 'note', 'age', 'remove')
self._class = "gold-accent rounded"
self.table_headers = (_('user'), '', _('note'), _('friendship'), '')
UserList.__init__(self)
@property
def form_title(self):
return _('add a friend')
@@ -2105,6 +2151,14 @@ class FriendList(UserList):
def user_ids(self):
return c.user.friends
def user_row(self, user):
    # without preloaded friend rels, fall back to the plain row; otherwise
    # build the extended row, passing the relation for the extra cells
    if not getattr(self, "friend_rels", None):
        return UserList.user_row(self, user)
    else:
        rel = self.friend_rels[user._id]
        return UserTableItem(user, self.type, self.cells, self.container_name,
                             True, self.remove_action, rel)
@property
def container_name(self):
return c.user._fullname
@@ -2122,7 +2176,12 @@ class ContributorList(UserList):
return _("approved submitters for %(reddit)s") % dict(reddit = c.site.name)
def user_ids(self):
return c.site.contributors
if c.site.name == g.lounge_reddit:
return [] # /r/lounge has too many subscribers to load without timing out,
# and besides, some people might not want this list to be so
# easily accessible.
else:
return c.site.contributors
class ModList(UserList):
"""Moderator list for a reddit."""

View File

@@ -85,8 +85,6 @@ class RedditQueueMap(QueueMap):
def newlink_bindings(self):
self._bind('new_link', 'scraper_q')
# note that we don't add search_changes here, because the
# initial vote on that item will add it
# this isn't in use until the spam_q plumbing is
#self._bind('new_link', 'newpage_q')
@@ -96,8 +94,7 @@ class RedditQueueMap(QueueMap):
self._bind('new_comment', 'commentstree_q')
def newsubreddit_bindings(self):
self._bind('new_subreddit', 'solrsearch_changes')
self._bind('new_subreddit', 'indextank_changes')
pass
try:
from r2admin.lib.adminqueues import *

View File

@@ -39,8 +39,8 @@ from pylons import g, config
from r2.models import *
from r2.lib.contrib import pysolr
from r2.lib.contrib.pysolr import SolrError
from r2.lib.utils import timeago
from r2.lib.utils import unicode_safe, tup
from r2.lib.utils import timeago, UrlParser
from r2.lib.utils import unicode_safe, tup, get_after, strordict_fullname
from r2.lib.cache import SelfEmptyingCache
from r2.lib import amqp
@@ -97,27 +97,6 @@ class ThingField(Field):
return ("<ThingField: (%s,%s,%s,%s)>"
% (self.name,self.cls,self.id_attr,self.lu_attr_name))
def domain_permutations(s):
    """
    Take a domain like `www.reddit.com` and return the *set* of strings a
    user might search for to find it: every dotted suffix
    (`www.reddit.com`, `reddit.com`, `com`) plus each individual label
    (`www`, `reddit`, `com`).

    (Fixes the old docstring, which claimed a list was returned and
    listed `com` twice.)
    """
    labels = s.split('.')
    # every dotted suffix: www.reddit.com -> reddit.com -> com
    ret = set('.'.join(labels[i:]) for i in range(len(labels)))
    # plus each bare label
    ret.update(labels)
    return ret
# Describes the fields of Thing objects and subclasses that are passed
# to Solr for indexing. All must have a 'contents' field, since that
# will be used for language-agnostic searching, and will be copied
@@ -160,9 +139,9 @@ search_fields={Thing: (Field('fullname', '_fullname'),
Field('sr_id'),
Field('url', tokenize = True),
#Field('domain',
# lambda l: domain_permutations(domain(l.url))),
# lambda l: UrlParser(l.url).domain_permutations()),
Field('site',
lambda l: domain_permutations(domain(l.url))),
lambda l: UrlParser(l.url).domain_permutations()),
#Field('is_self','is_self'),
),
Comment: (Field('contents', 'body', tokenize = True),
@@ -670,17 +649,6 @@ class DomainSearchQuery(SearchQuery):
return q, dict(fl='fullname',
qt='standard')
def get_after(fullnames, fullname, num):
if not fullname:
return fullnames[:num]
for i, item in enumerate(fullnames):
if item == fullname:
return fullnames[i+1:i+num+1]
return fullnames[:num]
def run_commit(optimize=False):
with SolrConnection(commit=True, optimize=optimize) as s:
pass
@@ -695,8 +663,10 @@ def run_changed(drain=False):
"""
def _run_changed(msgs, chan):
print "changed: Processing %d items" % len(msgs)
msgs = [strordict_fullname(msg.body)
for msg in msgs]
fullnames = set(msg['fullname'] for msg in msgs)
fullnames = set([x.body for x in msgs])
things = Thing._by_fullname(fullnames, data=True, return_dict=False)
things = [x for x in things if isinstance(x, indexed_types)]

View File

@@ -66,7 +66,6 @@ def cache_lists():
# because of permissions
continue
g.log.debug(sr.name)
for lang in 'all', sr.lang:
over18s = ['allow_over18']
if sr.over_18:
@@ -80,15 +79,14 @@ def cache_lists():
# keep the lists small while we work
if len(bylang[k]) > limit*2:
g.log.debug('Shrinking %s' % (k,))
bylang[k] = _chop(bylang[k])
for (lang, over18), srs in bylang.iteritems():
srs = _chop(srs)
sr_tuples = map(lambda sr: (sr._downs, sr.allow_top, sr._id), srs)
g.log.debug("For %s/%s setting %s" % (lang, over18,
map(lambda sr: sr.name, srs)))
print "For %s/%s setting %s" % (lang, over18,
map(lambda sr: sr.name, srs[:50]))
g.permacache.set(cached_srs_key(lang, over18), sr_tuples)

View File

@@ -137,6 +137,8 @@ string_dict = dict(
verified_quota_msg = _("You've submitted several links recently that haven't been doing very well. You'll have to wait a while before you can submit again, or [write to the moderators of this reddit](%(link)s) and ask for an exemption."),
unverified_quota_msg = _("You haven't [verified your email address](%(link1)s); until you do, your submitting privileges will be severely limited. Please try again in an hour or verify your email address. If you'd like an exemption from this rule, please [write to the moderators of this reddit](%(link2)s)."),
read_only_msg = _("reddit is in \"emergency read-only mode\" right now. :( you won't be able to log in. we're sorry, and are working frantically to fix the problem."),
lounge_msg = _("please grab a drink and join us in [the lounge](%(link)s)"),
postcard_msg = _("You sent us a postcard! (Or something similar.) When we run out of room on our refrigerator, we might one day auction off the stuff that people sent in. Is it okay if we include your thing?"),
)
class StringHandler(object):

View File

@@ -241,3 +241,26 @@ def flatten(list lists):
ret.extend(l)
return ret
cdef list _l(l):
    """Return a listified version of l, just returning l if it's
    already listified"""
    # avoids copying when the caller already holds a list
    if isinstance(l, list):
        return l
    else:
        return list(l)
def get_after(list fullnames, fullname, int num, bool reverse=False):
    """Return up to `num` entries of `fullnames` that come after `fullname`.

    Falls back to the first `num` entries when `fullname` is falsy or not
    present.  With reverse=True the list is scanned back-to-front.
    """
    cdef int i

    if reverse:
        fullnames = _l(reversed(fullnames))

    if not fullname:
        return fullnames[:num]

    for i, item in enumerate(fullnames):
        if item == fullname:
            return fullnames[i+1:i+num+1]

    # fullname not found: behave as if starting from the beginning
    return fullnames[:num]

View File

@@ -83,3 +83,16 @@ def bench_cache_lifetime_multi(attempts=10, minutes=60*24):
% (x+1, attempts, this_attempt, minimum, maximum, mean))
return (minimum, maximum, mean)
def subs_contribs(sr_name = 'betateam'):
    """Convert all subscribers of a given subreddit to
    contributors. Useful for forming opt-in beta teams"""
    from r2.models import Subreddit, SRMember
    sr = Subreddit._by_name(sr_name)
    q = SRMember._query(SRMember.c._thing1_id == sr._id)
    # bug fix: the original iterated over an undefined name `rels`;
    # the query object itself yields the SRMember relations
    for rel in q:
        if rel._name == 'subscriber':
            sr.add_contributor(rel._thing2)
            Subreddit.special_reddits(rel._thing2, 'contributor', _update=True)

View File

@@ -169,17 +169,6 @@ class Enum(Storage):
return Storage.__contains__(self, item)
class Enum(Storage):
    """A Storage mapping each given name to its ordinal position.

    Membership testing accepts either form: an int is matched against
    the ordinals, anything else against the names themselves.
    """
    def __init__(self, *names):
        self.name = tuple(names)
        Storage.__init__(self, ((label, idx) for idx, label in enumerate(names)))

    def __contains__(self, item):
        if isinstance(item, int):
            return item in self.values()
        return Storage.__contains__(self, item)
class Results():
def __init__(self, sa_ResultProxy, build_fn, do_batch=False):
self.rp = sa_ResultProxy
@@ -296,6 +285,8 @@ def sanitize_url(url, require_scheme = False):
# if there is a scheme and no hostname, it is a bad url.
if not u.hostname:
return
if u.username is not None or u.password is not None:
return
labels = u.hostname.split('.')
for label in labels:
try:
@@ -313,7 +304,6 @@ def sanitize_url(url, require_scheme = False):
return
return url
# Truncate a time to a certain number of minutes
# e.g, trunc_time(5:52, 30) == 5:30
def trunc_time(time, mins, hours=None):
@@ -330,12 +320,6 @@ def trunc_time(time, mins, hours=None):
microsecond = 0)
def median(l):
if l:
s = sorted(l)
i = len(s) / 2
return s[i]
def median(l):
if l:
s = sorted(l)
@@ -465,7 +449,7 @@ class UrlParser(object):
# if there is a netloc, there had better be a scheme
if self.netloc and not self.scheme:
self.scheme = "http"
return urlunparse((self.scheme, self.netloc,
self.path.replace('//', '/'),
self.params, q, self.fragment))
@@ -481,7 +465,7 @@ class UrlParser(object):
def get_subreddit(self):
"""checks if the current url refers to a subreddit and returns
that subreddit object. The cases here are:
* the hostname is unset or is g.domain, in which case it
looks for /r/XXXX or /reddits. The default in this case
is Default.
@@ -521,7 +505,7 @@ class UrlParser(object):
self.hostname.endswith(subreddit.domain)))
def path_add_subreddit(self, subreddit):
"""
"""
Adds the subreddit's path to the path if another subreddit's
prefix is not already present.
"""
@@ -540,7 +524,7 @@ class UrlParser(object):
elif self.port:
return self.hostname + ":" + str(self.port)
return self.hostname
def mk_cname(self, require_frame = True, subreddit = None, port = None):
"""
Converts a ?cnameframe url into the corresponding cnamed
@@ -550,7 +534,7 @@ class UrlParser(object):
# make sure the url is indeed in a frame
if require_frame and not self.query_dict.has_key(self.cname_get):
return self
# fetch the subreddit and make sure it
subreddit = subreddit or self.get_subreddit()
if subreddit and subreddit.domain:
@@ -570,7 +554,7 @@ class UrlParser(object):
self.path = lstrips(self.path, subreddit.path)
if not self.path.startswith('/'):
self.path = '/' + self.path
return self
def is_in_frame(self):
@@ -589,6 +573,46 @@ class UrlParser(object):
def __repr__(self):
return "<URL %s>" % repr(self.unparse())
def domain_permutations(self, fragments=False, subdomains=True):
    """
    Takes a domain like `www.reddit.com`, and returns a set of ways
    that a user might search for it, like:
    * www
    * reddit
    * com
    * www.reddit.com
    * reddit.com
    * com

    subdomains: include each dotted suffix (www.reddit.com, reddit.com)
    fragments: include each individual label (www, reddit, com)
    """
    ret = set()
    if self.hostname:
        r = self.hostname.split('.')

        # dotted suffixes; the loop bound len(r)-1 means the bare
        # last label is not emitted by this branch
        if subdomains:
            for x in xrange(len(r)-1):
                ret.add('.'.join(r[x:len(r)]))

        # every individual label on its own
        if fragments:
            for x in r:
                ret.add(x)

    return ret
@classmethod
def base_url(cls, url):
    """Normalize a URL for comparison/deduplication: lowercase the
    scheme and netloc, strip a leading 'www.' (only when more than
    two labels remain, so 'www.com' is left alone), and drop the
    fragment unless it is an AJAX-crawling '#!' fragment.
    """
    u = cls(url)

    # strip off any www and lowercase the hostname:
    netloc = u.netloc.lower()
    if len(netloc.split('.')) > 2 and netloc.startswith("www."):
        netloc = netloc[4:]

    # '#!' fragments identify distinct pages to crawlers, so keep them:
    # http://code.google.com/web/ajaxcrawling/docs/specification.html
    fragment = u.fragment if u.fragment.startswith("!") else ""

    return urlunparse((u.scheme.lower(), netloc,
                       u.path, u.params, u.query, fragment))
def to_js(content, callback="document.write", escape=True):
before = after = ''
@@ -1181,3 +1205,22 @@ class Bomb(object):
@classmethod
def __repr__(cls):
raise Hell()
def strordict_fullname(item, key='fullname'):
    """Sometimes we migrate AMQP queues from simple strings to pickled
    dictionaries. During the migratory period there may be items in
    the queue of both types, so this function tries to detect which
    the item is. It shouldn't really be used on a given queue for more
    than a few hours or days.

    Returns a dict mapping `key` to a string fullname.
    Raises ValueError when the unpickled payload is not a dict
    containing a string under `key`.
    """
    try:
        d = pickle.loads(item)
    except Exception:
        # Not a pickle (or a corrupt one): treat it as a bare fullname
        # string from the old queue format.  Narrowed from a bare
        # `except:`, which also swallowed KeyboardInterrupt/SystemExit.
        d = {key: item}

    if (not isinstance(d, dict)
        or key not in d
        or not isinstance(d[key], str)):
        raise ValueError('Error trying to migrate %r (%r)'
                         % (item, d))

    return d

View File

@@ -32,4 +32,5 @@ from trial import *
from ad import *
from bidding import *
from mail_queue import Email, has_opted_out, opt_count
from gold import *
from admintools import *

View File

@@ -64,6 +64,8 @@ class Account(Thing):
pref_threaded_messages = True,
pref_collapse_read_messages = False,
pref_private_feeds = True,
pref_show_adbox = True,
pref_show_sponsors = True,
mobile_compress = False,
mobile_thumbnail = True,
trusted_sponsor = False,
@@ -80,7 +82,9 @@ class Account(Thing):
email = "",
email_verified = False,
ignorereports = False,
pref_show_promote = None,
pref_show_promote = None,
gold = False,
creddits = 0,
)
def karma(self, kind, sr = None):
@@ -236,6 +240,26 @@ class Account(Thing):
def friends(self):
return self.friend_ids()
# Used on the goldmember version of /prefs/friends
@memoize('account.friend_rels')
def friend_rels_cache(self):
q = Friend._query(Friend.c._thing1_id == self._id,
Friend.c._name == 'friend')
return list(f._id for f in q)
def friend_rels(self, _update = False):
rel_ids = self.friend_rels_cache(_update=_update)
rels = Friend._byID_rel(rel_ids, return_dict=False,
eager_load = True, data = True,
thing_data = True)
return dict((r._thing2_id, r) for r in rels)
def add_friend_note(self, friend, note):
rels = self.friend_rels()
rel = rels[friend._id]
rel.note = note
rel._commit()
def delete(self):
self._deleted = True
self._commit()

View File

@@ -22,7 +22,7 @@
from r2.lib.utils import tup
from r2.lib.filters import websafe
from r2.lib.log import log_text
from r2.models import Report, Account
from r2.models import Report, Account, Subreddit
from pylons import g
@@ -173,6 +173,30 @@ class AdminTools(object):
sr._commit()
sr._incr('mod_actions', len(sr_things))
def engolden(self, account, was_postcard=False):
    """Turn on reddit gold for an account: set the gold flag, grant
    the reddit_gold trophy, refresh the cached friend rels (used by
    the gold-only friends page), and add the user to the lounge
    subreddit when one is configured.

    was_postcard: True when the membership was paid for by postcard.
    """
    from r2.lib.db.queries import changed  # NOTE(review): imported but unused here -- confirm
    account.gold = True

    # trophy description records how the membership was obtained
    if was_postcard:
        description = "Postcard Brigade"
    else:
        description = "Charter Member"

    Award.give_if_needed("reddit_gold", account,
                         description=description,
                         url="/help/gold")
    account._commit()

    # rebuild the memoized friend-rel id cache for this account
    account.friend_rels_cache(_update=True)

    if g.lounge_reddit:
        sr = Subreddit._by_name(g.lounge_reddit)
        sr.add_contributor(account)
def degolden(self, account):
    """Revoke reddit gold: clear the gold flag, take back the
    reddit_gold trophy, and remove the user from the lounge
    subreddit when one is configured.
    """
    from r2.lib.db.queries import changed  # NOTE(review): imported but unused here -- confirm
    account.gold = False
    Award.take_away("reddit_gold", account)
    account._commit()

    if g.lounge_reddit:
        sr = Subreddit._by_name(g.lounge_reddit)
        sr.remove_contributor(account)
admintools = AdminTools()

View File

@@ -65,7 +65,8 @@ class Award (Thing):
raise NotFound, 'Award %s' % codename
@classmethod
def give_if_needed(cls, codename, user, cup_info=None):
def give_if_needed(cls, codename, user,
description=None, url=None, cup_info=None):
"""Give an award to a user, unless they already have it.
Returns silently (except for g.log.debug) if the award
doesn't exist"""
@@ -83,7 +84,8 @@ class Award (Thing):
g.log.debug("%s already has %s" % (user, codename))
return
Trophy._new(user, award, cup_info=cup_info)
Trophy._new(user, award, description=description,
url=url, cup_info=cup_info)
g.log.debug("Gave %s to %s" % (codename, user))
@classmethod

View File

@@ -67,11 +67,14 @@ class Builder(object):
authors = {}
cup_infos = {}
email_attrses = {}
friend_rels = None
if aids:
authors = Account._byID(aids, True) if aids else {}
cup_infos = Account.cup_info_multi(aids)
if c.user_is_admin:
email_attrses = admintools.email_attrs(aids, return_dict=True)
if c.user.gold:
friend_rels = c.user.friend_rels()
subreddits = Subreddit.load_subreddits(items)
@@ -125,8 +128,15 @@ class Builder(object):
if user and item.author_id in user.friends:
# deprecated old way:
w.friend = True
# new way:
add_attr(w.attribs, 'F')
label = None
if friend_rels:
rel = friend_rels[item.author_id]
note = getattr(rel, "note", None)
if note:
label = "%s (%s)" % (_("friend"), note)
add_attr(w.attribs, 'F', label)
except AttributeError:
pass

116
r2/r2/models/gold.py Normal file
View File

@@ -0,0 +1,116 @@
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is Reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of the
# Original Code is CondeNet, Inc.
#
# All portions of the code written by CondeNet are Copyright (c) 2006-2010
# CondeNet, Inc. All Rights Reserved.
################################################################################
from r2.lib.db.tdb_sql import make_metadata, index_str, create_table
from pylons import g
import sqlalchemy as sa
# Gold transactions live in the 'authorize' database engine,
# presumably the same one used for other billing tables -- confirm.
ENGINE_NAME = 'authorize'
ENGINE = g.dbm.get_engine(ENGINE_NAME)
METADATA = make_metadata(ENGINE)

# One row per gold payment transaction.
gold_table = sa.Table('reddit_gold', METADATA,
                      sa.Column('trans_id', sa.String, nullable = False,
                                primary_key = True),
                      # status can be: invalid, unclaimed, claimed
                      sa.Column('status', sa.String, nullable = False),
                      sa.Column('date', sa.DateTime(timezone=True),
                                nullable=False),
                      sa.Column('payer_email', sa.String, nullable = False),
                      sa.Column('paying_id', sa.String, nullable = False),
                      sa.Column('pennies', sa.Integer, nullable = False),
                      # claim code given to the buyer; unique per purchase
                      sa.Column('secret', sa.String, nullable = True),
                      # filled in by claim_gold() once claimed
                      sa.Column('account_id', sa.String, nullable = True))

indices = [index_str(gold_table, 'status', 'status'),
           index_str(gold_table, 'date', 'date'),
           index_str(gold_table, 'account_id', 'account_id'),
           index_str(gold_table, 'secret', 'secret', unique = True),
           index_str(gold_table, 'payer_email', 'payer_email')]
create_table(gold_table, indices)
def create_unclaimed_gold (trans_id, payer_email, paying_id,
                           pennies, secret, date):
    """Record a paid-but-unclaimed gold purchase.

    `secret` is the claim code the buyer later redeems via
    claim_gold().
    """
    gold_table.insert().execute(trans_id=trans_id,
                                status="unclaimed",
                                payer_email=payer_email,
                                paying_id=paying_id,
                                pennies=pennies,
                                secret=secret,
                                date=date)
# caller is expected to engoldenate user
def create_claimed_gold (trans_id, payer_email, paying_id,
                         pennies, account, date):
    """Record a gold purchase that is already tied to an account, so
    no secret claim code is needed.  Does NOT grant gold itself --
    see the note above about the caller's responsibility.
    """
    gold_table.insert().execute(trans_id=trans_id,
                                status="claimed",
                                payer_email=payer_email,
                                paying_id=paying_id,
                                pennies=pennies,
                                account_id=account._id,
                                date=date)
# returns the number of pennies paid, if there's valid unclaimed gold
# returns 0 if the ID is valid but the gold's already been claimed
# returns None if the ID was never valid
def claim_gold(secret, account_id):
    """Atomically claim an unclaimed gold purchase by its secret code.

    Returns the pennies paid (int) when this call performed the
    claim, 0 when the code exists but was already claimed, and None
    when the code was never valid (or is empty).
    """
    if not secret:
        return None

    # The donation email has the code at the end of the sentence,
    # so they might get sloppy and catch the period or some whitespace.
    secret = secret.strip(". ")

    # The conditional UPDATE doubles as the atomic claim: only a row
    # still in 'unclaimed' status can transition to 'claimed'.
    rp = gold_table.update(sa.and_(gold_table.c.status == 'unclaimed',
                                   gold_table.c.secret == secret),
                           values = {
                               gold_table.c.status: 'claimed',
                               gold_table.c.account_id: account_id,
                           },
                           ).execute()

    if rp.rowcount == 0:
        just_claimed = False
    elif rp.rowcount == 1:
        just_claimed = True
    else:
        # secret has a unique index, so more than one updated row
        # should be impossible
        raise ValueError("rowcount == %d?" % rp.rowcount)

    # Look the row up again to distinguish "already claimed" (row
    # exists) from "never valid" (no row at all).
    s = sa.select([gold_table.c.pennies],
                  gold_table.c.secret == secret,
                  limit = 1)
    rows = s.execute().fetchall()

    if not rows:
        return None
    elif just_claimed:
        return rows[0].pennies
    else:
        return 0
def check_by_email(email):
    """Return every gold purchase row paid for by the given email
    address (status, secret, pennies, account_id tuples)."""
    columns = [gold_table.c.status,
               gold_table.c.secret,
               gold_table.c.pennies,
               gold_table.c.account_id]
    query = sa.select(columns, gold_table.c.payer_email == email)
    return query.execute().fetchall()

View File

@@ -22,7 +22,7 @@
from r2.lib.db.thing import Thing, Relation, NotFound, MultiRelation, \
CreationError
from r2.lib.db.operators import desc
from r2.lib.utils import base_url, tup, domain, title_to_url
from r2.lib.utils import base_url, tup, domain, title_to_url, UrlParser
from r2.lib.utils.trial_utils import trial_info
from account import Account, DeletedUser
from subreddit import Subreddit
@@ -65,6 +65,15 @@ class Link(Thing, Printable):
def __init__(self, *a, **kw):
Thing.__init__(self, *a, **kw)
@classmethod
def by_url_key_new(cls, url):
    """Build the cache key used to look up links by URL.

    Key shape: 'byurl(<md5 of normalized url>,<normalized url prefix>)',
    truncated so the whole key fits within maxlen (250) characters --
    presumably a cache key-length limit; confirm.
    """
    maxlen = 250
    template = 'byurl(%s,%s)'
    # normalize (lowercase + base_url) then force utf8 before hashing
    keyurl = _force_utf8(UrlParser.base_url(url.lower()))
    hexdigest = md5(keyurl).hexdigest()
    usable_len = maxlen-len(template)-len(hexdigest)
    return template % (hexdigest, keyurl[:usable_len])
@classmethod
def by_url_key(cls, url):
maxlen = 250
@@ -224,10 +233,10 @@ class Link(Thing, Printable):
#
# skip the item if 18+ and the user has that preference set
# ignore skip if we are visiting a nsfw reddit
#if ( (user and user.pref_no_profanity) or
# (not user and g.filter_over18) ) and wrapped.subreddit != c.site:
# return not bool(wrapped.subreddit.over_18 or
# wrapped._nsfw.findall(wrapped.title))
if ( not c.user_is_loggedin and
(wrapped.subreddit != c.site or c.site.name == 'multi')):
return not bool(wrapped.subreddit.over_18 or
wrapped.over_18)
return True
@@ -433,8 +442,6 @@ class Link(Thing, Printable):
item._deleted,
item._spam))
item.is_author = (user == item.author)
# bits that we will render stubs (to make the cached
# version more flexible)
item.num = CachedVariable("num")
@@ -521,11 +528,16 @@ class Comment(Thing, Printable):
pass
def _delete(self):
from r2.lib.db.queries import changed
link = Link._byID(self.link_id, data = True)
link._incr('num_comments', -1)
changed(link, True)
@classmethod
def _new(cls, author, link, parent, body, ip):
from r2.lib.db.queries import changed
c = Comment(_ups = 1,
body = body,
link_id = link._id,
@@ -551,6 +563,8 @@ class Comment(Thing, Printable):
c._commit()
changed(link, True) # only the number of comments has changed
inbox_rel = None
# only global admins can be message spammed.
if to and (not c._spam or to.name in g.admins):
@@ -658,7 +672,11 @@ class Comment(Thing, Printable):
else:
item.parent_permalink = None
item.can_reply = c.can_reply or (item.sr_id in can_reply_srs)
item.can_reply = False
if c.can_reply or (item.sr_id in can_reply_srs):
age = c.start_time - item._date
if age.days < g.REPLY_AGE_LIMIT:
item.can_reply = True
# not deleted on profile pages,

View File

@@ -300,7 +300,8 @@ class Email(object):
"FINISHED_PROMO",
"NEW_PROMO",
"HELP_TRANSLATE",
"NERDMAIL"
"NERDMAIL",
"GOLDMAIL",
)
subjects = {
@@ -320,6 +321,7 @@ class Email(object):
Kind.NEW_PROMO : _("[reddit] your promotion has been created"),
Kind.HELP_TRANSLATE : _("[i18n] translation offer from '%(user)s'"),
Kind.NERDMAIL : _("[reddit] hey, nerd!"),
Kind.GOLDMAIL : _("[reddit] reddit gold activation link")
}
def __init__(self, user, thing, email, from_name, date, ip, banned_ip,

View File

@@ -370,7 +370,7 @@ class Subreddit(Thing, Printable):
else Subreddit._byID(sr_ids, data=True, return_dict=False))
@classmethod
def default_subreddits(cls, ids = True, limit = g.num_default_reddits):
def default_subreddits(cls, ids = True, over18 = False, limit = g.num_default_reddits):
"""
Generates a list of the subreddits any user with the current
set of language preferences and no subscriptions would see.
@@ -386,7 +386,7 @@ class Subreddit(Thing, Printable):
srs = cls.top_lang_srs(c.content_langs, limit + len(auto_srs),
filter_allow_top = True,
over18 = c.over18, ids = True)
over18 = over18, ids = True)
rv = []
for sr in srs:
@@ -419,7 +419,7 @@ class Subreddit(Thing, Printable):
if srs else Subreddit._by_name(g.default_sr))
@classmethod
def user_subreddits(cls, user, ids = True, limit = sr_limit):
def user_subreddits(cls, user, ids = True, over18=False, limit = sr_limit):
"""
subreddits that appear in a user's listings. If the user has
subscribed, returns the stored set of subscriptions.
@@ -438,7 +438,7 @@ class Subreddit(Thing, Printable):
return_dict=False)
else:
limit = g.num_default_reddits if limit is None else limit
return cls.default_subreddits(ids = ids, limit = limit)
return cls.default_subreddits(ids = ids, over18=over18, limit = limit)
@classmethod
@memoize('subreddit.special_reddits')
@@ -832,6 +832,8 @@ class DomainSR(FakeSubreddit):
def get_links(self, sort, time):
from r2.lib.db import queries
# TODO: once the lists are precomputed properly, this can be
# switched over to use the non-_old variety.
return queries.get_domain_links(self.domain, sort, time)
Sub = SubSR()

View File

@@ -151,11 +151,6 @@ class Vote(MultiRelation('vote',
v.organic = organic
v._commit()
g.cache.delete(queries.prequeued_vote_key(sub, obj))
v._fast_query_timestamp_touch(sub)
v._fast_query_timestamp_touch(sub)
v._fast_query_timestamp_touch(sub)
@@ -192,6 +187,8 @@ class Vote(MultiRelation('vote',
cv.organic = v.organic
cv._commit()
queries.changed(votee, True)
return v
#TODO make this generic and put on multirelation?

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@@ -335,7 +335,8 @@ function friend(user_name, container_name, type) {
showcover();
}
else {
$.request("friend",
encoded = encodeURIComponent(reddit.referer);
$.request("friend?note=" + encoded,
{name: user_name, container: container_name, type: type});
}
}
@@ -343,7 +344,7 @@ function friend(user_name, container_name, type) {
function unfriend(user_name, container_name, type) {
return function() {
$.request("unfriend",
$.request("unfriend",
{name: user_name, container: container_name, type: type});
}
};

Binary file not shown.

Before

Width:  |  Height:  |  Size: 195 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 170 KiB

After

Width:  |  Height:  |  Size: 31 KiB

View File

@@ -245,6 +245,19 @@
</td>
</tr>
%endif
%if c.user.gold:
<tr class="gold-accent">
<th>${_("gold options")}</th>
<td class="prefright">
${checkbox(_("show the right sidebar ad box"), "show_adbox")}
&#32;<span class="little gray">(${_("The attractive 300x250 one that usually only has reddit t-shirts in it.")})</span>
<br/>
${checkbox(_("show sponsored links"), "show_sponsors")}
&#32;<span class="little gray">(${_("The blue advertisements that sometimes appear on the top of the page.")})</span>
<br/>
</td>
</tr>
%endif
<tr>
<td>
<input type="submit" class="btn" value="${_('save options')}"/>

View File

@@ -111,7 +111,6 @@
<script src="${static('jquery.js')}" type="text/javascript"></script>
<script src="${static('jquery.json.js')}" type="text/javascript"></script>
<script src="${static('jquery.reddit.js')}" type="text/javascript"></script>
<script src="${static('jquery.lazyload.js')}" type="text/javascript"></script>
<script src="${static('reddit.js')}" type="text/javascript"></script>
</%def>
@@ -132,6 +131,7 @@
%if thing.content:
##<div class="fixedwidth"><!--IE6sux--></div>
##<div class="clearleft"><!--IE6sux--></div>
<a name="content"></a>
<div class="content">
${thing.content()}
</div>

View File

@@ -29,6 +29,7 @@
<%namespace file="utils.html" import="plain_link, text_with_js, img_link, separator, logout"/>
<div id="header">
<a tabindex="1" href="#content" id="jumpToContent">${_('jump to content')}</a>
${thing.srtopbar}
<div id="header-bottom-${'right' if c.lang_rtl else 'left'}">
<%
@@ -127,6 +128,7 @@
<div class="morelink nub"></div>
<div class="mlh"></div>
<div class="mlhn"></div>
<div class="sidebox gold"><div class="morelink"></div></div>
<a class="pretty-button negative"></a>
<a class="pretty-button negative pressed"></a>
<a class="pretty-button positive"></a>

View File

@@ -0,0 +1,124 @@
## The contents of this file are subject to the Common Public Attribution
## License Version 1.0. (the "License"); you may not use this file except in
## compliance with the License. You may obtain a copy of the License at
## http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
## License Version 1.1, but Sections 14 and 15 have been added to cover use of
## software over a computer network and provide for limited attribution for the
## Original Developer. In addition, Exhibit A has been modified to be consistent
## with Exhibit B.
##
## Software distributed under the License is distributed on an "AS IS" basis,
## WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
## the specific language governing rights and limitations under the License.
##
## The Original Code is Reddit.
##
## The Original Developer is the Initial Developer. The Initial Developer of
## the Original Code is CondeNet, Inc.
##
## All portions of the code written by CondeNet are Copyright (c) 2006-2010
## CondeNet, Inc. All Rights Reserved.
################################################################################
<%!
from r2.lib.template_helpers import static
%>
<%namespace name="utils" file="utils.html"/>
<%namespace file="promotelinkform.html" name="p" />
<%namespace file="utils.html"
import="error_field, checkbox, image_upload, reddit_selector" />
<script type="text/javascript" src="${static('ui.core.js')}"></script>
<script type="text/javascript" src="${static('ui.datepicker.js')}"></script>
<script type="text/javascript" src="${static('sponsored.js')}"></script>
<div class="pretty-form campaign">
<%utils:line_field title="${_('create a roadblock')}" id="campaign-field">
<%
start_title = "Date when your sponsored link will start running. We start new campaigns at midnight UTC+5"
end_title = "Date when your sponsored link will end (at midnight UTC+5)"
targeting_title = "name of the community that you are targeting. A blank entry here means that the ad is untargeted and will run site-wise "
newcamp_title = "click to create a new campaign. To edit an existing campaing in the table below, click the 'edit' button."
%>
<table class="preftable">
<tr>
<th>for date</th>
<td class="prefright">
${error_field("BAD_DATE", "startdate", "div")}
${error_field("BAD_FUTURE_DATE", "startdate", "div")}
${error_field("BAD_DATE", "enddate", "div")}
${error_field("BAD_FUTURE_DATE", "enddate", "div")}
${error_field("BAD_DATE_RANGE", "enddate", "div")}
<%p:datepicker name="startdate", value="${thing.startdate}"
minDateSrc="#date-min" initfuncname="init_startdate">
function(elem) {
var other = $("#enddate");
if(dateFromInput("#startdate") >= dateFromInput("#enddate")) {
var newd = new Date();
newd.setTime($(elem).datepicker('getDate').getTime() + 86400*1000);
$("#enddate").val((newd.getMonth()+1) + "/" +
newd.getDate() + "/" + newd.getFullYear());
}
$("#datepicker-enddate").datepicker("destroy");
update_bid(elem);
}
</%p:datepicker>
-
<%p:datepicker name="enddate", value="${thing.enddate}"
minDateSrc="#startdate" initfuncname="init_enddate">
function(elem) { update_bid(elem); }
</%p:datepicker>
</td>
</tr>
</table>
<script type="text/javascript">
$(function() {
init_startdate();
init_enddate();
})
</script>
<div class="targeting">
${reddit_selector(thing.default_sr, thing.sr_searches, thing.subreddits)}
</div>
<div class="buttons">
<input type="hidden" name="indx" value="" />
<span class="status error"></span>
<button name="create"
onclick="return post_pseudo_form('.campaign', 'add_roadblock')">
create
</button>
</div>
<div class="clear"></div>
</div>
</%utils:line_field>
</div>
<%utils:line_field title="${_('existing roadblocks')}">
<div class='existing-campaigns'>
%if thing.roadblocks:
<table>
<tr>
<th>reddit</th>
<th>Start Date</th>
<th>End Date</th>
<th></th>
</tr>
%for reddit, startdate, enddate in thing.roadblocks:
<tr>
<td>${reddit}</td>
<td>${startdate.strftime("%Y-%m-%d")}</td>
<td>${enddate.strftime("%Y-%m-%d")}</td>
<td>
<form method="post" action="/post/rm_roadblock" onsubmit="return post_form(this, 'rm_roadblock')">
<input type="hidden" name="sr" value="${reddit}"/>
<input type="hidden" name="startdate" value='${startdate.strftime("%m/%d/%Y")}'/>
<input type="hidden" name="enddate" value='${enddate.strftime("%m/%d/%Y")}'/>
<button type="submit">remove</button>
</form>
</td>
</tr>
%endfor
</table>
%endif
</div>
</%utils:line_field>

View File

@@ -0,0 +1,80 @@
## The contents of this file are subject to the Common Public Attribution
## License Version 1.0. (the "License"); you may not use this file except in
## compliance with the License. You may obtain a copy of the License at
## http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
## License Version 1.1, but Sections 14 and 15 have been added to cover use of
## software over a computer network and provide for limited attribution for the
## Original Developer. In addition, Exhibit A has been modified to be consistent
## with Exhibit B.
##
## Software distributed under the License is distributed on an "AS IS" basis,
## WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
## the specific language governing rights and limitations under the License.
##
## The Original Code is Reddit.
##
## The Original Developer is the Initial Developer. The Initial Developer of
## the Original Code is CondeNet, Inc.
##
## All portions of the code written by CondeNet are Copyright (c) 2006-2010
## CondeNet, Inc. All Rights Reserved.
################################################################################
<%namespace file="utils.html" import="error_field, success_field"/>
<%namespace name="utils" file="utils.html"/>
<%namespace file="createsubreddit.html" import="radio_type"/>
<%! from r2.lib.strings import strings %>
<form id="passform" action="/api/claimgold" method="post"
class="content"
onsubmit="return post_form(this, 'claimgold');">
<h1>${_("thanks for subscribing!")}</h1>
<p>
%if thing.status == "mundane":
${_("enter your confirmation code below to activate reddit gold")}
%else:
%if thing.status == "recent":
${_("You claimed a reddit gold subscription in the last few minutes.")}
%else:
${_("You're already a reddit gold subscriber.")}
%endif
&#32;
${_("But if you just gave us even more money, enter the new confirmation code below and we'll add the extra credit to your account.")}</p>
%endif
<div class="spacer">
<%utils:round_field title="">
<input type="text" name="code" value="${thing.secret}" />
${error_field("INVALID_CODE", "code")}
${error_field("CLAIMED_CODE", "code")}
${error_field("NO_TEXT", "code")}
</%utils:round_field>
</div>
<div class="postcard">
<%utils:round_field title="${_('we\'ve got mail!')}">
<div class="postcard-msg rounded">
${strings.postcard_msg}
<table>
${radio_type("postcard", "yes", _("sure, that sounds awesome!"),
"", False)}
${radio_type("postcard", "no", _("no, please shred it when you're done"),
"", False)}
</table>
</div>
</%utils:round_field>
</div>
<button type="submit" class="btn">${_("claim your prize")}</button>
<span class="status"></span>
%if thing.lounge_html:
<span class="lounge">
${unsafe(thing.lounge_html)}
</span>
%endif
</form>

View File

@@ -26,7 +26,7 @@
%if thing.editable:
<form action="/post/${thing.destination}"
method="post" class="pretty-form medium-text"
onsubmit="return post_form(this, '${thing.destination}');"
onsubmit="return post_form(this, '${thing.destination}');"
id="${thing.type}">
<h1>${thing.form_title}</h1>
@@ -47,6 +47,13 @@
</h1>
<table>
%if getattr(thing, "table_headers", None):
<tr>
%for header in thing.table_headers:
<th>${header}</th>
%endfor
</tr>
%endif
%if thing.users:
%for item in thing.users:
${item}
@@ -56,5 +63,6 @@
%endif
</table>
</div>
</div>

View File

@@ -23,6 +23,10 @@
<%namespace file="printablebuttons.html" import="ynbutton" />
<%namespace file="utils.html" import="plain_link"/>
<%!
from r2.lib.utils import timesince
%>
<tr>
%for cell in thing.cells:
<td>
@@ -33,17 +37,17 @@
</td>
%endfor
</tr>
<%def name="cell_type()">
%if thing.name == "user":
<span class="user">
${plain_link(thing.user.name, "/user/%s/" % thing.user.name,
${plain_link(thing.user.name, "/user/%s/" % thing.user.name,
_sr_path=False)}
&nbsp;(<b>${thing.user.safe_karma}</b>)
</span>
&nbsp;
%elif c.user_is_loggedin and thing.name == "sendmessage" and c.user != thing.user:
${plain_link(_("send message"),
${plain_link(_("send message"),
"/message/compose?to=%s" % (thing.user.name))}
&nbsp;
%elif thing.name == "remove":
@@ -56,5 +60,20 @@
%else:
<span class="gray">${_("can't remove")}</span>
%endif
%elif thing.name == "note":
<form action="/post/friendnote" id="friendnote-${thing.rel._fullname}"
method="post" class="pretty-form medium-text friend-note"
onsubmit="return post_form(this, 'friendnote');">
<input type="hidden" name="name" value="${thing.user.name}" />
<input type="text" maxlength="300" name="note" class="tiny"
onfocus="$(this).parent().addClass('edited')"
value="${getattr(thing.rel, 'note', '')}" />
<button onclick="$(this).parent().removeClass('edited')" type="submit">submit</button>
<span class="status"></span>
</form>
%elif thing.name == "age":
<span title="${thing.rel._date.strftime('%Y-%m-%d %H:%M:%S')}">
${timesince(thing.rel._date)}
</span>
%endif
</%def>

View File

@@ -72,6 +72,7 @@
${error_field("TOO_LONG", thing.name, "span")}
${error_field("RATELIMIT", "ratelimit", "span")}
${error_field("NO_TEXT", thing.name, "span")}
${error_field("TOO_OLD", "parent", "span")}
${error_field("DELETED_COMMENT", "parent", "span")}
${error_field("DELETED_LINK", "parent", "span")}
<div class="usertext-buttons">

View File

@@ -29,8 +29,8 @@
<span>[deleted]</span>
%else:
${plain_link(thing.name + thing.karma, "/user/%s" % thing.name,
_class = thing.author_cls + (" id-%s" % thing.fullname),
_sr_path = False, target=target)}
_class = thing.author_cls + (" id-%s" % thing.fullname),
_sr_path = False, target=target, title=thing.author_title)}
<span class="userattrs">
%if thing.attribs:
&#32;[

View File

@@ -1 +0,0 @@
development.ini

View File

@@ -143,6 +143,7 @@ setup(
"psycopg2",
"py_interface",
"pycountry",
"python-cassandra",
"thrift" # required by Cassandra
],
packages=find_packages(),
@@ -171,13 +172,7 @@ setup(
# the cassandra stuff we'll need. down here because it needs to be
# done *after* thrift is installed
try:
import cassandra, pycassa
except ImportError:
# we'll need thrift too, but that is done by install_depends below
easy_install(['http://github.com/downloads/ieure/python-cassandra/Cassandra-0.5.0.tar.gz', # required by pycassa
'http://github.com/downloads/ketralnis/pycassa/pycassa-0.1.1.tar.gz',
])
easy_install(["http://github.com/downloads/vomjom/pycassa/pycassa-0.3.0.tar.gz"])
# running setup.py always fucks up the build directory, which we don't
# need anyway.

View File

@@ -1,4 +0,0 @@
#!/bin/bash
cd ~/reddit/r2
/usr/local/bin/paster run run.ini r2/lib/utils/utils.py -c "from r2.lib.db import queries; queries.catch_up_batch_queries()"

55
scripts/gen_time_listings.sh Executable file
View File

@@ -0,0 +1,55 @@
#!/bin/bash
# Export recent links (plus their url/sr_id data keys) from the link
# database, then pipe them through the r2/lib/mr_top.py map/reduce
# steps to build time-filtered listings in the permacache.

USER=ri
LINKDBHOST=prec01

# e.g. 'year'
INTERVAL="$1"
# e.g. '("hour","day","week","month","year")'
LISTINGS="$2"

INI=production_batch.ini

FNAME="links.$INTERVAL.joined"
DNAME="data.$INTERVAL.joined"

export PATH=/usr/local/pgsql/bin:/usr/local/bin:$PATH

# bail out rather than running psql/paster in the wrong directory
cd "$HOME/reddit/r2" || exit 1

if [ -e "$FNAME" ]; then
    # typo fix: message previously said "existss"
    echo "cannot start because $FNAME exists"
    exit 1
fi

# make this exist immediately to act as a lock
touch "$FNAME"

# thing-table rows for non-spam, non-deleted links newer than the interval
psql -F"\t" -A -t -d newreddit -U "$USER" -h "$LINKDBHOST" \
     -c "\\copy (select t.thing_id, 'thing', 'link',
                        t.ups, t.downs, t.deleted, t.spam, extract(epoch from t.date)
                   from reddit_thing_link t
                  where not t.spam and not t.deleted
                    and t.date > now() - interval '1 $INTERVAL'
                )
           to '$FNAME'"

# matching data-table rows, limited to the url and sr_id keys
psql -F"\t" -A -t -d newreddit -U "$USER" -h "$LINKDBHOST" \
     -c "\\copy (select t.thing_id, 'data', 'link',
                        d.key, d.value
                   from reddit_data_link d, reddit_thing_link t
                  where t.thing_id = d.thing_id
                    and not t.spam and not t.deleted
                    and (d.key = 'url' or d.key = 'sr_id')
                    and t.date > now() - interval '1 $INTERVAL'
                ) to '$DNAME'"

# join the thing/data dumps, compute the time listings, and write
# the results into the permacache
cat "$FNAME" "$DNAME" | sort -T. -S200m | \
    paster --plugin=r2 run "$INI" r2/lib/mr_top.py -c "join_links()" | \
    paster --plugin=r2 run "$INI" r2/lib/mr_top.py -c "time_listings($LISTINGS)" | \
    sort -T. -S200m | \
    paster --plugin=r2 run "$INI" r2/lib/mr_top.py -c "write_permacache()"

rm "$FNAME" "$DNAME"

View File

@@ -1,395 +0,0 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<Storage>
<!--======================================================================-->
<!-- Basic Configuration -->
<!--======================================================================-->
<!--
~ The name of this cluster. This is mainly used to prevent machines in
~ one logical cluster from joining another.
-->
<ClusterName>redditdev</ClusterName>
<!--
~ Turn on to make new [non-seed] nodes automatically migrate the right data
~ to themselves. (If no InitialToken is specified, they will pick one
~ such that they will get half the range of the most-loaded node.)
~ If a node starts up without bootstrapping, it will mark itself bootstrapped
  ~ so that you can't subsequently accidentally bootstrap a node with
~ data on it. (You can reset this by wiping your data and commitlog
~ directories.)
~
~ Off by default so that new clusters and upgraders from 0.4 don't
~ bootstrap immediately. You should turn this on when you start adding
~ new nodes to a cluster that already has data on it. (If you are upgrading
~ from 0.4, start your cluster with it off once before changing it to true.
~ Otherwise, no data will be lost but you will incur a lot of unnecessary
~ I/O before your cluster starts up.)
-->
<AutoBootstrap>true</AutoBootstrap>
<!--
~ Keyspaces and ColumnFamilies:
~ A ColumnFamily is the Cassandra concept closest to a relational
~ table. Keyspaces are separate groups of ColumnFamilies. Except in
~ very unusual circumstances you will have one Keyspace per application.
~ There is an implicit keyspace named 'system' for Cassandra internals.
-->
<Keyspaces>
<Keyspace Name="reddit">
<!-- Relations -->
<ColumnFamily CompareWith="UTF8Type" Name="LinkVote" />
<ColumnFamily CompareWith="UTF8Type" Name="CommentVote" />
<!-- Views -->
<ColumnFamily CompareWith="UTF8Type" Name="VotesByLink" />
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
<ReplicationFactor>3</ReplicationFactor>
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
</Keyspace>
<Keyspace Name="permacache">
<ColumnFamily CompareWith="BytesType" Name="permacache" RowsCached="3000000" />
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
<ReplicationFactor>3</ReplicationFactor>
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
</Keyspace>
<Keyspace Name="urls">
<!--
~ ColumnFamily definitions have one required attribute (Name)
~ and several optional ones.
~
~ The CompareWith attribute tells Cassandra how to sort the columns
~ for slicing operations. The default is BytesType, which is a
~ straightforward lexical comparison of the bytes in each column.
~ Other options are AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType,
~ and LongType. You can also specify the fully-qualified class
~ name to a class of your choice extending
~ org.apache.cassandra.db.marshal.AbstractType.
~
~ SuperColumns have a similar CompareSubcolumnsWith attribute.
~
~ BytesType: Simple sort by byte value. No validation is performed.
~ AsciiType: Like BytesType, but validates that the input can be
~ parsed as US-ASCII.
~ UTF8Type: A string encoded as UTF8
~ LongType: A 64bit long
~ LexicalUUIDType: A 128bit UUID, compared lexically (by byte value)
~ TimeUUIDType: a 128bit version 1 UUID, compared by timestamp
~
~ (To get the closest approximation to 0.3-style supercolumns, you
~ would use CompareWith=UTF8Type CompareSubcolumnsWith=LongType.)
~
~ An optional `Comment` attribute may be used to attach additional
~ human-readable information about the column family to its definition.
~
~ The optional KeysCached attribute specifies
~ the number of keys per sstable whose locations we keep in
~ memory in "mostly LRU" order. (JUST the key locations, NOT any
~ column values.) Specify a fraction (value less than 1), a percentage
~ (ending in a % sign) or an absolute number of keys to cache.
~ KeysCached defaults to 200000 keys.
~
~ The optional RowsCached attribute specifies the number of rows
~ whose entire contents we cache in memory. Do not use this on
~ ColumnFamilies with large rows, or ColumnFamilies with high write:read
~ ratios. Specify a fraction (value less than 1), a percentage (ending in
~ a % sign) or an absolute number of rows to cache.
~ RowsCached defaults to 0, i.e., row cache is off by default.
~
~ Remember, when using caches as a percentage, they WILL grow with
~ your data set!
-->
<ColumnFamily CompareWith="BytesType" Name="urls"
/>
<!-- <ColumnFamily Name="Super2"
ColumnType="Super"
CompareWith="UTF8Type"
CompareSubcolumnsWith="UTF8Type"
RowsCached="10000"
KeysCached="50%"
Comment="A column family with supercolumns, whose column and subcolumn names are UTF8 strings"/> -->
<!--
~ Strategy: Setting this to the class that implements
~ IReplicaPlacementStrategy will change the way the node picker works.
~ Out of the box, Cassandra provides
~ org.apache.cassandra.locator.RackUnawareStrategy and
~ org.apache.cassandra.locator.RackAwareStrategy (place one replica in
~ a different datacenter, and the others on different racks in the same
~ one.)
-->
<ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
<!-- Number of replicas of the data -->
<ReplicationFactor>2</ReplicationFactor>
<!--
~ EndPointSnitch: Setting this to the class that implements
~ AbstractEndpointSnitch, which lets Cassandra know enough
~ about your network topology to route requests efficiently.
~ Out of the box, Cassandra provides org.apache.cassandra.locator.EndPointSnitch,
~ and PropertyFileEndPointSnitch is available in contrib/.
-->
<EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
</Keyspace>
</Keyspaces>
<!--
~ Authenticator: any IAuthenticator may be used, including your own as long
~ as it is on the classpath. Out of the box, Cassandra provides
  ~ org.apache.cassandra.auth.AllowAllAuthenticator and
~ org.apache.cassandra.auth.SimpleAuthenticator
~ (SimpleAuthenticator uses access.properties and passwd.properties by
~ default).
~
~ If you don't specify an authenticator, AllowAllAuthenticator is used.
-->
<Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
<!--
~ Partitioner: any IPartitioner may be used, including your own as long
~ as it is on the classpath. Out of the box, Cassandra provides
~ org.apache.cassandra.dht.RandomPartitioner,
~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
  ~ (CollatingOPP collates according to EN,US rules, not naive byte
~ ordering. Use this as an example if you need locale-aware collation.)
~ Range queries require using an order-preserving partitioner.
~
~ Achtung! Changing this parameter requires wiping your data
~ directories, since the partitioner can modify the sstable on-disk
~ format.
-->
<Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
<!--
~ If you are using an order-preserving partitioner and you know your key
~ distribution, you can specify the token for this node to use. (Keys
~ are sent to the node with the "closest" token, so distributing your
~ tokens equally along the key distribution space will spread keys
~ evenly across your cluster.) This setting is only checked the first
~ time a node is started.
~ This can also be useful with RandomPartitioner to force equal spacing
~ of tokens around the hash space, especially for clusters with a small
~ number of nodes.
-->
<InitialToken></InitialToken>
<!--
~ Directories: Specify where Cassandra should store different data on
~ disk. Keep the data disks and the CommitLog disks separate for best
~ performance
-->
<CommitLogDirectory>/cassandra/commitlog</CommitLogDirectory>
<DataFileDirectories>
<DataFileDirectory>/cassandra/data</DataFileDirectory>
</DataFileDirectories>
<!--
~ Addresses of hosts that are deemed contact points. Cassandra nodes
~ use this list of hosts to find each other and learn the topology of
~ the ring. You must change this if you are running multiple nodes!
-->
<Seeds>
<Seed>reddit.local</Seed>
</Seeds>
<!-- Miscellaneous -->
<!-- Time to wait for a reply from other nodes before failing the command -->
<RpcTimeoutInMillis>30000</RpcTimeoutInMillis>
<!-- Size to allow commitlog to grow to before creating a new segment -->
<CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
<!-- Local hosts and ports -->
<!--
~ Address to bind to and tell other nodes to connect to. You _must_
~ change this if you want multiple nodes to be able to communicate!
~
~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
~ will always do the Right Thing *if* the node is properly configured
~ (hostname, name resolution, etc), and the Right Thing is to use the
~ address associated with the hostname (it might not be).
-->
<ListenAddress></ListenAddress>
<!-- internal communications port -->
<StoragePort>7000</StoragePort>
<!--
~ The address to bind the Thrift RPC service to. Unlike ListenAddress
~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
~ all interfaces.
~
~ Leaving this blank has the same effect it does for ListenAddress,
~ (i.e. it will be based on the configured hostname of the node).
-->
<ThriftAddress></ThriftAddress>
<!-- Thrift RPC port (the port clients connect to). -->
<ThriftPort>9160</ThriftPort>
<!--
~ Whether or not to use a framed transport for Thrift. If this option
~ is set to true then you must also use a framed transport on the
~ client-side, (framed and non-framed transports are not compatible).
-->
<ThriftFramedTransport>false</ThriftFramedTransport>
<!--======================================================================-->
<!-- Memory, Disk, and Performance -->
<!--======================================================================-->
<!--
~ Access mode. mmapped i/o is substantially faster, but only practical on
~ a 64bit machine (which notably does not include EC2 "small" instances)
~ or relatively small datasets. "auto", the safe choice, will enable
~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
~ (which may allow you to get part of the benefits of mmap on a 32bit
~ machine by mmapping only index files) and "standard".
~ (The buffer size settings that follow only apply to standard,
~ non-mmapped i/o.)
-->
<DiskAccessMode>auto</DiskAccessMode>
<!--
~ Size of compacted row above which to log a warning. (If compacted
~ rows do not fit in memory, Cassandra will crash. This is explained
~ in http://wiki.apache.org/cassandra/CassandraLimitations and is
~ scheduled to be fixed in 0.7.)
-->
<RowWarningThresholdInMB>512</RowWarningThresholdInMB>
<!--
~ Buffer size to use when performing contiguous column slices. Increase
~ this to the size of the column slices you typically perform.
~ (Name-based queries are performed with a buffer size of
~ ColumnIndexSizeInKB.)
-->
<SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
<!--
~ Buffer size to use when flushing memtables to disk. (Only one
~ memtable is ever flushed at a time.) Increase (decrease) the index
~ buffer size relative to the data buffer if you have few (many)
~ columns per key. Bigger is only better _if_ your memtables get large
~ enough to use the space. (Check in your data directory after your
~ app has been running long enough.) -->
<FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
<FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
<!--
~ Add column indexes to a row after its contents reach this size.
~ Increase if your column values are large, or if you have a very large
~ number of columns. The competing causes are, Cassandra has to
~ deserialize this much of the row to read a single column, so you want
~ it to be small - at least if you do many partial-row reads - but all
~ the index data is read for each access, so you don't want to generate
~ that wastefully either.
-->
<ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
<!--
~ Flush memtable after this much data has been inserted, including
~ overwritten data. There is one memtable per column family, and
~ this threshold is based solely on the amount of data stored, not
~ actual heap memory usage (there is some overhead in indexing the
~ columns).
-->
<MemtableThroughputInMB>64</MemtableThroughputInMB>
<!--
~ Throughput setting for Binary Memtables. Typically these are
~ used for bulk load so you want them to be larger.
-->
<BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
<!--
~ The maximum number of columns in millions to store in memory per
~ ColumnFamily before flushing to disk. This is also a per-memtable
~ setting. Use with MemtableThroughputInMB to tune memory usage.
-->
<MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
<!--
~ The maximum time to leave a dirty memtable unflushed.
~ (While any affected columnfamilies have unflushed data from a
~ commit log segment, that segment cannot be deleted.)
~ This needs to be large enough that it won't cause a flush storm
~ of all your memtables flushing at once because none has hit
~ the size or count thresholds yet. For production, a larger
~ value such as 1440 is recommended.
-->
<MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
<!--
~ Unlike most systems, in Cassandra writes are faster than reads, so
~ you can afford more of those in parallel. A good rule of thumb is 2
~ concurrent reads per processor core. Increase ConcurrentWrites to
~ the number of clients writing at once if you enable CommitLogSync +
~ CommitLogSyncDelay. -->
<ConcurrentReads>8</ConcurrentReads>
<ConcurrentWrites>32</ConcurrentWrites>
<!--
~ CommitLogSync may be either "periodic" or "batch." When in batch
~ mode, Cassandra won't ack writes until the commit log has been
~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
~ milliseconds for other writes, before performing the sync.
~ This is less necessary in Cassandra than in traditional databases
~ since replication reduces the odds of losing data from a failure
~ after writing the log entry but before it actually reaches the disk.
~ So the other option is "periodic," where writes may be acked immediately
~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
~ milliseconds.
-->
<CommitLogSync>periodic</CommitLogSync>
<!--
~ Interval at which to perform syncs of the CommitLog in periodic mode.
~ Usually the default of 10000ms is fine; increase it if your i/o
~ load is such that syncs are taking excessively long times.
-->
<CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
<!--
~ Delay (in milliseconds) during which additional commit log entries
~ may be written before fsync in batch mode. This will increase
~ latency slightly, but can vastly improve throughput where there are
~ many writers. Set to zero to disable (each entry will be synced
~ individually). Reasonable values range from a minimal 0.1 to 10 or
~ even more if throughput matters more than latency.
-->
<!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
<!--
  ~ Time to wait before garbage-collecting deletion markers. Set this to
~ a large enough value that you are confident that the deletion marker
~ will be propagated to all replicas by the time this many seconds has
~ elapsed, even in the face of hardware failures. The default value is
~ ten days.
-->
<GCGraceSeconds>864000</GCGraceSeconds>
</Storage>

View File

@@ -1,3 +0,0 @@
#!/bin/sh
# daemontools log/run script: capture the supervised service's stdout
# with multilog into ./main, creating log files group-readable only.
umask 0027
exec setuidgid reddit multilog ./main

View File

@@ -1,8 +0,0 @@
#!/bin/sh
#
# This stub needs to have an actual consumer hooked up to it if you would like to run search
# daemontools run script: drains the 'log_q' AMQP queue, discarding each
# message (lambda x: 0), so the queue does not grow unbounded when no
# real consumer is deployed.
export HOME=/home/reddit
cd $HOME/reddit/r2
# merge stderr into stdout so multilog captures both streams
exec 2>&1
exec setuidgid reddit /usr/local/bin/paster run run.ini -c "from r2.lib import amqp; amqp.consume_items('log_q', lambda x: 0)"

View File

@@ -1,3 +0,0 @@
#!/bin/sh
# daemontools log/run script: capture the supervised service's stdout
# with multilog into ./main, creating log files group-readable only.
umask 0027
exec setuidgid reddit multilog ./main

View File

@@ -1,8 +0,0 @@
#!/bin/sh
#
# This stub needs to have an actual consumer hooked up to it if you would like to run search
# daemontools run script: drains the 'searchchanges_q' AMQP queue,
# discarding each message (lambda x: 0), so the queue does not grow
# unbounded when no search indexer is deployed.
export HOME=/home/reddit
cd $HOME/reddit/r2
# merge stderr into stdout so multilog captures both streams
exec 2>&1
exec setuidgid reddit /usr/local/bin/paster run run.ini -c "from r2.lib import amqp; amqp.consume_items('searchchanges_q', lambda x: 0)"