From cf8b55274c45bdbf54b7eb8078e844c8b77b8d95 Mon Sep 17 00:00:00 2001 From: Neil Williams Date: Sun, 10 Jun 2012 19:21:46 -0700 Subject: [PATCH] Use new LastModified system for Cassandra-based relations. By necessity, this changes the API of tdb_cassandra.Relation._fast_query to take and return thing objects rather than ID36s. --- r2/r2/lib/db/queries.py | 14 ++------- r2/r2/lib/db/tdb_cassandra.py | 59 +++++++++++++++++++++++++---------- r2/r2/models/link.py | 16 +++------- r2/r2/models/vote.py | 14 ++++----- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/r2/r2/lib/db/queries.py b/r2/r2/lib/db/queries.py index ad5a6a449..5f8dea26a 100755 --- a/r2/r2/lib/db/queries.py +++ b/r2/r2/lib/db/queries.py @@ -1200,25 +1200,15 @@ def get_likes(user, items): else False if v == '-1' else None) - # avoid requesting items that they can't have voted on (we're - # still using the tdb_sql Thing API for this). TODO: we should do - # this before the prequeued_vote_keys lookup, so that in extreme - # cases we can avoid hitting the cache for them at all, but in the - # current state that precludes brand new comments from appearing - # to have been voted on for item in items: + # already retrieved above if (user, item) in res: continue # we can only vote on links and comments - if isinstance(item, (Link, Comment)): - rel = Vote.rel(user.__class__, item.__class__) - if rel._can_skip_lookup(user, item): - res[(user, item)] = None - else: + if not isinstance(item, (Link, Comment)): res[(user, item)] = None - # now hit Cassandra with the remainder likes = Vote.likes(user, [i for i in items if (user, i) not in res]) res.update(likes) diff --git a/r2/r2/lib/db/tdb_cassandra.py b/r2/r2/lib/db/tdb_cassandra.py index 7287e3cde..d7327dd57 100644 --- a/r2/r2/lib/db/tdb_cassandra.py +++ b/r2/r2/lib/db/tdb_cassandra.py @@ -813,30 +813,57 @@ class Relation(ThingBase): pass @classmethod - def _fast_query(cls, thing1_ids, thing2_ids, properties = None, **kw): + def _fast_query(cls, thing1s, thing2s, properties = None, **kw): """Find all of the relations of this class between all of the members of thing1_ids and thing2_ids""" - thing1_ids, thing1s_is_single = tup(thing1_ids, True) - thing2_ids, thing2s_is_single = tup(thing2_ids, True) + thing1s, thing1s_is_single = tup(thing1s, True) + thing2s, thing2s_is_single = tup(thing2s, True) - if not thing1_ids or not thing2_ids: - # nothing to permute + if not thing1s or not thing2s: return {} + # grab the last time each thing1 modified this relation class so we can + # know which relations not to even bother looking up + if thing1s: + from r2.models.last_modified import LastModified + fullnames = [cls._thing1_cls._fullname_from_id36(thing1._id36) + for thing1 in thing1s] + timestamps = LastModified.get_multi(fullnames, + cls._cf.column_family) + + # build up a list of ids to look up, throwing out the ones that the + # timestamp fetched above indicates are pointless + ids = set() + thing1_ids, thing2_ids = {}, {} + for thing1 in thing1s: + last_modification = timestamps.get(thing1._fullname) + + if not last_modification: + continue + + for thing2 in thing2s: + key = cls._rowkey(thing1._id36, thing2._id36) + + if key in ids: + continue + + if thing2._date > last_modification: + continue + + ids.add(key) + thing2_ids[thing2._id36] = thing2 + thing1_ids[thing1._id36] = thing1 + + # all relations must load these properties, even if unrequested if properties is not None: properties = set(properties) - # all relations must load these properties, even if - # unrequested properties.add('thing1_id') properties.add('thing2_id') - # permute all of the pairs - ids = set(cls._rowkey(x, y) - for x in thing1_ids - for y in thing2_ids) - - rels = cls._byID(ids, properties = properties).values() + rels = {} + if ids: + rels = cls._byID(ids, properties=properties).values() if thing1s_is_single and thing2s_is_single: if rels: @@ -844,10 +871,10 @@ class Relation(ThingBase): return rels[0] else: raise NotFound("<%s %r>" % (cls.__name__, - cls._rowkey(thing1_ids[0], - thing2_ids[0]))) + cls._rowkey(thing1s[0]._id36, + thing2s[0]._id36))) - return dict(((rel.thing1_id, rel.thing2_id), rel) + return dict(((thing1_ids[rel.thing1_id], thing2_ids[rel.thing2_id]), rel) for rel in rels) @classmethod diff --git a/r2/r2/models/link.py b/r2/r2/models/link.py index e02c48250..594ad786b 100755 --- a/r2/r2/models/link.py +++ b/r2/r2/models/link.py @@ -336,14 +336,8 @@ class Link(Thing, Printable): site = c.site if user_is_loggedin: - saved_lu = [] - for item in wrapped: - if not SaveHide._can_skip_lookup(user, item): - saved_lu.append(item._id36) - - saved = CassandraSave._fast_query(user._id36, saved_lu) - hidden = CassandraHide._fast_query(user._id36, saved_lu) - + saved = CassandraSave._fast_query(user, wrapped) + hidden = CassandraHide._fast_query(user, wrapped) clicked = {} else: saved = hidden = clicked = {} @@ -408,8 +402,8 @@ class Link(Thing, Printable): item.urlprefix = '' if user_is_loggedin: - item.saved = (user._id36, item._id36) in saved - item.hidden = (user._id36, item._id36) in hidden + item.saved = (user, item) in saved + item.hidden = (user, item) in hidden item.clicked = bool(clicked.get((user, item, 'click'))) else: @@ -1218,7 +1212,7 @@ class SimpleRelation(tdb_cassandra.Relation): @classmethod def _uncreate(cls, user, link): try: - cls._fast_query(user._id36, link._id36)._destroy() + cls._fast_query(user, link)._destroy() except tdb_cassandra.NotFound: pass diff --git a/r2/r2/models/vote.py b/r2/r2/models/vote.py index 36f5f925d..c9b5519a6 100644 --- a/r2/r2/models/vote.py +++ b/r2/r2/models/vote.py @@ -70,7 +70,7 @@ class CassandraVote(tdb_cassandra.Relation): votee = v._thing2 cvc = cls._rel(Account, votee.__class__) try: - cv = cvc._fast_query(voter._id36, votee._id36) + cv = cvc._fast_query(voter, votee) except tdb_cassandra.NotFound: cv = cvc(thing1_id = voter._id36, thing2_id = votee._id36) cv.name = v._name @@ -268,14 +268,12 @@ class Vote(MultiRelation('vote', ret = {} for relcls, items in rels.iteritems(): - ids = dict((item._id36, item) - for item in items) - votes = relcls._fast_query(sub._id36, ids, + votes = relcls._fast_query(sub, items, properties=['name']) - for (thing1_id36, thing2_id36), rel in votes.iteritems(): - ret[(sub, ids[thing2_id36])] = (True if rel.name == '1' - else False if rel.name == '-1' - else None) + for cross, rel in votes.iteritems(): + ret[cross] = (True if rel.name == '1' + else False if rel.name == '-1' + else None) return ret def test():