From fdb67396242c79cf407a9975a20cbb6a2f8cdf3d Mon Sep 17 00:00:00 2001 From: David King Date: Mon, 11 Feb 2013 22:54:51 -0800 Subject: [PATCH] Add models and skeleton for recommender --- r2/r2/lib/db/tdb_cassandra.py | 10 +++-- r2/r2/lib/recommender.py | 76 +++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 r2/r2/lib/recommender.py diff --git a/r2/r2/lib/db/tdb_cassandra.py b/r2/r2/lib/db/tdb_cassandra.py index 9a4118961..35fc4141a 100644 --- a/r2/r2/lib/db/tdb_cassandra.py +++ b/r2/r2/lib/db/tdb_cassandra.py @@ -34,7 +34,7 @@ from pycassa.cassandra.ttypes import ConsistencyLevel, NotFoundException from pycassa.system_manager import (SystemManager, UTF8_TYPE, COUNTER_COLUMN_TYPE, TIME_UUID_TYPE, ASCII_TYPE) -from pycassa.types import DateType +from pycassa.types import DateType, LongType, IntegerType from r2.lib.utils import tup, Storage from r2.lib import cache from uuid import uuid1, UUID @@ -646,7 +646,9 @@ class ThingBase(object): self._column_ttls.clear() def __getattr__(self, attr): - if attr.startswith('_'): + if isinstance(attr, basestring) and attr.startswith('_'): + # TODO: I bet this interferes with Views whose column names can + # start with a _ try: return self.__dict__[attr] except KeyError: @@ -669,7 +671,9 @@ class ThingBase(object): if attr == '_id' and self._committed: raise ValueError('cannot change _id on a committed %r' % (self.__class__)) - if attr.startswith('_'): + if isinstance(attr, basestring) and attr.startswith('_'): + # TODO: I bet this interferes with Views whose column names can + # start with a _ return object.__setattr__(self, attr, val) try: diff --git a/r2/r2/lib/recommender.py b/r2/r2/lib/recommender.py new file mode 100644 index 000000000..37c1d2de1 --- /dev/null +++ b/r2/r2/lib/recommender.py @@ -0,0 +1,76 @@ +# The contents of this file are subject to the Common Public Attribution +# License Version 1.0. 
# (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2012 reddit
# Inc. All Rights Reserved.
###############################################################################

from datetime import timedelta

from r2.models import Subreddit
from r2.lib.db import tdb_cassandra
from r2.lib.memoize import memoize


def get_recommendations(srs):
    """
    Look up the subreddits recommended to people who like the given one.

    `srs` is a sequence that must currently contain exactly one object
    whose class is exactly Subreddit; a sequence is taken so that the
    signature can later be extended to multireddits.

    Returns an empty list when no recommendations are stored. Otherwise
    returns the result of Subreddit._byID36(..., return_dict=True,
    data=True) for the recommended ID36s.
    NOTE(review): that result is presumably a dict, i.e. a different
    type from the empty-list case -- confirm against callers.
    """

    # single-subreddit input only, for the time being
    assert len(srs) == 1 and srs[0].__class__ == Subreddit

    rec_id36s = _get_recommendations(srs[0]._id36)
    if rec_id36s:
        return Subreddit._byID36(rec_id36s, return_dict=True, data=True)

    return []


# Wrapped with memoize under the key '_get_recommendations' (stale=True);
# the actual lookup is delegated to SRRecommendation.for_sr.
@memoize('_get_recommendations', stale=True)
def _get_recommendations(srid36):
    return SRRecommendation.for_sr(srid36)


# View holding the recommendations for a subreddit. for_sr queries it
# with a subreddit's ID36 as the row key; per the original author's note
# on _to_recs, each row maps a rank to a recommended subreddit ID36.
class SRRecommendation(tdb_cassandra.View):
    _use_db = True

    _compare_with = tdb_cassandra.LongType()

    # let rows expire when no run has happened recently, or when the last
    # few runs produced no recommendations for the subreddit in question
    _ttl = timedelta(days=2)

    @classmethod
    def for_sr(cls, srid36, count=5):
        """
        Return at most `count` recommended subreddit ID36s for the
        subreddit whose ID36 is `srid36`, never including `srid36` itself.
        """

        # one column more than `count` is requested, presumably so that
        # dropping `srid36` from the results can still leave `count` entries
        query = tdb_cassandra.ColumnQuery(cls, [srid36],
                                          column_count=count + 1)
        others = [col[1] for col in query if col[1] != srid36]

        return others[:count]

    def _to_recs(self):
        """
        Return the values of self._values(), ordered by the integer value
        of their keys.
        """
        by_rank = self._values()  # per the original note: rank -> srid
        ranked = sorted(by_rank.items(), key=lambda pair: int(pair[0]))
        return [srid for _rank, srid in ranked]