diff --git a/r2/r2/lib/migrate/migrate.py b/r2/r2/lib/migrate/migrate.py
index 08d315537..31b80c608 100644
--- a/r2/r2/lib/migrate/migrate.py
+++ b/r2/r2/lib/migrate/migrate.py
@@ -381,3 +381,19 @@ def port_cassahides():
     for sh in q:
         CassandraHide._hide(sh._thing1, sh._thing2,
                             write_consistency_level=CL.ONE)
+
+def convert_query_cache_to_json():
+    import cPickle
+    from r2.models.query_cache import json, UserQueryCache
+
+    with UserQueryCache._cf.batch() as m:
+        for key, columns in UserQueryCache._cf.get_range():
+            out = {}
+            for ckey, cvalue in columns.iteritems():
+                try:
+                    raw = cPickle.loads(cvalue)
+                except cPickle.UnpicklingError:
+                    continue
+                out[ckey] = json.dumps(raw)
+            m.insert(key, out)
+
diff --git a/r2/r2/models/query_cache.py b/r2/r2/models/query_cache.py
index a94ee26ac..9f0a1f113 100644
--- a/r2/r2/models/query_cache.py
+++ b/r2/r2/models/query_cache.py
@@ -1,10 +1,9 @@
 import random
-import cPickle
 import datetime
 import collections
 
 from pylons import g
-from pycassa.system_manager import ASCII_TYPE
+from pycassa.system_manager import ASCII_TYPE, UTF8_TYPE
 from pycassa.batch import Mutator
 
 from r2.models import Thing
@@ -19,6 +18,16 @@
 PRUNE_CHANCE = g.querycache_prune_chance
 MAX_CACHED_ITEMS = 1000
 
+# if cjson is installed, use it. it's faster.
+try:
+    import cjson as json
+except ImportError:
+    LOG.warning("Couldn't import cjson. Using (slower) python implementation.")
+    import json
+else:
+    json.dumps, json.loads = json.encode, json.decode
+
+
 class ThingTupleComparator(object):
     def __init__(self, sorts):
         self.sorts = sorts
@@ -260,7 +269,8 @@ def merged_cached_query(fn):
 class BaseQueryCache(object):
     __metaclass__ = tdb_cassandra.ThingMeta
     _connection_pool = 'main'
-    _extra_schema_creation_args = dict(key_validation_class=ASCII_TYPE)
+    _extra_schema_creation_args = dict(key_validation_class=ASCII_TYPE,
+                                       default_validation_class=UTF8_TYPE)
     _compare_with = ASCII_TYPE
     _use_db = False
 
@@ -278,8 +288,8 @@
         timestamps = []
 
         for (key, (value, timestamp)) in columns.iteritems():
-            value = cPickle.loads(value)
-            data.append((key,) + value)
+            value = json.loads(value)
+            data.append((key,) + tuple(value))
             timestamps.append((key, timestamp))
 
         res[row] = (data, dict(timestamps))
@@ -289,7 +299,7 @@
     @classmethod
     @tdb_cassandra.will_write
     def insert(cls, mutator, key, columns):
-        updates = dict((key, cPickle.dumps(value, protocol=2))
+        updates = dict((key, json.dumps(value))
                        for key, value in columns.iteritems())
 
         mutator.insert(cls._cf, key, updates)