Experiments: add a decorator for whitelisting loid-based experiments

This will allow us to use loid-based experiments along side the pagecache, and intelligently vary the cache accordingly.
2026-01-14 01:18:15 -05:00 · 2016-03-08 11:01:48 -08:00
parent 178d7a2ae0
commit 4e045fd831
8 changed files with 114 additions and 1 deletions
--- a/r2/example.ini
+++ b/r2/example.ini
@@ -924,6 +924,9 @@ modmail_forwarding_email =
 # modmail email mapping from sender name to reddit account name
 modmail_account_map =

+# experiments that require the page cache to always be varied
+global_loid_experiments = 
+
 # precomputed comment orders
 precomputed_comment_sort_read_chance = 0
 precomputed_comment_sorts =
--- a/r2/r2/config/feature/README.md
+++ b/r2/r2/config/feature/README.md
@@ -177,6 +177,29 @@ specify a variant with a secondary syntax for a few flag conditions:
 feature_some_flag = {"url": {"some_flag_something": "test_something", "some_flag_something_else": "test_something_else"}}
 ```

+## An important note about loggedout ("loid-based") experiments
+
+Logged out experiments are special in that many of logged-out requests are
+aggressively cached.  Thus, in order to work with the cache, the cache has to be
+varied based on the variant of a given experiment for a given loid.  
+
+Since the page cache is tested before the handler (and in fact the handler may
+never be called if the page is found in the pagecache), the app needs to be made
+aware of these experiments up front.  This is done in one of two ways:
+
+ 1. If the experiment will affect most requests (such as a content experiment or
+ a major UI update), it should be added into `global_loid_experiments` (in the
+ live config).    
+ 2. If the effects are localized to a single handler, the
+ `vary_pagecache_on_experiments` decorator needs to be used on that handler.
+ It takes the list of all experiments that should be varied on for the sake of
+ that handler, and works in a way similar to the `pagecache_policy` decorator.
+
+**NOTE** Use of one or both of these functions is mandatory for the logged out
+experiment to work properly.  If the logged out experiment is not properly whitelisted,
+`r2.config.feature.state._get_experiment_variant` will emit a
+`"feature.non_whitelisted_experiment"` message to graphite and will also write
+`"loid-based experiment is not whitelisted"` to `logging.debug`.  

 ## When should I use this?

--- a/r2/r2/config/feature/state.py
+++ b/r2/r2/config/feature/state.py
@@ -20,6 +20,7 @@
 # Inc. All Rights Reserved.
 ###############################################################################

+import logging
 import json
 import hashlib

@@ -353,6 +354,15 @@ class FeatureState(object):
            bucket = self._calculate_bucket(user._fullname)
        # for logged out users, bucket based on the loid if we have one
        elif g.enable_loggedout_experiments:
+            # if the experiment is logged-out, the pagecache has to know about
+            # it or we're going to have a bad time
+            if not self.world.is_whitelisted_experiment(self.name):
+                self.world.simple_event("feature.non_whitelisted_experiment")
+                logging.debug(
+                    "loid-based experiment is not whitelisted: %s",
+                    self.name
+                )
+                return None
            loid = self.world.current_loid()
            # we can't run an experiment if we have no id to vary on.
            if not loid:
--- a/r2/r2/config/feature/world.py
+++ b/r2/r2/config/feature/world.py
@@ -64,6 +64,10 @@ class World(object):
            return ''
        return site.name

+    def is_whitelisted_experiment(self, name):
+        exps = self.stacked_proxy_safe_get(c, "whitelisted_loid_experiments")
+        return exps and name in exps
+
    def current_subdomain(self):
        return self.stacked_proxy_safe_get(c, 'subdomain')

@@ -113,3 +117,8 @@ class World(object):
    def live_config(self, name):
        live = self.stacked_proxy_safe_get(g, 'live_config', {})
        return live.get(name)
+
+    def simple_event(self, name):
+        stats = self.stacked_proxy_safe_get(g, 'stats', None)
+        if stats:
+            return stats.simple_event(name)
--- a/r2/r2/controllers/front.py
+++ b/r2/r2/controllers/front.py
@@ -29,6 +29,7 @@ from r2.controllers.reddit_base import (
    paginated_listing,
    RedditController,
    require_https,
+    vary_pagecache_on_experiments,
 )
 from r2 import config
 from r2.models import *
@@ -1094,6 +1095,8 @@ class FrontController(RedditController):

    search_help_page = "/wiki/search"
    verify_langs_regex = re.compile(r"\A[a-z][a-z](,[a-z][a-z])*\Z")
+
+    @vary_pagecache_on_experiments("aa_test_loggedout")
    @base_listing
    @require_oauth2_scope("read")
    @validate(query=VLength('q', max_length=512),
--- a/r2/r2/controllers/reddit_base.py
+++ b/r2/r2/controllers/reddit_base.py
@@ -161,6 +161,35 @@ def pagecache_policy(policy):
    return pagecache_decorator


+def vary_pagecache_on_experiments(*names):
+    """Keep track of which loid-based experiments are affecting the pagecache.
+
+    Since loid-based experiments will vary the resulting content, this is used
+    to accumulate a whitelist of experiments referenced in the decorator below.
+    """
+    # we are going to use this to build the cache key, so
+    # consistent ordering at compile time would be nice
+    global_experiments = g.live_config.get("global_loid_experiments")
+    if global_experiments:
+        names = names + tuple(global_experiments)
+    names = sorted(names)
+
+    def _vary_pagecache_on_experiments(fn):
+        # store this list on the handler itself, since (by the nature of the
+        # page cache) we won't actually *call* this handler.  We'll set
+        # the whitelist on c in `request_key` and in the decorated body
+        fn.whitelisted_loid_experiments = names
+
+        @wraps(fn)
+        def _vary_pagecache_on_experiments_inner(self, *a, **kw):
+            # For checking the whitelist in the feature methods, set it on
+            # the request context.
+            c.whitelisted_loid_experiments = names
+            return fn(self, *a, **kw)
+        return _vary_pagecache_on_experiments_inner
+    return _vary_pagecache_on_experiments
+
+
 cache_affecting_cookies = ('over18', '_options', 'secure_session')
 # Cookies which may be set in a response without making it uncacheable
 CACHEABLE_COOKIES = ()
@@ -836,6 +865,23 @@ class MinimalController(BaseController):
        else:
            location = None

+        whitelisted_variants = []
+        # if there are logged out experiments expected, we need to vary
+        # the cache key on them
+        if (
+            g.enable_loggedout_experiments and
+            not c.user_is_loggedin and c.loid
+        ):
+            handler = self._get_action_handler()
+            if hasattr(handler, "whitelisted_loid_experiments"):
+                # pull the whitelist onto `c` as we check there in features
+                whitelist = handler.whitelisted_loid_experiments
+                c.whitelisted_loid_experiments = whitelist
+                for name in whitelist:
+                    whitelisted_variants.append(
+                        (name, feature.variant(name, None))
+                    )
+
        _id = make_key_id(
            c.lang,
            request.host,
@@ -848,6 +894,7 @@ class MinimalController(BaseController):
            feature.is_enabled("https_redirect"),
            request.environ.get("WANT_RAW_JSON"),
            cookies_key,
+            whitelisted_variants,
        )
        key = "page:%s" % _id
        return key
--- a/r2/r2/lib/app_globals.py
+++ b/r2/r2/lib/app_globals.py
@@ -373,6 +373,7 @@ class Globals(object):
            'discovery_srs',
            'proxy_gilding_accounts',
            'mweb_blacklist_expressions',
+            'global_loid_experiments',
            'precomputed_comment_sorts',
        ],
        ConfigValue.str: [
--- a/r2/r2/tests/unit/config/experiment_test.py
+++ b/r2/r2/tests/unit/config/experiment_test.py
@@ -25,6 +25,8 @@ import itertools
 import math
 import mock

+from pylons import app_globals as g, tmpl_context as c
+
 from r2.config.feature.state import FeatureState
 from . feature_test import TestFeatureBase, MockAccount

@@ -246,6 +248,7 @@ class TestExperiment(TestFeatureBase):

    def test_loggedout_experiment(self, num_users=2000):
        """Test variant distn for logged out users."""
+        c.whitelisted_loid_experiments = ['test_state']
        self.do_experiment_simulation(
            self.get_loggedout_users(num_users),
            experiment={
@@ -254,6 +257,16 @@ class TestExperiment(TestFeatureBase):
            },
        )

+    def test_loggedout_experiment_no_whitelist(self, num_users=2000):
+        """Test variant distn for logged out users."""
+        self.assert_no_experiment(
+            self.get_loggedout_users(num_users),
+            experiment={
+                "loggedout": True,
+                'variants': {'larger': 5, 'smaller': 10},
+            },
+        )
+
    def test_loggedout_experiment_missing_loids(self, num_users=2000):
        """Ensure logged out experiments with no loids do not bucket."""
        self.assert_no_experiment(
@@ -267,6 +280,7 @@ class TestExperiment(TestFeatureBase):

    def test_loggedout_experiment_explicit_enable(self, num_users=2000):
        """Test variant distn for logged out users with explicit enable."""
+        c.whitelisted_loid_experiments = ['test_state']
        self.do_experiment_simulation(
            self.get_loggedout_users(num_users),
            experiment={
@@ -289,8 +303,11 @@ class TestExperiment(TestFeatureBase):

    def test_loggedout_experiment_global_disable(self, num_users=2000):
        """Test we can disable loid-experiments via configuration."""
-        self.patch_g(enable_loggedout_experiments=False)
+        # we already patch this attr in setUp, so we can just explicitly change
+        # it and rely on *that* cleanup
+        g.enable_loggedout_experiments = False

+        c.whitelist_loid_experiments = ['test_state']
        self.assert_no_experiment(
            self.get_loggedout_users(num_users),
            experiment={