Merge pull request #478 from rstudio/remove-literate-programming

Stop using literate programming
2026-04-07 03:00:20 -04:00 · 2014-05-09 19:39:58 -07:00
parent e84a76cebd dee6fbcb8f
commit 11bf02eb56
9 changed files with 350 additions and 297 deletions
--- a/R/middleware.R
+++ b/R/middleware.R
@@ -0,0 +1,349 @@
+# This file contains a general toolkit for routing and combining bits of
+# HTTP-handling logic. It is similar in spirit to Rook (and Rack, and WSGI, and
+# Connect, and...) but adds cascading and routing.
+#
+# This file is called "middleware" because that's the term used for these bits
+# of logic in these other frameworks. However, our code uses the word "handler"
+# so we'll stick to that for the rest of this document; just know that they're
+# basically the same concept.
+#
+# ## Intro to handlers
+#
+# A **handler** (or sometimes, **httpHandler**) is a function that takes a
+# `req` parameter--a request object as described in the Rook specification--and
+# returns `NULL`, or an `httpResponse`.
+#
+## ------------------------------------------------------------------------
+# Build an `httpResponse` object: a list with `status`, `content_type`,
+# `content` and `headers` elements, tagged with the S3 class 'httpResponse'.
+#
+# `headers` may be given as a list or as a vector; it is always stored as a
+# list. An "X-UA-Compatible" header of "chrome=1" is added unless the caller
+# already supplied a value for it.
+httpResponse <- function(status = 200,
+                         content_type = "text/html; charset=UTF-8",
+                         content = "",
+                         headers = list()) {
+  # Normalise to a list so the `$` access below works for vectors too
+  headerList <- as.list(headers)
+
+  # Only fill in the default when the caller did not provide this header
+  if (is.null(headerList$`X-UA-Compatible`)) {
+    headerList$`X-UA-Compatible` <- "chrome=1"
+  }
+
+  structure(
+    list(
+      status = status,
+      content_type = content_type,
+      content = content,
+      headers = headerList
+    ),
+    class = 'httpResponse'
+  )
+}
+
+#
+# You can think of a web application as being simply an aggregation of these
+# functions, each of which performs one kind of duty. Each handler in turn gets
+# a look at the request and can decide whether it knows how to handle it. If
+# so, it returns an `httpResponse` and processing terminates; if not, it
+# returns `NULL` and the next handler gets to execute. If the final handler
+# returns `NULL`, a 404 response should be returned.
+#
+# We have a similar construct for websockets: **websocket handlers** or
+# **wsHandlers**. These take a single `ws` argument which is the websocket
+# connection that was just opened, and they can either return `TRUE` if they
+# are handling the connection, or `NULL` to pass responsibility on to the next
+# wsHandler.
+#
+# ### Combining handlers
+#
+# Since it's so common for httpHandlers to be invoked in this "cascading"
+# fashion, we'll introduce a function that takes zero or more handlers and
+# returns a single handler. And while we're at it, making a directory of static
+# content available is such a common thing to do, we'll allow strings
+# representing paths to be used instead of handlers; any such strings we
+# encounter will be converted into `staticHandler` objects.
+#
+## ------------------------------------------------------------------------
+# Combine zero or more handlers into a single cascading handler.
+#
+# `handlers` may be a single function, or a list/vector whose elements are
+# handler functions, character strings (each turned into a `staticHandler`),
+# or NULL (ignored). The returned handler tries each handler in order and
+# returns the first non-NULL response, or NULL if every handler declines.
+joinHandlers <- function(handlers) {
+  # Used whenever there is nothing to delegate to
+  nullHandler <- function(req) NULL
+
+  if (length(handlers) == 0)
+    return(nullHandler)
+
+  # A bare function is already a handler; nothing to combine
+  if (is.function(handlers))
+    return(handlers)
+
+  # Strings stand for static content directories; leave everything else alone
+  resolved <- lapply(handlers, function(entry) {
+    if (is.character(entry)) staticHandler(entry) else entry
+  })
+
+  # Discard NULL entries
+  isMissing <- vapply(resolved, is.null, logical(1))
+  active <- resolved[!isMissing]
+
+  if (length(active) == 0)
+    return(nullHandler)
+  if (length(active) == 1)
+    return(active[[1]])
+
+  function(req) {
+    # First handler to produce a response wins
+    for (candidate in active) {
+      result <- candidate(req)
+      if (!is.null(result))
+        return(result)
+    }
+    NULL
+  }
+}
+
+#
+# Note that we don't have an equivalent of `joinHandlers` for wsHandlers. It's
+# easy to imagine it, we just haven't needed one.
+#
+# ### Handler routing
+#
+# Handlers do not have a built-in notion of routing. Conceptually, given a list
+# of handlers, all the handlers are peers and they all get to see every request
+# (well, up until the point that a handler returns a response).
+#
+# You could implement routing in each handler by checking the request's
+# `PATH_INFO` field, but since it's such a common need, let's make it simple by
+# introducing a `routeHandler` function. This is a handler
+# [decorator](http://en.wikipedia.org/wiki/Decorator_pattern) and it's
+# responsible for 1) filtering out requests that don't match the given route,
+# and 2) temporarily modifying the request object to take the matched part of
+# the route off of the `PATH_INFO` (and add it to the end of `SCRIPT_NAME`).
+# This way, the handler doesn't need to figure out what part of its URL
+# path has already been matched via routing.
+#
+# (BTW, it's safe for `routeHandler` calls to nest.)
+#
+## ------------------------------------------------------------------------
+# Decorate `handler` so that it only sees requests whose PATH_INFO starts with
+# `prefix` followed by "/".
+#
+# `prefix` must be "" (in which case `handler` is returned unchanged) or a
+# single string accepted by the validation pattern below; anything else is an
+# error. For a matching request the prefix is moved from the front of
+# PATH_INFO to the end of SCRIPT_NAME for the duration of the call to
+# `handler`, and both fields are put back afterwards. Non-matching requests
+# yield NULL.
+routeHandler <- function(prefix, handler) {
+  force(prefix)
+  force(handler)
+
+  # No prefix means no routing is needed
+  if (identical("", prefix))
+    return(handler)
+
+  validPrefix <- length(prefix) == 1 && isTRUE(grepl("^/[^\\]+$", prefix))
+  if (!validPrefix) {
+    stop("Invalid URL prefix \"", prefix, "\"")
+  }
+
+  # \Q...\E quotes the prefix so it is matched literally
+  pathPattern <- paste0("^\\Q", prefix, "\\E/")
+  prefixLength <- nchar(prefix)
+
+  function(req) {
+    # Not under this route: decline
+    if (!isTRUE(grepl(pathPattern, req$PATH_INFO)))
+      return(NULL)
+
+    # Put the request fields back no matter how the wrapped handler exits
+    savedScript <- req$SCRIPT_NAME
+    savedPath <- req$PATH_INFO
+    on.exit({
+      req$SCRIPT_NAME <- savedScript
+      req$PATH_INFO <- savedPath
+    }, add = TRUE)
+
+    req$SCRIPT_NAME <- paste0(savedScript, prefix)
+    req$PATH_INFO <- substr(savedPath, prefixLength + 1, nchar(savedPath))
+    handler(req)
+  }
+}
+
+#
+# We have a version for websocket handlers as well. Pity about the copy/paste
+# job.
+#
+## ------------------------------------------------------------------------
+# Websocket counterpart of `routeHandler`: decorate `wshandler` so that it is
+# only invoked for connections whose request PATH_INFO starts with `prefix`
+# followed by "/".
+#
+# `prefix` must be "" (in which case `wshandler` is returned unchanged) or a
+# single string accepted by the validation pattern below; anything else is an
+# error. The request is taken from `ws$request`; its SCRIPT_NAME and PATH_INFO
+# are adjusted while `wshandler` runs and put back afterwards. Non-matching
+# connections yield NULL.
+routeWSHandler <- function(prefix, wshandler) {
+  force(prefix)
+  force(wshandler)
+
+  # No prefix means no routing is needed
+  if (identical("", prefix))
+    return(wshandler)
+
+  validPrefix <- length(prefix) == 1 && isTRUE(grepl("^/[^\\]+$", prefix))
+  if (!validPrefix) {
+    stop("Invalid URL prefix \"", prefix, "\"")
+  }
+
+  # \Q...\E quotes the prefix so it is matched literally
+  pathPattern <- paste0("^\\Q", prefix, "\\E/")
+  prefixLength <- nchar(prefix)
+
+  function(ws) {
+    # NOTE(review): the adjustments below are made through this local `req`;
+    # `wshandler` receives `ws`, so it presumably only observes them when
+    # `ws$request` has reference semantics (e.g. an environment) -- confirm.
+    req <- ws$request
+
+    # Not under this route: decline
+    if (!isTRUE(grepl(pathPattern, req$PATH_INFO)))
+      return(NULL)
+
+    # Put the request fields back no matter how the wrapped handler exits
+    savedScript <- req$SCRIPT_NAME
+    savedPath <- req$PATH_INFO
+    on.exit({
+      req$SCRIPT_NAME <- savedScript
+      req$PATH_INFO <- savedPath
+    }, add = TRUE)
+
+    req$SCRIPT_NAME <- paste0(savedScript, prefix)
+    req$PATH_INFO <- substr(savedPath, prefixLength + 1, nchar(savedPath))
+    wshandler(ws)
+  }
+}
+
+#
+# ### Handler implementations
+#
+# Now let's actually write some handlers. Note that these functions aren't
+# *themselves* handlers, you call them and they *return* a handler. Handler
+# factory functions, if you will.
+#
+# Here's one that serves up static assets from a directory.
+#
+## ------------------------------------------------------------------------
+# Handler factory: returns a handler that serves files found under `root`.
+#
+# The returned handler declines (returns NULL) for anything other than GET and
+# for paths that `resolve(root, path)` maps to NULL. A request without
+# PATH_INFO gets a 400 response. A path of "/" is served as "/index.html".
+# Otherwise the file is read as raw bytes and returned in a 200 response whose
+# content type is looked up from the file extension.
+staticHandler <- function(root) {
+  force(root)
+
+  function(req) {
+    # Only GET requests are served from here
+    if (!identical(req$REQUEST_METHOD, 'GET'))
+      return(NULL)
+
+    requested <- req$PATH_INFO
+    if (is.null(requested))
+      return(httpResponse(400, content="<h1>Bad Request</h1>"))
+
+    # The root path maps to the index page
+    if (requested == '/')
+      requested <- '/index.html'
+
+    # A NULL result from resolve() means this handler has nothing to serve
+    target <- resolve(root, requested)
+    if (is.null(target))
+      return(NULL)
+
+    mimeType <- getContentType(tools::file_ext(target))
+    # Read the whole file in one go, sized from its file info
+    payload <- readBin(target, 'raw', n=file.info(target)$size)
+    httpResponse(200, mimeType, payload)
+  }
+}
+
+#
+# ## Handler manager
+#
+# The handler manager gives you a place to register handlers (of both http and
+# websocket varieties) and provides an httpuv-compatible set of callbacks for
+# invoking them.
+#
+# Create one of these, make zero or more calls to `addHandler` and
+# `addWSHandler` methods (order matters--first one wins!), and then pass the
+# return value of `createHttpuvApp` to httpuv's `startServer` function.
+#
+## ------------------------------------------------------------------------
+# An ordered, keyed collection of handler functions.
+#
+# `handlers` is a named list; its order is the order in which `invoke` tries
+# the handlers.
+HandlerList <- setRefClass("HandlerList",
+  fields = list(
+    handlers = "list"
+  ),
+  methods = list(
+    # Register `handler` under `key`. New handlers go to the front of the list
+    # unless `tail` is TRUE, in which case they go to the end. Reusing a key
+    # that is already registered is an error.
+    add = function(handler, key, tail = FALSE) {
+      if (!is.null(handlers[[key]]))
+        stop("Key ", key, " already in use")
+
+      entry <- list(handler)
+      names(entry) <- key
+
+      handlers <<- if (length(handlers) == 0) {
+        entry
+      } else if (tail) {
+        c(handlers, entry)
+      } else {
+        c(entry, handlers)
+      }
+    },
+    # Drop the handler registered under `key`
+    remove = function(key) {
+      handlers[key] <<- NULL
+    },
+    # Drop every registered handler
+    clear = function() {
+      handlers <<- list()
+    },
+    # Call each handler in order with the given arguments and return the first
+    # non-NULL result; NULL if every handler returns NULL (or none exist).
+    invoke = function(...) {
+      for (h in handlers) {
+        outcome <- h(...)
+        if (!is.null(outcome))
+          return(outcome)
+      }
+      NULL
+    }
+  )
+)
+
+# Registry for http handlers and websocket handlers, plus a method
+# (`createHttpuvApp`) that packages them as a list of callbacks.
+HandlerManager <- setRefClass("HandlerManager",
+  fields = list(
+    # http handlers and websocket handlers are kept in separate HandlerList
+    # objects
+    handlers = "HandlerList",
+    wsHandlers = "HandlerList"
+  ),
+  methods = list(
+    # Register an http handler under `key`; forwards to HandlerList$add, so
+    # the handler is placed first unless `tail` is TRUE.
+    addHandler = function(handler, key, tail = FALSE) {
+      handlers$add(handler, key, tail)
+    },
+    # Unregister the http handler stored under `key`
+    removeHandler = function(key) {
+      handlers$remove(key)
+    },
+    # Register a websocket handler under `key`; same placement rule as
+    # addHandler.
+    addWSHandler = function(wsHandler, key, tail = FALSE) {
+      wsHandlers$add(wsHandler, key, tail)
+    },
+    # Unregister the websocket handler stored under `key`
+    removeWSHandler = function(key) {
+      wsHandlers$remove(key)
+    },
+    # Remove all http and websocket handlers
+    clear = function() {
+      handlers$clear()
+      wsHandlers$clear()
+    },
+    # Build a list with three callbacks: `onHeaders`, `call` and `onWSOpen`.
+    createHttpuvApp = function() {
+      list(
+        # Size check on the request headers. Returns a 413 response list when
+        # the declared size exceeds the limit, otherwise NULL.
+        # NOTE(review): NULL presumably tells the server to carry on with the
+        # request -- confirm against httpuv.
+        onHeaders = function(req) {
+          # Limit comes from the 'shiny.maxRequestSize' option (default 5 MB);
+          # a value <= 0 disables the check.
+          maxSize <- getOption('shiny.maxRequestSize', 5 * 1024 * 1024)
+          if (maxSize <= 0)
+            return(NULL)
+
+          # Use the declared content length when there is one. With no content
+          # length but a transfer encoding header present, the size is treated
+          # as infinite, so the request is rejected below.
+          reqSize <- 0
+          if (length(req$CONTENT_LENGTH) > 0)
+            reqSize <- as.numeric(req$CONTENT_LENGTH)
+          else if (length(req$HTTP_TRANSFER_ENCODING) > 0)
+            reqSize <- Inf
+
+          if (reqSize > maxSize) {
+            return(list(status = 413L,
+              headers = list(
+                'Content-Type' = 'text/plain'
+              ),
+              body = 'Maximum upload size exceeded'))
+          }
+          else {
+            return(NULL)
+          }
+        },
+        # Request callback: runs the registered http handlers through
+        # `.httpServer`, passing the 'shiny.sharedSecret' option (NULL when
+        # unset) as the secret to check.
+        call = .httpServer(
+          function (req) {
+            return(handlers$invoke(req))
+          },
+          getOption('shiny.sharedSecret', NULL)
+        ),
+        # Websocket-open callback: runs the registered websocket handlers
+        onWSOpen = function(ws) {
+          return(wsHandlers$invoke(ws))
+        }
+      )
+    },
+    # Wrap `handler` (a function of `req` returning an httpResponse or NULL)
+    # into a function of `req` that returns a list with `status`, `body` and
+    # `headers` elements.
+    .httpServer = function(handler, sharedSecret) {
+      # Optional hook from the 'shiny.http.response.filter' option; defaults
+      # to passing the response through unchanged.
+      filter <- getOption('shiny.http.response.filter', NULL)
+      if (is.null(filter))
+        filter <- function(req, response) response
+
+      function(req) {
+        # When a shared secret is configured, the request must carry exactly
+        # that value in HTTP_SHINY_SHARED_SECRET; otherwise answer 403.
+        if (!is.null(sharedSecret)
+          && !identical(sharedSecret, req$HTTP_SHINY_SHARED_SECRET)) {
+          return(list(status=403,
+            body='<h1>403 Forbidden</h1><p>Shared secret mismatch</p>',
+            headers=list('Content-Type' = 'text/html')))
+        }
+
+        # A NULL response means no handler took the request: answer 404
+        response <- handler(req)
+        if (is.null(response))
+          response <- httpResponse(404, content="<h1>Not Found</h1>")
+
+        # Headers (including Content-Type) are taken from the response as it
+        # is *before* the filter runs; only the status and content of the
+        # filtered response are used in the result below.
+        headers <- as.list(response$headers)
+        headers$'Content-Type' <- response$content_type
+
+        response <- filter(req, response)
+        return(list(status=response$status,
+          body=response$content,
+          headers=headers))
+      }
+    }
+  )
+)
+
+#
+# ## Next steps
+#
+# See server.R and middleware-shiny.R to see actual implementation and usage of
+# handlers in the context of Shiny.
--- a/R/reactive-domains.R
+++ b/R/reactive-domains.R
@@ -0,0 +1,252 @@
+#' @include globals.R
+NULL
+
+#
+# Over the last few months we've seen a number of cases where it'd be helpful
+# for objects that are instantiated within a Shiny app to know what Shiny
+# session they are "owned" by. I put "owned" in quotes because there isn't a
+# built-in notion of object ownership in Shiny today, any more than there is a
+# notion of one object owning another in R.
+#
+# But it's intuitive to everyone, I think, that the outputs for a session are
+# owned by that session, and any logic that is executed as part of the output
+# is done on behalf of that session. And it seems like in the vast majority of
+# cases, observers that are created inside a shinyServer function (i.e. one per
+# session) are also intuitively owned by the session that's starting up.
+#
+# This notion of ownership is important/helpful for a few scenarios that have
+# come up in recent months:
+#
+# 1. The showcase mode that Jonathan implemented recently highlights
+#    observers/reactives as they execute. In order for sessions to only
+#    receive highlights for their own code execution, we need to know which
+#    sessions own which observers.
+# 2. We've seen a number of apps crash out when observers outlive their
+#    sessions and then try to do things with their sessions (the most common
+#    error message was something like "Can't write to a closed websocket", but
+#    we now silently ignore writes to closed websockets). It'd be convenient
+#    for the default behavior of observers to be that they don't outlive their
+#    parent sessions.
+# 3. The reactive log visualizer currently visualizes all reactivity in the
+#    process; it would be great if by default it only visualized the current
+#    session.
+# 4. When an observer has an error, it would be great to be able to send the
+#    error to the session so it can do its own handling (such as sending the
+#    error info to the client so the user can be notified).
+# 5. Shiny Server Pro wants to show the admin how much time is being spent
+#    servicing each session.
+#
+# So what are the rules for establishing ownership?
+#
+# 1. Define the "current domain" as a global variable whose value will own any
+# newly created observer (by default). A domain is a reference class or
+# environment that contains the functions `onEnded(callback)`, `isEnded()`, and
+# `reactlog(logEntry)`.
+#
+## ------------------------------------------------------------------------
+# Create a stand-in domain object: an environment exposing `onEnded(callback)`,
+# `isEnded()`, `reactlog(logEntry)` and `end()`.
+#
+# `end()` marks the domain as ended and calls every registered callback with
+# no arguments; calling `end()` again does nothing. `reactlog` ignores its
+# argument and returns NULL.
+createMockDomain <- function() {
+  # State shared by the closures below
+  endedCallbacks <- list()
+  hasEnded <- FALSE
+
+  domain <- new.env(parent = emptyenv())
+
+  domain$onEnded <- function(callback) {
+    endedCallbacks <<- c(endedCallbacks, callback)
+  }
+
+  domain$isEnded <- function() hasEnded
+
+  domain$reactlog <- function(logEntry) NULL
+
+  domain$end <- function() {
+    # Callbacks run at most once
+    if (hasEnded)
+      return(invisible())
+
+    hasEnded <<- TRUE
+    for (cb in endedCallbacks)
+      cb()
+    invisible()
+  }
+
+  domain
+}
+
+#
+# 2. The initial value of "current domain" is null.
+#
+## ------------------------------------------------------------------------
+# Start out with no current domain.
+# NOTE(review): `.globals` is not defined in this file; presumably it comes
+# from globals.R, which this file lists in its @include directive -- confirm.
+.globals$domain <- NULL
+
+#
+# 3. Objects that can be owned include observers, reactive expressions,
+# invalidateLater instances, reactiveTimer instances. Whenever one of these is
+# created, by default its owner will be the current domain.
+#
+## ------------------------------------------------------------------------
+
+#' @rdname domains
+#' @export
+getDefaultReactiveDomain <- function() {
+  # Hand back whatever is currently stored as the default domain; this is NULL
+  # until something (e.g. withReactiveDomain) installs a domain.
+  .globals$domain
+}
+
+#
+# 4. While a session is being created and the shinyServer function is executed,
+# the current domain is set to the new session. When the shinyServer function
+# is done executing, the previous value of the current domain is restored. This
+# is made foolproof using a `withReactiveDomain` function.
+#
+## ------------------------------------------------------------------------
+
+#' @rdname domains
+#' @export
+withReactiveDomain <- function(domain, expr) {
+  # Remember the current default domain and make sure it is put back however
+  # this function exits (normally or through an error).
+  previous <- .globals$domain
+  on.exit(.globals$domain <- previous, add = TRUE)
+
+  # `expr` is evaluated lazily, so forcing it only after the swap means it
+  # runs with `domain` installed as the default.
+  .globals$domain <- domain
+  force(expr)
+}
+
+#
+# 5. While an observer or reactive expression is executing, the current domain
+# is set to the owner of the observer. When the observer completes, the
+# previous value of the current domain is restored.
+#
+# 6. Note that once created, an observer/reactive expression belongs to the
+# same domain forever, regardless of how many times it is invalidated and
+# re-executed, and regardless of what caused the invalidation to happen.
+#
+# 7. When a session ends, any observers that it owns are suspended, any
+# invalidateLater/reactiveTimers are stopped.
+#
+## ------------------------------------------------------------------------
+
+#' @rdname domains
+#' @export
+onReactiveDomainEnded <- function(domain, callback, failIfNull = FALSE) {
+  # With a real domain, registration is delegated to the domain itself
+  if (!is.null(domain))
+    return(domain$onEnded(callback))
+
+  # No domain: either complain or quietly do nothing, as requested
+  if (isTRUE(failIfNull))
+    stop("onReactiveDomainEnded called with null domain and failIfNull=TRUE")
+
+  NULL
+}
+
+#
+# 8. If an uncaught error occurs while executing an observer, the session gets
+# a chance to handle it. I suppose the default behavior would be to send the
+# message to the client if possible, and then perhaps end the session (or not,
+# I could argue either way).
+#
+# The basic idea here is inspired by Node.js domains, which you can think of as
+# a way to track execution contexts across callback- or listener-oriented
+# asynchronous code. They use it to unify error handling code across a graph of
+# related objects. Our domains will be used to unify both lifetime and error
+# handling across a graph of related reactive primitives.
+#
+# (You could imagine that as a client update is being processed, the session
+# associated with that client would become the current domain. IIRC this is how
+# showcase mode is implemented today. I don't think this would cover any cases
+# not covered by rule 5 above, and the absence of rule 5 would leave cases that
+# this rule would not cover.)
+#
+# Pitfalls/open issues:
+#
+# 1. Our current approach has the issue of observers staying alive longer than
+# they ought to. This proposal introduces the opposite risk: that
+# observers/invalidateLater/reactiveTimer instances, having implicitly been
+# assigned a parent, are suspended/disposed earlier than they ought to have
+# been. I find this especially worrisome for invalidateLater/reactiveTimer,
+# which will often be called in a reactive expression, and thus execute under
+# unpredictable circumstances. Perhaps those should continue to accept an
+# explicit "session=" parameter that the user is warned about if they don't
+# provide a value.
+#
+# 2. Are there situations where it is ambiguous what the right thing to do is,
+# and we should warn/error to ask the user to provide a domain explicitly?
+#
+## ------------------------------------------------------------------------
+
+#' Reactive domains
+#'
+#' Reactive domains are a mechanism for establishing ownership over reactive
+#' primitives (like reactive expressions and observers), even if the set of
+#' reactive primitives is dynamically created. This is useful for lifetime
+#' management (i.e. destroying observers when the Shiny session that created
+#' them ends) and error handling.
+#'
+#' At any given time, there can be either a single "default" reactive domain
+#' object, or none (i.e. the reactive domain object is \code{NULL}). You can
+#' access the current default reactive domain by calling
+#' \code{getDefaultReactiveDomain}.
+#'
+#' Unless you specify otherwise, newly created observers and reactive
+#' expressions will be assigned to the current default domain (if any). You can
+#' override this assignment by providing an explicit \code{domain} argument to
+#' \code{\link{reactive}} or \code{\link{observe}}.
+#'
+#' For advanced usage, it's possible to override the default domain using
+#' \code{withReactiveDomain}. The \code{domain} argument will be made the
+#' default domain while \code{expr} is evaluated.
+#'
+#' Implementers of new reactive primitives can use \code{onReactiveDomainEnded}
+#' as a convenience function for registering callbacks. If the reactive domain
+#' is \code{NULL} and \code{failIfNull} is \code{FALSE}, then the callback will
+#' never be invoked.
+#'
+#' @name domains
+#' @param domain A valid domain object (for example, a Shiny session), or
+#'   \code{NULL}
+#' @param expr An expression to evaluate under \code{domain}
+#' @param callback A callback function to be invoked
+#' @param failIfNull If \code{TRUE} then an error is given if the \code{domain}
+#'   is \code{NULL}
+NULL
+
+#
+# Example 1
+# ---
+# ```
+# obs1 <- observe({
+# })
+# shinyServer(function(input, output) {
+#   obs2 <- observe({
+#     obs3 <- observe({
+#     })
+#   })
+# })
+# # obs1 would have no domain, obs2 and obs3 would be owned by the session
+# ```
+#
+# Example 2
+# ---
+# ```
+# globalValues <- reactiveValues(broadcast="")
+# shinyServer(function(input, output) {
+#   sessionValues <- reactiveValues()
+#   output$messageOutput <- renderText({
+#     globalValues$broadcast
+#     obs1 <- observe({...})
+#   })
+#   observe({
+#     if (input$goButton == 0) return()
+#     isolate( globalValues$broadcast <- input$messageInput )
+#   })
+# })
+# # The observer behind messageOutput would be owned by the session,
+# # as would all the many instances of obs1 that were created.
+# ```
+# ---
+#
+# Example 3
+# ---
+# ```
+# rexpr1 <- reactive({
+#   invalidateLater(1000)
+#   obs1 <- observe({...})
+# })
+# observeSomething <- function() {
+#   obs2 <- observe({...})
+# }
+# shinyServer(function(input, output) {
+#   obs3 <- observe({
+#     observeSomething()
+#     rexpr1()
+#   })
+# })
+# # rexpr1, the invalidateLater call, and obs1 would all have no owner;
+# # obs2 and obs3 would be owned by the session.
+# ```