Update caches from code review feedback

This commit is contained in:
Winston Chang
2018-07-19 14:32:05 -05:00
parent bc0fb3f44c
commit 86ea023e2e
9 changed files with 243 additions and 224 deletions

View File

@@ -55,11 +55,18 @@
#'
#' @section Cache pruning:
#'
#' Cache pruning occurs each time \code{get()} and \code{set()} are called, or
#' it can be invoked manually by calling \code{prune()}.
#' Cache pruning occurs when \code{set()} is called, or it can be invoked
#' manually by calling \code{prune()}.
#'
#' If there are any objects that are older than \code{max_age}, they will be
#' removed when a pruning occurs.
#' The disk cache will throttle the pruning so that it does not happen on
#' every call to \code{set()}, because the filesystem operations for checking
#' the status of files can be slow. Instead, it will prune once in every 20
#' calls to \code{set()}, or if at least 5 seconds have elapsed since the last
#' prune occurred, whichever is first. These parameters are currently not
#' customizable, but may be in the future.
#'
#' When a pruning occurs, if there are any objects that are older than
#' \code{max_age}, they will be removed.
#'
#' The \code{max_size} and \code{max_n} parameters are applied to the cache as
#' a whole, in contrast to \code{max_age}, which is applied to each object
@@ -78,6 +85,9 @@
#' stored in blocks on disk. For example, if the block size is 4096 bytes,
#' then a file that is one byte in size will take 4096 bytes on disk.
#'
#' Another time that objects can be removed from the cache is when
#' \code{get()} is called. If the target object is older than \code{max_age},
#' it will be removed and the cache will report it as a missing value.
#'
#' @section Eviction policies:
#'
@@ -165,13 +175,13 @@
#' @param dir Directory to store files for the cache. If \code{NULL} (the
#' default) it will create and use a temporary directory.
#' @param max_age Maximum age of files in cache before they are evicted, in
#' seconds.
#' seconds. Use \code{Inf} for no age limit.
#' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
#' this size, cached objects will be removed according to the value of the
#' \code{evict}.
#' \code{evict}. Use \code{Inf} for no size limit.
#' @param max_n Maximum number of objects in the cache. If the number of objects
#' exceeds this value, then cached objects will be removed according to the
#' value of \code{evict}.
#' value of \code{evict}. Use \code{Inf} for no limit of number of items.
#' @param evict The eviction policy to use to decide which objects are removed
#' when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are
#' supported.
@@ -242,8 +252,16 @@ DiskCache <- R6Class("DiskCache",
if (is.null(dir)) {
dir <- tempfile("DiskCache-")
}
if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
if (!is.numeric(max_age)) stop("max_age must be a number. Use `Inf` for no limit.")
if (!is.numeric(max_n)) stop("max_n must be a number. Use `Inf` for no limit.")
private$dir <- absolutePath(dir)
if (!dirExists(dir)) {
private$log(paste0("initialize: Creating ", dir))
dir.create(dir, recursive = TRUE)
}
private$dir <- normalizePath(dir)
private$max_size <- max_size
private$max_age <- max_age
private$max_n <- max_n
@@ -261,17 +279,16 @@ DiskCache <- R6Class("DiskCache",
private$missing <- missing
private$exec_missing <- exec_missing
private$logfile <- logfile
if (!dirExists(dir)) {
private$log(paste0("initialize: Creating ", dir))
dir.create(dir, recursive = TRUE, mode = "0700")
}
private$prune_last_time <- as.numeric(Sys.time())
},
get = function(key, missing = private$missing, exec_missing = private$exec_missing) {
private$log(paste0('get: key "', key, '"'))
self$is_destroyed(throw = TRUE)
validate_key(key)
private$maybe_prune_single(key)
filename <- private$key_to_filename(key)
# Instead of calling exists() before fetching the value, just try to
@@ -300,7 +317,6 @@ DiskCache <- R6Class("DiskCache",
}
private$log(paste0('get: key "', key, '" found'))
self$prune()
value
},
@@ -308,24 +324,27 @@ DiskCache <- R6Class("DiskCache",
private$log(paste0('set: key "', key, '"'))
self$is_destroyed(throw = TRUE)
validate_key(key)
if (!private$exec_missing && identical(value, private$missing)) {
stop("Attempted to store sentinel value representing a missing key.")
}
file <- private$key_to_filename(key)
temp_file <- paste0(file, "-temp-", shiny::createUniqueId(8))
temp_file <- paste0(file, "-temp-", createUniqueId(8))
save_error <- FALSE
ref_object <- FALSE
tryCatch(
saveRDS(value, file = temp_file,
refhook = function(x) {
ref_object <<- TRUE
NULL
}
),
{
saveRDS(value, file = temp_file,
refhook = function(x) {
ref_object <<- TRUE
NULL
}
)
file.rename(temp_file, file)
},
error = function(e) {
save_error <<- TRUE
# Unlike file.remove(), unlink() does not raise warning if file does
# not exist.
unlink(temp_file)
}
)
if (save_error) {
@@ -337,8 +356,7 @@ DiskCache <- R6Class("DiskCache",
warning("A reference object was cached in a serialized format. The restored object may not work as expected.")
}
file.rename(temp_file, file)
self$prune()
private$prune_throttled()
invisible(self)
},
@@ -381,53 +399,66 @@ DiskCache <- R6Class("DiskCache",
private$log(paste0('prune'))
self$is_destroyed(throw = TRUE)
current_time <- Sys.time()
filenames <- dir(private$dir, "\\.rds$", full.names = TRUE)
files <- file.info(filenames)
files <- files[files$isdir == FALSE, ]
files$name <- rownames(files)
rownames(files) <- NULL
info <- file.info(filenames)
info <- info[info$isdir == FALSE, ]
info$name <- rownames(info)
rownames(info) <- NULL
# Files could be removed between the dir() and file.info() calls. The
# entire row for such files will have NA values. Remove those rows.
files <- files[!is.na(files$size), ]
info <- info[!is.na(info$size), ]
# 1. Remove any files where the age exceeds max age.
timediff <- as.numeric(Sys.time() - files[["mtime"]], units = "secs")
rm_idx <- timediff > private$max_age
if (any(rm_idx)) {
private$log(paste0("prune max_age: Removing ", paste(files$name[rm_idx], collapse = ", ")))
if (is.finite(private$max_age)) {
timediff <- as.numeric(current_time - info$mtime, units = "secs")
rm_idx <- timediff > private$max_age
if (any(rm_idx)) {
private$log(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", ")))
file.remove(info$name[rm_idx])
info <- info[!rm_idx, ]
}
}
file.remove(files$name[rm_idx])
# Remove rows of files that were deleted.
files <- files[!rm_idx, ]
# Sort objects by priority, according to eviction policy. The sorting is
# done in a function which can be called multiple times but only does
# the work the first time.
info_is_sorted <- FALSE
ensure_info_is_sorted <- function() {
if (info_is_sorted) return()
# Sort files by priority, according to eviction policy.
if (private$evict == "lru") {
files <- files[order(files[["atime"]], decreasing = TRUE), ]
} else if (private$evict == "fifo") {
files <- files[order(files[["mtime"]], decreasing = TRUE), ]
} else {
stop('Unknown eviction policy "', private$evict, '"')
if (private$evict == "lru") {
info <<- info[order(info$atime, decreasing = TRUE), ]
} else if (private$evict == "fifo") {
info <<- info[order(info$mtime, decreasing = TRUE), ]
} else {
stop('Unknown eviction policy "', private$evict, '"')
}
info_is_sorted <<- TRUE
}
# 2. Remove files if there are too many.
if (nrow(files) > private$max_n) {
rm_idx <- seq_len(nrow(files)) > private$max_n
if (any(rm_idx)) {
private$log(paste0("prune max_n: Removing ", paste(files$name[rm_idx], collapse = ", ")))
}
file.remove(files$name[rm_idx])
if (is.finite(private$max_n) && nrow(info) > private$max_n) {
ensure_info_is_sorted()
rm_idx <- seq_len(nrow(info)) > private$max_n
private$log(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", ")))
rm_success <- file.remove(info$name[rm_idx])
info <- info[!rm_success, ]
}
# 3. Remove files if cache is too large.
if (sum(files$size) > private$max_size) {
cum_size <- cumsum(files$size)
if (is.finite(private$max_size) && sum(info$size) > private$max_size) {
ensure_info_is_sorted()
cum_size <- cumsum(info$size)
rm_idx <- cum_size > private$max_size
if (any(rm_idx)) {
private$log(paste0("prune max_size: Removing ", paste(files$name[rm_idx], collapse = ", ")))
}
file.remove(files$name[rm_idx])
private$log(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", ")))
rm_success <- file.remove(info$name[rm_idx])
info <- info[!rm_success, ]
}
private$prune_last_time <- as.numeric(current_time)
invisible(self)
},
@@ -490,13 +521,49 @@ DiskCache <- R6Class("DiskCache",
exec_missing = FALSE,
logfile = NULL,
prune_throttle_counter = 0,
prune_last_time = NULL,
key_to_filename = function(key) {
if (! (is.character(key) && length(key)==1) ) {
stop("Key must be a character vector of length 1.")
validate_key(key)
# Additional validation. This 80-char limit is arbitrary, and is
# intended to avoid hitting a filename length limit on Windows.
if (nchar(key) > 80) {
stop("Invalid key: key must have fewer than 80 characters.")
}
file.path(private$dir, paste0(key, ".rds"))
},
# A wrapper for prune() that throttles it, because prune() can be
# expensive due to filesystem operations. This function will prune only
# once every 20 times it is called, or if it has been more than 5 seconds
# since the last time the cache was actually pruned, whichever is first.
# In the future, the behavior may be customizable.
prune_throttled = function() {
# Count the number of times prune() has been called.
private$prune_throttle_counter <- private$prune_throttle_counter + 1
if (private$prune_throttle_counter > 20 ||
private$prune_last_time - as.numeric(Sys.time()) > 5)
{
self$prune()
private$prune_throttle_counter <- 0
}
},
# Prunes a single object if it exceeds max_age. If the object does not
# exceed max_age, or if the object doesn't exist, do nothing.
maybe_prune_single = function(key) {
obj <- private$cache[[key]]
if (is.null(obj)) return()
timediff <- as.numeric(Sys.time()) - obj$mtime
if (timediff > private$max_age) {
private$log(paste0("pruning single object exceeding max_age: Removing ", key))
rm(list = key, envir = private$cache)
}
},
log = function(text) {
if (is.null(private$logfile)) return()

View File

@@ -61,11 +61,11 @@
#'
#' @section Cache pruning:
#'
#' Cache pruning occurs each time \code{get()} and \code{set()} are called, or
#' it can be invoked manually by calling \code{prune()}.
#' Cache pruning occurs when \code{set()} is called, or it can be invoked
#' manually by calling \code{prune()}.
#'
#' If there are any objects that are older than \code{max_age}, they will be
#' removed when a pruning occurs.
#' When a pruning occurs, if there are any objects that are older than
#' \code{max_age}, they will be removed.
#'
#' The \code{max_size} and \code{max_n} parameters are applied to the cache as
#' a whole, in contrast to \code{max_age}, which is applied to each object
@@ -78,11 +78,15 @@
#'
#' If the size of the objects in the cache exceeds \code{max_size}, then
#' objects will be removed from the cache. Objects will be removed from the
#' cache so that the total size remains under \code{max_size}. The size is
#' calculated by calling \code{\link{object.size}} on an object. Note that if
#' two keys are associated with the same object, the size calculation will
#' count the object's size twice, even though there is only one copy in
#' memory.
#' cache so that the total size remains under \code{max_size}. Note that the
#' size is calculated using the size of the files, not the size of disk space
#' used by the files -- these two values can differ because of files are
#' stored in blocks on disk. For example, if the block size is 4096 bytes,
#' then a file that is one byte in size will take 4096 bytes on disk.
#'
#' Another time that objects can be removed from the cache is when
#' \code{get()} is called. If the target object is older than \code{max_age},
#' it will be removed and the cache will report it as a missing value.
#'
#' @section Eviction policies:
#'
@@ -172,6 +176,9 @@ MemoryCache <- R6Class("MemoryCache",
if (exec_missing && (!is.function(missing) || length(formals(missing)) == 0)) {
stop("When `exec_missing` is true, `missing` must be a function that takes one argument.")
}
if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
if (!is.numeric(max_age)) stop("max_age must be a number. Use `Inf` for no limit.")
if (!is.numeric(max_n)) stop("max_n must be a number. Use `Inf` for no limit.")
private$cache <- new.env(parent = emptyenv())
private$max_size <- max_size
private$max_age <- max_age
@@ -186,6 +193,8 @@ MemoryCache <- R6Class("MemoryCache",
private$log(paste0('get: key "', key, '"'))
validate_key(key)
private$maybe_prune_single(key)
if (!self$exists(key)) {
private$log(paste0('get: key "', key, '" is missing'))
if (exec_missing) {
@@ -200,22 +209,27 @@ MemoryCache <- R6Class("MemoryCache",
private$log(paste0('get: key "', key, '" found'))
value <- private$cache[[key]]$value
self$prune()
value
},
set = function(key, value) {
private$log(paste0('set: key "', key, '"'))
validate_key(key)
if (!private$exec_missing && identical(value, private$missing)) {
stop("Attempted to store sentinel value representing a missing key.")
}
time <- as.numeric(Sys.time())
# Only record size if we're actually using max_size for pruning.
if (is.finite(private$max_size)) {
# Reported size is rough! See ?object.size.
size <- as.numeric(object.size(value))
} else {
size <- NULL
}
private$cache[[key]] <- list(
key = key,
value = value,
size = as.numeric(object.size(value)), # Reported size is rough! See ?object.size.
size = size,
mtime = time,
atime = time
)
@@ -225,7 +239,8 @@ MemoryCache <- R6Class("MemoryCache",
exists = function(key) {
validate_key(key)
exists(key, envir = private$cache, inherits = FALSE)
# Faster than `exists(key, envir = private$cache, inherits = FALSE)
!is.null(private$cache[[key]])
},
keys = function() {
@@ -250,71 +265,63 @@ MemoryCache <- R6Class("MemoryCache",
info <- private$object_info()
# 1. Remove any objects where the age exceeds max age.
time <- as.numeric(Sys.time())
timediff <- time - info$mtime
rm_idx <- timediff > private$max_age
if (any(rm_idx)) {
private$log(paste0("prune max_age: Removing ", paste(info$key[rm_idx], collapse = ", ")))
if (is.finite(private$max_age)) {
time <- as.numeric(Sys.time())
timediff <- time - info$mtime
rm_idx <- timediff > private$max_age
if (any(rm_idx)) {
private$log(paste0("prune max_age: Removing ", paste(info$key[rm_idx], collapse = ", ")))
rm(list = info$key[rm_idx], envir = private$cache)
info <- info[!rm_idx, ]
}
}
# Remove items from cache
rm(list = info$key[rm_idx], envir = private$cache)
info <- info[!rm_idx, ]
# Sort objects by priority, according to eviction policy. The sorting is
# done in a function which can be called multiple times but only does
# the work the first time.
info_is_sorted <- FALSE
ensure_info_is_sorted <- function() {
if (info_is_sorted) return()
# Sort objects by priority, according to eviction policy.
if (private$evict == "lru") {
info <- info[order(info[["atime"]], decreasing = TRUE), ]
} else if (private$evict == "fifo") {
info <- info[order(info[["mtime"]], decreasing = TRUE), ]
} else {
stop('Unknown eviction policy "', private$evict, '"')
if (private$evict == "lru") {
info <<- info[order(info$atime, decreasing = TRUE), ]
} else if (private$evict == "fifo") {
info <<- info[order(info$mtime, decreasing = TRUE), ]
} else {
stop('Unknown eviction policy "', private$evict, '"')
}
info_is_sorted <<- TRUE
}
# 2. Remove objects if there are too many.
if (nrow(info) > private$max_n) {
if (is.finite(private$max_n) && nrow(info) > private$max_n) {
ensure_info_is_sorted()
rm_idx <- seq_len(nrow(info)) > private$max_n
if (any(rm_idx)) {
private$log(paste0("prune max_n: Removing ", paste(info$key[rm_idx], collapse = ", ")))
}
private$log(paste0("prune max_n: Removing ", paste(info$key[rm_idx], collapse = ", ")))
rm(list = info$key[rm_idx], envir = private$cache)
info <- info[!rm_idx, ]
}
# 3. Remove objects if cache is too large.
if (sum(info$size) > private$max_size) {
if (is.finite(private$max_size) && sum(info$size) > private$max_size) {
ensure_info_is_sorted()
cum_size <- cumsum(info$size)
rm_idx <- cum_size > private$max_size
if (any(rm_idx)) {
private$log(paste0("prune max_size: Removing ", paste(info$key[rm_idx], collapse = ", ")))
}
private$log(paste0("prune max_size: Removing ", paste(info$key[rm_idx], collapse = ", ")))
rm(list = info$key[rm_idx], envir = private$cache)
info <- info[!rm_idx, ]
}
}
invisible(self)
},
size = function() {
# TODO: Validate this against metadata?
length(self$key())
},
destroy = function() {
if (is.null(private$cache)) {
return(invisible)
}
private$log(paste0("destroy"))
private$cache <- NULL
},
is_destroyed = function() {
is.null(private$cache)
length(self$keys())
}
),
private = list(
cache = NULL,
meta = NULL, # Metadata used for pruning
max_age = NULL,
max_size = NULL,
max_n = NULL,
@@ -323,6 +330,21 @@ MemoryCache <- R6Class("MemoryCache",
exec_missing = NULL,
logfile = NULL,
# Prunes a single object if it exceeds max_age. If the object does not
# exceed max_age, or if the object doesn't exist, do nothing.
maybe_prune_single = function(key) {
if (!is.finite(private$max_age)) return()
obj <- private$cache[[key]]
if (is.null(obj)) return()
timediff <- as.numeric(Sys.time()) - obj$mtime
if (timediff > private$max_age) {
private$log(paste0("pruning single object exceeding max_age: Removing ", key))
rm(list = key, envir = private$cache)
}
},
object_info = function() {
keys <- ls(private$cache, sorted = FALSE)
data.frame(

View File

@@ -22,6 +22,9 @@ print.key_missing <- function(x, ...) {
validate_key <- function(key) {
if (!is.character(key) || length(key) != 1 || nchar(key) == 0) {
stop("Invalid key: key must be single non-empty string.")
}
if (grepl("[^a-z0-9]", key)) {
stop("Invalid key: ", key, ". Only lowercase letters and numbers are allowed.")
}

View File

@@ -304,7 +304,7 @@ renderCachedPlot <- function(expr,
cacheKeyExpr <- substitute(cacheKeyExpr)
# The real cache key we'll use also includes width, height, res, pixelratio.
# This is just the part supplied by the user.
userCacheKey <- reactive(cacheKeyExpr, env = parent.frame(), quoted = TRUE)
userCacheKey <- reactive(cacheKeyExpr, env = parent.frame(), quoted = TRUE, label = "userCacheKey")
ensureCacheSetup <- function() {
# For our purposes, cache objects must support these methods.

View File

@@ -288,31 +288,6 @@ dirRemove <- function(path) {
}
}
# If the path exists, this returns a canonical path (like normalizePath). If it
# does not exist, it returns an absolute path, relative to the current dir. The
# difference is that a canonical path follows symlinks and doesn't have any
# `..`, while an absolute path here is simply one that starts with `/`.
absolutePath <- function(path) {
if (!is.character(path) || length(path) != 1 || path == "") {
stop("path must be a single non-empty string.")
}
if (substr(path, 1, 1) == "/") {
return(path)
}
if (isWindows()) {
# C:/abcd or c:\abcd
if (grepl("^[A-Za-z]:[/\\]", path)) {
return(path)
}
}
norm_path <- normalizePath(path, mustWork = FALSE)
if (path == norm_path) {
file.path(getwd(), path)
} else {
norm_path
}
}
# Attempt to join a path and relative path, and turn the result into a
# (normalized) absolute path. The result will only be returned if it is an
# existing file/directory and is a descendant of dir.