diff --git a/R/geom-smooth.r b/R/geom-smooth.r index 6c10e98ba5..a4e224450f 100644 --- a/R/geom-smooth.r +++ b/R/geom-smooth.r @@ -78,8 +78,8 @@ geom_smooth <- function(mapping = NULL, data = NULL, stat = "smooth", position = "identity", ..., - method = "auto", - formula = y ~ x, + method = NULL, + formula = NULL, se = TRUE, na.rm = FALSE, show.legend = NA, diff --git a/R/stat-smooth.r b/R/stat-smooth.r index 86e2e9dcab..a1cc84d762 100644 --- a/R/stat-smooth.r +++ b/R/stat-smooth.r @@ -1,8 +1,10 @@ -#' @param method Smoothing method (function) to use, accepts either a character vector, -#' e.g. `"auto"`, `"lm"`, `"glm"`, `"gam"`, `"loess"` or a function, e.g. -#' `MASS::rlm` or `mgcv::gam`, `stats::lm`, or `stats::loess`. +#' @param method Smoothing method (function) to use, accepts either +#' `NULL` or a character vector, e.g. `"lm"`, `"glm"`, `"gam"`, `"loess"` +#' or a function, e.g. `MASS::rlm` or `mgcv::gam`, `stats::lm`, or `stats::loess`. +#' `"auto"` is also accepted for backwards compatibility. It is equivalent to +#' `NULL`. #' -#' For `method = "auto"` the smoothing method is chosen based on the +#' For `method = NULL` the smoothing method is chosen based on the #' size of the largest group (across all panels). [stats::loess()] is #' used for less than 1,000 observations; otherwise [mgcv::gam()] is #' used with `formula = y ~ s(x, bs = "cs")` with `method = "REML"`. Somewhat anecdotally, @@ -10,10 +12,12 @@ #' so does not work for larger datasets. #' #' If you have fewer than 1,000 observations but want to use the same `gam()` -#' model that `method = "auto"` would use, then set +#' model that `method = NULL` would use, then set #' `method = "gam", formula = y ~ s(x, bs = "cs")`. #' @param formula Formula to use in smoothing function, eg. `y ~ x`, -#' `y ~ poly(x, 2)`, `y ~ log(x)` +#' `y ~ poly(x, 2)`, `y ~ log(x)`. `NULL` by default, in which case +#' `method = NULL` implies `formula = y ~ x` when there are fewer than 1,000 +#' observations and `formula = y ~ s(x, bs = "cs")` otherwise. #' @param se Display confidence interval around smooth? (`TRUE` by default, see #' `level` to control.) #' @param fullrange Should the fit span the full range of the plot, or just @@ -37,8 +41,8 @@ stat_smooth <- function(mapping = NULL, data = NULL, geom = "smooth", position = "identity", ..., - method = "auto", - formula = y ~ x, + method = NULL, + formula = NULL, se = TRUE, n = 80, span = 0.75, @@ -77,7 +81,8 @@ stat_smooth <- function(mapping = NULL, data = NULL, #' @export StatSmooth <- ggproto("StatSmooth", Stat, setup_params = function(data, params) { - if (identical(params$method, "auto")) { + msg <- character() + if (is.null(params$method) || identical(params$method, "auto")) { # Use loess for small datasets, gam with a cubic regression basis for # larger. Based on size of the _largest_ group to avoid bad memory # behaviour of loess @@ -87,18 +92,30 @@ StatSmooth <- ggproto("StatSmooth", Stat, params$method <- "loess" } else { params$method <- "gam" + } + msg <- c(msg, paste0("method = '", params$method, "'")) + } + + if (is.null(params$formula)) { + if (identical(params$method, "gam")) { params$formula <- y ~ s(x, bs = "cs") + } else { + params$formula <- y ~ x } - message( - "`geom_smooth()` using method = '", params$method, - "' and formula '", deparse(params$formula), "'" - ) + msg <- c(msg, paste0("formula '", deparse(params$formula), "'")) + } + if (identical(params$method, "gam")) { + params$method <- mgcv::gam + } + + if (length(msg) > 0) { + message("`geom_smooth()` using ", paste0(msg, collapse = " and ")) } params }, - compute_group = function(data, scales, method = "auto", formula = y ~ x, + compute_group = function(data, scales, method = NULL, formula = NULL, se = TRUE, n = 80, span = 0.75, fullrange = FALSE, xseq = NULL, level = 0.95, method.args = list(), na.rm = FALSE) { diff --git a/man/geom_smooth.Rd b/man/geom_smooth.Rd index c29a0fa0d0..540207b2db 100644 --- a/man/geom_smooth.Rd +++ b/man/geom_smooth.Rd @@ -6,11 +6,11 @@ \title{Smoothed conditional means} \usage{ geom_smooth(mapping = NULL, data = NULL, stat = "smooth", - position = "identity", ..., method = "auto", formula = y ~ x, + position = "identity", ..., method = NULL, formula = NULL, se = TRUE, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) stat_smooth(mapping = NULL, data = NULL, geom = "smooth", - position = "identity", ..., method = "auto", formula = y ~ x, + position = "identity", ..., method = NULL, formula = NULL, se = TRUE, n = 80, span = 0.75, fullrange = FALSE, level = 0.95, method.args = list(), na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) @@ -44,11 +44,13 @@ often aesthetics, used to set an aesthetic to a fixed value, like \code{colour = "red"} or \code{size = 3}. They may also be parameters to the paired geom/stat.} -\item{method}{Smoothing method (function) to use, accepts either a character vector, -e.g. \code{"auto"}, \code{"lm"}, \code{"glm"}, \code{"gam"}, \code{"loess"} or a function, e.g. -\code{MASS::rlm} or \code{mgcv::gam}, \code{stats::lm}, or \code{stats::loess}. +\item{method}{Smoothing method (function) to use, accepts either +\code{NULL} or a character vector, e.g. \code{"lm"}, \code{"glm"}, \code{"gam"}, \code{"loess"} +or a function, e.g. \code{MASS::rlm} or \code{mgcv::gam}, \code{stats::lm}, or \code{stats::loess}. +\code{"auto"} is also accepted for backwards compatibility. It is equivalent to +\code{NULL}. -For \code{method = "auto"} the smoothing method is chosen based on the +For \code{method = NULL} the smoothing method is chosen based on the size of the largest group (across all panels). \code{\link[stats:loess]{stats::loess()}} is used for less than 1,000 observations; otherwise \code{\link[mgcv:gam]{mgcv::gam()}} is used with \code{formula = y ~ s(x, bs = "cs")} with \code{method = "REML"}. Somewhat anecdotally, @@ -56,11 +58,13 @@ used with \code{formula = y ~ s(x, bs = "cs")} with \code{method = "REML"}. Some so does not work for larger datasets. If you have fewer than 1,000 observations but want to use the same \code{gam()} -model that \code{method = "auto"} would use, then set +model that \code{method = NULL} would use, then set \code{method = "gam", formula = y ~ s(x, bs = "cs")}.} \item{formula}{Formula to use in smoothing function, eg. \code{y ~ x}, -\code{y ~ poly(x, 2)}, \code{y ~ log(x)}} +\code{y ~ poly(x, 2)}, \code{y ~ log(x)}. \code{NULL} by default, in which case +\code{method = NULL} implies \code{formula = y ~ x} when there are fewer than 1,000 +observations and \code{formula = y ~ s(x, bs = "cs")} otherwise.} \item{se}{Display confidence interval around smooth? (\code{TRUE} by default, see \code{level} to control.)} diff --git a/tests/testthat/test-geom-smooth.R b/tests/testthat/test-geom-smooth.R index 0c378eae31..e84f8dae6a 100644 --- a/tests/testthat/test-geom-smooth.R +++ b/tests/testthat/test-geom-smooth.R @@ -48,6 +48,15 @@ test_that("default smoothing methods for small and large data sets work", { "method = 'gam' and formula 'y ~ s\\(x, bs = \"cs\"\\)" ) expect_equal(plot_data$y, as.numeric(out)) + + # backwards compatibility of method = "auto" + p <- ggplot(df, aes(x, y)) + geom_smooth(method = "auto") + + expect_message( + plot_data <- layer_data(p), + "method = 'gam' and formula 'y ~ s\\(x, bs = \"cs\"\\)" + ) + expect_equal(plot_data$y, as.numeric(out)) })