tern coverage - 94.05%

Files
Source

#' Difference Test for Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Various tests were implemented to test the difference between two proportions.
#'
#' @inheritParams argument_convention
#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
#'   to calculate the p-value.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("test_proportion_diff")`
#'   to see available statistics for this function.
#'
#' @seealso [h_prop_diff_test]
#'
#' @name prop_diff_test
#' @order 1
NULL

#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  # Resolve the requested test. The reference column keeps an empty p-value,
  # so the test is only computed for the comparison columns.
  method <- match.arg(method)
  pval <- ""

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    # Stack the reference rows first, followed by the rows of the current column.
    rsp <- factor(c(.ref_group[[.var]], df[[.var]]), levels = c("TRUE", "FALSE"))
    grp <- factor(
      rep(c("ref", "Not-ref"), times = c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    # Strata are mandatory for CMH; if supplied with another method they are
    # validated but not used in the contingency table.
    if (method == "cmh" || !is.null(variables$strata)) {
      strata <- variables$strata
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      strata <- c(interaction(.ref_group[strata]), interaction(df[strata]))
    }

    # CMH works on a 2 x 2 x K array; every other test on a plain 2 x 2 table.
    tbl <- if (method == "cmh") {
      table(grp, rsp, strata)
    } else {
      table(grp, rsp)
    }

    pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  # The label describes the test that produced (or would produce) the p-value.
  list(pval = formatters::with_label(pval, d_test_proportion_diff(method)))
}

#' Description of the Difference Test Between Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Translate the method key into its display name; unknown keys are an error.
  test_name <- switch(method,
    "chisq" = "Chi-Squared Test",
    "schouten" = "Chi-Squared Test with Schouten Correction",
    "fisher" = "Fisher's Exact Test",
    "cmh" = "Cochran-Mantel-Haenszel Test",
    stop(paste(method, "does not have a description"))
  )

  sprintf("p-value (%s)", test_name)
}

#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_test_proportion_diff <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_test_proportion_diff,
  # Default format string applied to the `pval` statistic.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  # Default indentation modifier (one level) for the `pval` row.
  .indent_mods = c(pval = 1L)
)

#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strat")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
test_proportion_diff <- function(lyt,
                                 vars,
                                 variables = list(strata = NULL),
                                 method = c("chisq", "schouten", "fisher", "cmh"),
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  # Arguments handed through to the statistics function via `extra_args`.
  s_fun_args <- list(variables = variables, method = method, ...)

  # Customise the default formatted analysis function with the user overrides.
  custom_afun <- make_afun(
    a_test_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Register the analysis on the layout.
  analyze(
    lyt,
    vars,
    afun = custom_afun,
    var_labels = var_labels,
    na_str = na_str,
    nested = nested,
    extra_args = s_fun_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions to Test Proportion Differences
#'
#' Helper functions to implement various tests on the difference between two proportions.
#'
#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
#'
#' @return A p-value.
#'
#' @seealso [prop_diff_test()] for implementation of these helper functions.
#'
#' @name h_prop_diff_test
NULL

#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @keywords internal
prop_chisq <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Order the columns as responders ("TRUE") then non-responders ("FALSE").
  tbl <- tbl[, c("TRUE", "FALSE")]

  # With an empty response column the test is not run and 1 is returned.
  has_empty_column <- any(colSums(tbl) == 0)
  if (has_empty_column) {
    1
  } else {
    stats::prop.test(tbl, correct = FALSE)$p.value
  }
}

#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
#'   [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#' @keywords internal
prop_cmh <- function(ary) {
  # Input must be a 2 x 2 x K array (groups x response x strata).
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Total number of observations in each stratum (third dimension).
  strata_sizes <- apply(ary, MARGIN = 3, sum)
  if (any(strata_sizes < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Discard strata with fewer than two observations. `drop = FALSE` keeps the
    # result a 3-dimensional array even if only a single stratum is retained,
    # so the array is never silently collapsed into a 2 x 2 matrix.
    ary <- ary[, , strata_sizes > 1, drop = FALSE]
  }

  stats::mantelhaen.test(ary, correct = FALSE)$p.value
}

#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
#'
#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
#'
#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Order the columns as responders ("TRUE") then non-responders ("FALSE").
  tbl <- tbl[, c("TRUE", "FALSE")]
  # With an empty response column the statistic is not computed; return 1.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Counts produced by `table()` are stored as integers. The denominator below
  # multiplies four counts, which exceeds the integer range already for a few
  # hundred observations per group and would turn the p-value into `NA`.
  # Converting to double avoids the overflow without changing the results
  # for smaller tables.
  storage.mode(tbl) <- "double"

  n <- sum(tbl)
  n1 <- sum(tbl[1, ])
  n2 <- sum(tbl[2, ])

  # Main diagonal (a, d) and anti-diagonal (c, b) of the 2 x 2 table.
  ad <- diag(tbl)
  bc <- diag(apply(tbl, 2, rev))
  # Column margins: responders (a, c) and non-responders (b, d).
  ac <- tbl[, 1]
  bd <- tbl[, 2]

  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * sum(ac) * sum(bd))

  # Upper tail probability of a chi-squared distribution with 1 df.
  1 - stats::pchisq(t_schouten, df = 1)
}

#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @keywords internal
prop_fisher <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  # Order the columns as responders ("TRUE") then non-responders ("FALSE").
  ordered_tbl <- tbl[, c("TRUE", "FALSE")]
  test_result <- stats::fisher.test(ordered_tbl)
  test_result$p.value
}

#' Estimation of Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion of responders within a studied population.
#'
#' @inheritParams prop_strat_wilson
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_proportion")`
#'   to see available statistics for this function.
#' @param method (`string`)\cr the method used to construct the confidence interval
#'   for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
#'   `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
#' @param long (`flag`)\cr whether a long description is required.
#'
#' @seealso [h_proportions]
#'
#' @name estimate_proportions
#' @order 1
NULL

#' @describeIn estimate_proportions Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  if (is.null(variables$strata)) {
    # Stratified intervals cannot be computed without strata variables.
    if (checkmate::test_subset(method, c("strat_wilson", "strat_wilsonc"))) {
      stop("To use stratified methods you need to specify the strata variables.")
    }
  } else {
    # Checks for strata; further checks are left to prop_strat_wilson().
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    strata_colnames <- variables$strata
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)
    assert_df_with_variables(df, strata_vars)

    # One factor level per combination of the strata variables.
    strata <- as.factor(interaction(df[strata_colnames]))
  }

  # The response is either the vector itself or the `.var` column of `df`.
  rsp <- as.logical(if (checkmate::test_atomic_vector(df)) df else df[[.var]])
  n_responders <- sum(rsp)
  prop_responders <- mean(rsp)

  prop_ci <- switch(method,
    "wald" = prop_wald(rsp, conf_level),
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "wilson" = prop_wilson(rsp, conf_level),
    "wilsonc" = prop_wilson(rsp, conf_level, correct = TRUE),
    # Both stratified variants share one call and differ only in `correct`.
    "strat_wilson" = ,
    "strat_wilsonc" = prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = (method == "strat_wilsonc")
    )$conf_int,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  # The interval is reported on the percentage scale.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n_responders, prop_responders), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}

#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_proportion,
  # Default format strings for the two statistics returned by `s_proportion()`.
  .formats = c(n_prop = "xx (xx.x%)", prop_ci = "(xx.x, xx.x)")
)

#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = rep(LETTERS[1:3], each = 4),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "AVAL") %>%
#'   build_table(df = dta_test)
#'
#' @export
#' @order 2
estimate_proportion <- function(lyt,
                                vars,
                                conf_level = 0.95,
                                method = c(
                                  "waldcc", "wald", "clopper-pearson",
                                  "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                                  "agresti-coull", "jeffreys"
                                ),
                                weights = NULL,
                                max_iterations = 50,
                                variables = list(strata = NULL),
                                long = FALSE,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Arguments handed through to the statistics function via `extra_args`.
  s_fun_args <- list(
    conf_level = conf_level,
    method = method,
    weights = weights,
    max_iterations = max_iterations,
    variables = variables,
    long = long,
    ...
  )

  # Customise the default formatted analysis function with the user overrides.
  custom_afun <- make_afun(
    a_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Register the analysis on the layout.
  analyze(
    lyt,
    vars,
    afun = custom_afun,
    na_str = na_str,
    nested = nested,
    extra_args = s_fun_args,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Calculating Proportion Confidence Intervals
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
#'
#' @inheritParams argument_convention
#' @inheritParams estimate_proportions
#'
#' @return Confidence interval of a proportion.
#'
#' @seealso [estimate_proportions], descriptive function [d_proportion()],
#'  and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
#'
#' @name h_proportions
NULL

#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'  Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  # Number of responders out of the total number of observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  wilson_ci <- stats::prop.test(
    x = n_responders,
    n = n_total,
    conf.level = conf_level,
    correct = correct
  )$conf.int

  # Return a plain numeric vector without the attributes set by prop.test().
  as.numeric(wilson_ci)
}

#' @describeIn h_proportions Calculates the stratified Wilson confidence
#'   interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
#'   estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
#'   minimizes the weighted squared length of the confidence interval.
#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
#'   to find estimates of optimal weights.
#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
#'   [stats::prop.test()].
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified Wilson confidence interval with unequal probabilities
#'
#' set.seed(1)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#' strata <- interaction(strata_data)
#' n_strata <- ncol(table(rsp, strata)) # Number of strata
#'
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   conf_level = 0.90
#' )
#'
#' # Not automatic setting of weights
#' prop_strat_wilson(
#'   rsp = rsp, strata = strata,
#'   weights = rep(1 / n_strata, n_strata),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- length(unique(strata))

  # Without user-supplied weights, start from equal weights and estimate the
  # final weights iteratively (at most 10 iterations unless specified).
  do_iter <- is.null(weights)
  if (do_iter) {
    weights <- rep(1 / n_strata, n_strata)
    if (is.null(max_iterations)) max_iterations <- 10
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  sum_weights <- checkmate::assert_int(sum(weights))
  if (as.integer(sum_weights + 0.5) != 1L) stop("Sum of weights must be 1L.")

  # Responders and sizes per stratum; strata without observations are ignored.
  xs <- tbl["TRUE", ]
  ns <- colSums(tbl)
  non_empty <- ns > 0
  ns <- ns[non_empty]
  xs <- xs[non_empty]

  # Estimated proportion and its variance within each stratum.
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Externally supplied weights are used as they are.
  weights_new <- weights
  if (do_iter) {
    weights_new <- update_weights_strat_wilson(
      vars, strata_qnorm, weights, ns, max_iterations, conf_level
    )$weights
  }

  # Confidence level applied within each stratum.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  # Classic Wilson's confidence interval for a single stratum.
  wilson_ci <- function(x, n) {
    suppressWarnings(stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int)
  }
  ci_by_strata <- Map(wilson_ci, x = xs, n = ns)
  lower_by_strata <- vapply(ci_by_strata, function(ci) ci[1L], numeric(1))
  upper_by_strata <- vapply(ci_by_strata, function(ci) ci[2L], numeric(1))

  # Overall limits are the weighted sums of the per-stratum limits.
  result <- list(
    conf_int = c(
      lower = sum(weights_new * lower_by_strata),
      upper = sum(weights_new * upper_by_strata)
    )
  )

  # The estimated weights are only reported when they were computed here.
  if (do_iter) {
    c(result, list(weights = weights_new))
  } else {
    result
  }
}

#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
#'   Also referred to as the `exact` method.
#'
#' @examples
#' prop_clopper_pearson(rsp, conf_level = .95)
#'
#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Exact binomial test on responders out of all observations.
  n_responders <- sum(rsp)
  n_total <- length(rsp)
  exact_ci <- stats::binom.test(x = n_responders, n = n_total, conf.level = conf_level)$conf.int

  # Return a plain numeric vector without the attributes set by binom.test().
  as.numeric(exact_ci)
}

#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
#'   for a single proportion confidence interval using the normal approximation.
#'
#' @param correct (`flag`)\cr apply continuity correction.
#'
#' @examples
#' prop_wald(rsp, conf_level = 0.95)
#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
#'
#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n <- length(rsp)
  p_hat <- mean(rsp)
  z <- stats::qnorm((1 + conf_level) / 2)

  # Continuity correction term, zero unless requested.
  continuity <- if (correct) {
    1 / (2 * n)
  } else {
    0
  }

  # Half-width of the normal-approximation interval.
  half_width <- z * sqrt(p_hat * (1 - p_hat)) / sqrt(n) + continuity

  # Truncate the limits to the [0, 1] range.
  c(max(0, p_hat - half_width), min(1, p_hat + half_width))
}

#' @describeIn h_proportions Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
#'   (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
#'
#' @examples
#' prop_agresti_coull(rsp, conf_level = 0.95)
#'
#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  z <- stats::qnorm((1 + conf_level) / 2)

  # Adjusted counts: z^2 / 2 successes and z^2 / 2 failures are added.
  x_sum_tilde <- sum(rsp) + z^2 / 2
  n_tilde <- length(rsp) + z^2

  # Wald-type interval around the adjusted proportion.
  p_tilde <- x_sum_tilde / n_tilde
  half_width <- z * sqrt(p_tilde * (1 - p_tilde)) / sqrt(n_tilde)

  # Truncate the limits to the [0, 1] range.
  c(max(0, p_tilde - half_width), min(1, p_tilde + half_width))
}

#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
#'   non-informative Jeffreys prior for a binomial proportion.
#'
#' @examples
#' prop_jeffreys(rsp, conf_level = 0.95)
#'
#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n <- length(rsp)
  x_sum <- sum(rsp)

  # Shape parameters of the Beta distribution used for both limits.
  shape_success <- x_sum + 0.5
  shape_failure <- n - x_sum + 0.5

  # Equal-tailed interval: half of the error probability in each tail.
  tail_prob <- (1 - conf_level) / 2

  # The limits are fixed at the boundary when no or all subjects respond.
  lower <- ifelse(x_sum == 0, 0, stats::qbeta(tail_prob, shape_success, shape_failure))
  upper <- ifelse(x_sum == n, 1, stats::qbeta(1 - tail_prob, shape_success, shape_failure))

  c(lower, upper)
}

#' Description of the Proportion Summary
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_proportion()].
#'
#' @inheritParams s_proportion
#' @param long (`flag`)\cr whether a long or a short (default) description is required.
#'
#' @return String describing the analysis.
#'
#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Leading part: confidence level in percent, optionally with the long suffix.
  ci_part <- paste0(conf_level * 100, "% CI")
  if (long) {
    ci_part <- paste(ci_part, "for Response Rates")
  }

  # Display name of the interval method; unknown keys are an error.
  method_name <- switch(method,
    "wald" = "Wald, without correction",
    "waldcc" = "Wald, with correction",
    "wilson" = "Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "clopper-pearson" = "Clopper-Pearson",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(ci_part, " (", method_name, ")")
}

#' Helper Function for the Estimation of Stratified Quantiles
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the estimation of stratified percentiles when we assume
#' the approximation for large numbers. This is necessary only in the case
#' proportions for each strata are unequal.
#'
#' @inheritParams argument_convention
#' @inheritParams prop_strat_wilson
#'
#' @return Stratified quantile.
#'
#' @seealso [prop_strat_wilson()]
#'
#' @examples
#' strata_data <- table(data.frame(
#'   "f1" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' ))
#' ns <- colSums(strata_data)
#' ests <- strata_data["TRUE", ] / ns
#' vars <- ests * (1 - ests) / ns
#' weights <- rep(1 / length(ns), length(ns))
#'
#' strata_normal_quantile(vars, weights, 0.95)
#'
#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Weighted variance contribution of each stratum.
  weighted_vars <- weights^2 * vars

  # Factor that rescales the standard normal quantile across strata.
  scaling <- sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars))

  # Stratified quantile
  scaling * stats::qnorm((1 + conf_level) / 2)
}

#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function wraps the iterative procedure used to estimate the weights
#' for each stratum. The weights are chosen to minimize the weighted squared
#' length of the confidence interval.
#'
#' @inheritParams prop_strat_wilson
#' @param vars (`numeric`)\cr normalized proportions for each strata.
#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
#'   be optimized in the future if we need to estimate better initial weights.
#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
#' @param tol (`number`)\cr tolerance threshold for convergence.
#'
#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
#'
#' @seealso For references and details see [prop_strat_wilson()].
#'
#' @examples
#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
#' sq <- 0.674
#' ws <- rep(1 / length(vs), length(vs))
#' ns <- c(22, 18, 17, 17, 14, 12)
#'
#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
#'
#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  # Set to TRUE only when the tolerance criterion is met, so that converging
  # exactly on the last allowed iteration is not reported as a failure.
  converged <- FALSE
  # If the loop body never runs, the initial weights are returned unchanged.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1
    # Updated weights: ratio of the two terms below, normalised to sum to 1.
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)
    # Re-estimate the stratified quantile with the updated weights.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)
    # Convergence measure: total absolute change in the weights.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  # `n_it`: iterations performed; `weights`: final weights;
  # `diff_v`: change in weights recorded at each iteration.
  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

#' `rtables` Access Helper Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are a couple of functions that help with accessing the data in `rtables` objects.
#' Currently these work for occurrence tables, which are defined as having a count as the first
#' element and a fraction as the second element in each cell.
#'
#' @seealso [prune_occurrences] for usage of these functions.
#'
#' @name rtables_access
NULL

#' @describeIn rtables_access Helper function to extract the first values from each content
#'   cell and from specified columns in a `TableRow`. Defaults to all columns.
#'
#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
#' @param col_names (`character`)\cr the names of the columns to extract from.
#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
#'   then these are inferred from the names of `table_row`. Note that this currently only works well with a single
#'   column split.
#'
#' @return
#' * `h_row_first_values()` returns a `vector` of numeric values.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   analyze("AGE", function(x) {
#'     list(
#'       "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
#'       "n" = length(x),
#'       "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
#'     )
#'   }) %>%
#'   build_table(tern_ex_adsl) %>%
#'   prune_table()
#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
#' result <- max(h_row_first_values(tree_row_elem))
#' result
#'
#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve and validate the columns to read from.
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  # First element of a cell value, or NA when the cell has no value.
  first_or_na <- function(rv) {
    if (is.null(rv)) NA_real_ else rv[1L]
  }

  selected_cells <- row_values(table_row)[col_indices]
  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}

#' @describeIn rtables_access Helper function that extracts row values and checks if they are
#'   convertible to integers (`integerish` values).
#'
#' @return
#' * `h_row_counts()` returns a `vector` of numeric values.
#'
#' @examples
#' # Row counts (integer values)
#' # h_row_counts(tree_row_elem) # Fails because there are no integers
#' # Using values with integers
#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
#' result <- h_row_counts(tree_row_elem)
#' # result
#'
#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # Counts are the first values of each selected cell.
  counts <- h_row_first_values(
    table_row = table_row,
    col_names = col_names,
    col_indices = col_indices
  )

  # Fail if any extracted value is not a whole number.
  checkmate::assert_integerish(counts)
  counts
}

#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
#'   More specifically it extracts the second values from each content cell and checks it is a fraction.
#'
#' @return
#' * `h_row_fractions()` returns a `vector` of proportions.
#'
#' @examples
#' # Row fractions
#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
#' h_row_fractions(tree_row_elem)
#'
#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  selected_cells <- row_values(table_row)[col_indices]

  # Fractions are the second values of each selected cell.
  fractions <- sapply(selected_cells, function(cell) cell[2L])

  # Fail unless all extracted values lie between 0 and 1.
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}

#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
#'
#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
#'
#' @return
#' * `h_col_counts()` returns a `vector` of column counts.
#'
#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  col_indices <- check_names_indices(table, col_names, col_indices)

  # Column counts of the selected columns, named by `col_names`
  # (names are removed when `col_names` is `NULL`).
  counts <- col_counts(table)[col_indices]
  names(counts) <- col_names
  counts
}

#' @describeIn rtables_access Helper function to get first row of content table of current table.
#'
#' @return
#' * `h_content_first_row()` returns a row from an `rtables` table.
#'
#' @export
h_content_first_row <- function(table) {
  # First child of the content table attached to `table`.
  tree_children(content_table(table))[[1]]
}

#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
#'
#' @return
#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
#'
#' @keywords internal
is_leaf_table <- function(table) {
  # A table counts as a leaf when the distinct classes of its children reduce to
  # exactly "ElementaryTable".
  distinct_classes <- unique(sapply(tree_children(table), class))
  identical(distinct_classes, "ElementaryTable")
}

#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
#'
#' @return
#' * `check_names_indices` returns column indices.
#'
#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  names_given <- !is.null(col_names)

  # Names and indices are mutually exclusive ways of selecting columns.
  if (names_given && !is.null(col_indices)) {
    stop(
      "Inserted both col_names and col_indices when selecting row values. ",
      "Please choose one."
    )
  }

  # Translate names into indices when names were supplied.
  if (names_given) {
    col_indices <- h_col_indices(table_row, col_names)
  }

  if (!is.null(col_indices)) {
    return(col_indices)
  }

  # Nothing selected: default to every column. Objects without a column dimension
  # (`ncol()` is `NULL`) fall back to their length.
  n_cols <- ncol(table_row)
  if (is.null(n_cols)) {
    n_cols <- length(table_row)
  }
  seq_len(n_cols)
}

#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for STEP calculations.
#'
#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
#'   could be used to infer `bandwidth`, see below.
#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
#'   quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
#'   covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
#'   distributed.
#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
#'   Depending on the argument `use_percentile`, it can be either the length of actual-value
#'   windows on the real biomarker scale, or percentage windows.
#'   If `use_percentile = TRUE`, it should be a number between 0 and 1.
#'   If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
#'   By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
#'   variable for actual-value windows.
#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
#'   with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
#'   is not included in the model fitted in each biomarker window.
#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
#'   smallest number is 2.
#'
#' @return A list of components with the same names as the arguments, except `biomarker` which is
#'   just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
#'
#' @examples
#' # Provide biomarker values and request actual values to be used,
#' # so that bandwidth is chosen from range.
#' control_step(biomarker = 1:10, use_percentile = FALSE)
#'
#' # Use a global model with quadratic biomarker interaction term.
#' control_step(bandwidth = NULL, degree = 2)
#'
#' # Reduce number of points to be used.
#' control_step(num_points = 10)
#'
#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  # Validate the arguments that do not depend on each other.
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # No bandwidth supplied: derive a default.
    if (use_percentile) {
      # Percentage windows default to a quarter of the data.
      bandwidth <- 0.25
    } else if (is.null(biomarker)) {
      # Nothing to infer a range from; `NULL` is passed on as-is.
      bandwidth <- NULL
    } else {
      # Actual-value windows default to a quarter of the observed biomarker range.
      bandwidth <- diff(range(biomarker, na.rm = TRUE)) / 4
    }
  } else if (!is.null(bandwidth)) {
    # An explicit, non-NULL bandwidth must be a proportion (percentile windows) or a
    # positive scalar (actual-value windows). An explicit `NULL` is accepted unchecked.
    if (use_percentile) {
      assert_proportion_value(bandwidth)
    } else {
      checkmate::assert_scalar(bandwidth)
      checkmate::assert_true(bandwidth > 0)
    }
  }

  # `biomarker` is deliberately not part of the returned control list.
  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

#' Confidence Interval for Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
#' geometric mean. It can be used as a `ggplot` helper function for plotting.
#'
#' @inheritParams argument_convention
#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
#'
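#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`. If `gg_helper = TRUE`, a `data.frame`
#'   with columns `y` (the mean, or the geometric mean if `geom_mean = TRUE`), `ymin`, and `ymax` instead.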
#'
#' @examples
#' stat_mean_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5),
#'   geom = "errorbar"
#' )
#'
#' p + ggplot2::stat_summary(
#'   fun.data = stat_mean_ci,
#'   fun.args = list(conf_level = 0.5, geom_mean = TRUE),
#'   geom = "errorbar"
#' )
#'
#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (geom_mean) {
    # The geometric mean is computed on the log scale, which is undefined as soon as
    # any observed value is zero or negative.
    observed <- x[!is.na(x)]
    if (any(observed <= 0)) {
      m <- NA_real_
    } else {
      x <- log(x)
      m <- mean(x)
    }
  } else {
    m <- mean(x)
  }

  # Start from a missing interval; compute a t-based one only when there are enough
  # observations and the (log-)mean itself is available.
  ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  if (!(n < n_min || is.na(m))) {
    half_width <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - half_width, mean_ci_upr = m + half_width)
    if (geom_mean) {
      # Back-transform the log-scale limits.
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Shape expected by `ggplot2::stat_summary(fun.data = ...)`: y / ymin / ymax.
    m <- ifelse(test = is.na(m), yes = NA_real_, no = m)
    ci <- data.frame(
      y = ifelse(test = geom_mean, yes = exp(m), no = m),
      ymin = ci[[1]],
      ymax = ci[[2]]
    )
  }

  ci
}

#' Confidence Interval for Median
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
#' function for plotting.
#'
#' @inheritParams argument_convention
#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
#'
#' @details The function was adapted from `DescTools/versions/0.99.35/source`
#'
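#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`. If `gg_helper = TRUE`, a `data.frame`
#'   with columns `y` (the median), `ymin`, and `ymax` instead. In both cases the empirical confidence level is
#'   attached as attribute `conf_level`.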
#'
#' @examples
#' stat_median_ci(sample(10), gg_helper = FALSE)
#'
#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
#'   ggplot2::geom_point()
#' p + ggplot2::stat_summary(
#'   fun.data = stat_median_ci,
#'   geom = "errorbar"
#' )
#'
#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the lower order statistic, taken from the Binomial(n, 0.5) distribution.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # Default to a missing interval: with k == 0 (small samples, e.g. n <= 5) the interval
  # would fall outside the observed range, and a missing median allows no interval either.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  empir_conf_level <- NA_real_

  if (!(k == 0 || is.na(med))) {
    ordered <- sort(x)
    # Interval limits are the k-th smallest and the k-th largest observation.
    ci <- c(median_ci_lwr = ordered[k], median_ci_upr = ordered[n - k + 1])
    # Coverage actually achieved by these order statistics.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  }

  if (gg_helper) {
    # Shape expected by `ggplot2::stat_summary(fun.data = ...)`: y / ymin / ymax.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level
  ci
}

#' p-Value of the Mean
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convenient function for calculating the two-sided p-value of the mean.
#'
#' @inheritParams argument_convention
#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
#'
#' @return A p-value.
#'
#' @examples
#' stat_mean_pval(sample(10))
#'
#' stat_mean_pval(rnorm(10), test_mean = 0.5)
#'
#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  # Too few observations for a test: report a missing p-value.
  if (n < n_min) {
    return(c(p_value = NA_real_))
  }

  # One-sample t statistic against `test_mean`; the p-value is two-sided.
  x_se <- x_sd / sqrt(n)
  t_stat <- (x_mean - test_mean) / x_se
  c(p_value = 2 * stats::pt(-abs(t_stat), df = n - 1))
}

#' Proportion Difference and Confidence Interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function for calculating the proportion (or risk) difference and confidence interval between arm
#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence
#' in arm Y from cumulative incidence in arm X.
#'
#' @inheritParams argument_convention
#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group).
#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`.
#' @param N_x (`numeric`)\cr total number of records in arm X.
#' @param N_y (`numeric`)\cr total number of records in arm Y.
#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in
#'   `x` and `y`. Must be of equal length to `x` and `y`.
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
#'
#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and
#'   `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound.
#'
#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()]
#'   with `riskdiff` argument is set to `TRUE` in subsequent analyze functions, adds a column containing
#'   proportion (risk) difference to an `rtables` layout.
#'
#' @examples
#' stat_propdiff_ci(
#'   x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9
#' )
#'
#' stat_propdiff_ci(
#'   x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE
#' )
#'
#' @export
stat_propdiff_ci <- function(x,
                             y,
                             N_x, # nolint
                             N_y, # nolint
                             list_names = NULL,
                             conf_level = 0.95,
                             pct = TRUE) {
  checkmate::assert_list(x, types = "numeric")
  checkmate::assert_list(y, types = "numeric", len = length(x))
  checkmate::assert_character(list_names, len = length(x), null.ok = TRUE)

  # Difference p_x - p_y with a normal-approximation interval for one pair of counts,
  # returned as c(difference, lower, upper) and optionally scaled to percentages.
  diff_with_ci <- function(count_x, count_y) {
    p_x <- count_x / N_x
    p_y <- count_y / N_y
    std_err <- sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y)
    bounds <- p_x - p_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) * std_err
    c(p_x - p_y, bounds) * ifelse(pct, 100, 1)
  }

  # Walk the two lists in parallel; element names come exclusively from `list_names`.
  rd_list <- Map(diff_with_ci, x, y)
  names(rd_list) <- list_names
  rd_list
}

#' Confidence Intervals for a Difference of Binomials
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Several confidence intervals for the difference between proportions.
#'
#' @name desctools_binom
NULL

#' Recycle List of Parameters
#'
#' This function recycles all supplied elements to the maximal dimension.
#'
#' @param ... (`any`)\cr Elements to recycle.
#'
#' @return A `list`.
#'
#' @keywords internal
#' @noRd
h_recycle <- function(...) {
  # Collect the inputs and find the longest one.
  inputs <- list(...)
  target_len <- max(lengths(inputs))
  # Repeat every input cyclically until it has `target_len` elements.
  recycled <- lapply(inputs, function(el) rep(el, length.out = target_len))
  # Record the common length so callers can iterate over positions.
  attr(recycled, "maxdim") <- target_len
  recycled
}

#' @describeIn desctools_binom Several confidence intervals for the difference between proportions.
#'
#' @return A `matrix` of 3 values:
#'   * `est`: estimate of proportion difference.
#'   * `lwr.ci`: estimate of lower end of the confidence interval.
#'   * `upr.ci`: estimate of upper end of the confidence interval.
#'
#' @keywords internal
desctools_binom <- function(x1,
                            n1,
                            x2,
                            n2,
                            conf.level = 0.95, # nolint
                            sides = c("two.sided", "left", "right"),
                            method = c(
                              "ac", "wald", "waldcc", "score", "scorecc", "mn", "mee", "blj", "ha", "hal", "jp"
                            )) {
  # Confidence intervals for the difference of two binomial proportions, x1 / n1 - x2 / n2.
  #
  # Arguments:
  #   x1, n1      successes and trials in the first sample.
  #   x2, n2      successes and trials in the second sample.
  #   conf.level  confidence level of the interval.
  #   sides       "two.sided", "left" or "right"; for one-sided intervals the unused limit
  #               is set to the boundary of the parameter space (1 or -1).
  #   method      interval construction; one of the choices in the signature
  #               (naming follows `DescTools::BinomDiffCI()`, from which this code is adapted).
  #
  # All arguments are recycled to a common length with `h_recycle()` and one interval is
  # computed per recycled position. The result is a matrix with one row per position and
  # three columns (estimate, lower limit, upper limit).

  # With defaults untouched, reduce the choice vectors to their first element so that a
  # plain call computes a single interval.
  if (missing(sides)) {
    sides <- match.arg(sides)
  }
  if (missing(method)) {
    method <- match.arg(method)
  }

  # Worker: interval for ONE set of scalar inputs.
  iBinomDiffCI <- function(x1, n1, x2, n2, conf.level, sides, method) { # nolint
    # A one-sided interval at level `conf.level` is read off a two-sided interval at
    # level 1 - 2 * (1 - conf.level).
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p1_hat <- x1 / n1
    p2_hat <- x2 / n2
    est <- p1_hat - p2_hat
    switch(method,
      wald = {
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      waldcc = {
        # Wald interval widened by a continuity correction.
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        term2 <- term2 + 0.5 * (1 / n1 + 1 / n2)
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      ac = {
        # Wald-type interval after adding one success and one failure to each sample.
        # The reported `est` stays the unadjusted difference; only the limits use `est1`.
        n1 <- n1 + 2
        n2 <- n2 + 2
        x1 <- x1 + 1
        x2 <- x2 + 1
        p1_hat <- x1 / n1
        p2_hat <- x2 / n2
        est1 <- p1_hat - p2_hat
        vd <- p1_hat * (1 - p1_hat) / n1 + p2_hat * (1 - p2_hat) / n2
        term2 <- kappa * sqrt(vd)
        ci_lwr <- max(-1, est1 - term2)
        ci_upr <- min(1, est1 + term2)
      },
      # NOTE: "exact" is not among the `method` choices accepted by `match.arg()` below,
      # so this branch cannot be reached through the public interface.
      exact = {
        ci_lwr <- NA
        ci_upr <- NA
      },
      score = {
        # Built from the Wilson limits of the two single proportions.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilson"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilson"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- est - kappa * sqrt(l1 * (1 - l1) / n1 + u2 * (1 - u2) / n2)
        ci_upr <- est + kappa * sqrt(u1 * (1 - u1) / n1 + l2 * (1 - l2) / n2)
      },
      scorecc = {
        # Same construction from the continuity-corrected Wilson limits.
        w1 <- desctools_binomci(
          x = x1, n = n1, conf.level = conf.level,
          method = "wilsoncc"
        )
        w2 <- desctools_binomci(
          x = x2, n = n2, conf.level = conf.level,
          method = "wilsoncc"
        )
        l1 <- w1[2]
        u1 <- w1[3]
        l2 <- w2[2]
        u2 <- w2[3]
        ci_lwr <- max(-1, est - sqrt((p1_hat - l1)^2 + (u2 - p2_hat)^2))
        ci_upr <- min(1, est + sqrt((u1 - p1_hat)^2 + (p2_hat - l2)^2))
      },
      mee = {
        # Standard error of the difference under the constraint p1 - p2 = dif; the
        # constrained estimates come from the closed-form solution of a cubic equation.
        .score <- function(p1, n1, p2, n2, dif) {
          if (dif > 1) dif <- 1
          if (dif < -1) dif <- -1
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            if (abs(v) < .Machine$double.eps) v <- 0
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            res <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2)
          }
          return(sqrt(res))
        }
        # Two-sided p-value of the score test of H0: p1 - p2 = delta.
        pval <- function(delta) {
          z <- (est - delta) / .score(p1_hat, n1, p2_hat, n2, delta)
          2 * min(stats::pnorm(z), 1 - stats::pnorm(z))
        }
        # Each limit is the delta at which that p-value equals alpha, searched on
        # either side of the point estimate.
        ci_lwr <- max(-1, stats::uniroot(function(delta) {
          pval(delta) - alpha
        }, interval = c(-1 + 1e-06, est - 1e-06))$root)
        ci_upr <- min(1, stats::uniroot(function(delta) {
          pval(delta) - alpha
        }, interval = c(est + 1e-06, 1 - 1e-06))$root)
      },
      blj = {
        # Wald-type interval around proportions shifted by half a success per sample.
        p1_dash <- (x1 + 0.5) / (n1 + 1)
        p2_dash <- (x2 + 0.5) / (n2 + 1)
        vd <- p1_dash * (1 - p1_dash) / n1 + p2_dash * (1 - p2_dash) / n2
        term2 <- kappa * sqrt(vd)
        est_dash <- p1_dash - p2_dash
        ci_lwr <- max(-1, est_dash - term2)
        ci_upr <- min(1, est_dash + term2)
      },
      ha = {
        term2 <- 1 / (2 * min(n1, n2)) +
          kappa * sqrt(p1_hat * (1 - p1_hat) / (n1 - 1) + p2_hat * (1 - p2_hat) / (n2 - 1))
        ci_lwr <- max(-1, est - term2)
        ci_upr <- min(1, est + term2)
      },
      mn = {
        # Step-halving search (at most 50 steps) for the difference at which the score
        # statistic reaches the critical value `z`, below (`lower = TRUE`) or above the
        # point estimate.
        .conf <- function(x1, n1, x2, n2, z, lower = FALSE) {
          p1 <- x1 / n1
          p2 <- x2 / n2
          p_hat <- p1 - p2
          dp <- 1 + ifelse(lower, 1, -1) * p_hat
          i <- 1
          while (i <= 50) {
            dp <- 0.5 * dp
            y <- p_hat + ifelse(lower, -1, 1) * dp
            score <- .score(p1, n1, p2, n2, y)
            if (score < z) {
              p_hat <- y
            }
            if ((dp < 1e-07) || (abs(z - score) < 1e-06)) {
              break
            } else {
              i <- i + 1
            }
          }
          return(y)
        }
        # Score statistic for H0: p1 - p2 = dif, with the variance inflated by n / (n - 1).
        .score <- function(p1, n1, p2, n2, dif) {
          diff <- p1 - p2 - dif
          if (abs(diff) == 0) {
            res <- 0
          } else {
            t <- n2 / n1
            a <- 1 + t
            b <- -(1 + t + p1 + t * p2 + dif * (t + 2))
            c <- dif * dif + dif * (2 * p1 + t + 1) + p1 + t * p2
            d <- -p1 * dif * (1 + dif)
            v <- (b / a / 3)^3 - b * c / (6 * a * a) + d / a / 2
            s <- sqrt((b / a / 3)^2 - c / a / 3)
            u <- ifelse(v > 0, 1, -1) * s
            w <- (3.141592654 + acos(v / u^3)) / 3
            p1d <- 2 * u * cos(w) - b / a / 3
            p2d <- p1d - dif
            n <- n1 + n2
            var <- (p1d * (1 - p1d) / n1 + p2d * (1 - p2d) / n2) * n / (n - 1)
            res <- diff^2 / var
          }
          return(res)
        }
        z <- stats::qchisq(conf.level, 1)
        ci_lwr <- max(-1, .conf(x1, n1, x2, n2, z, TRUE))
        ci_upr <- min(1, .conf(x1, n1, x2, n2, z, FALSE))
      },
      # NOTE: "beal" is not among the `method` choices accepted by `match.arg()` below,
      # so this branch cannot be reached through the public interface.
      beal = {
        a <- p1_hat + p2_hat
        b <- p1_hat - p2_hat
        u <- ((1 / n1) + (1 / n2)) / 4
        v <- ((1 / n1) - (1 / n2)) / 4
        V <- u * ((2 - a) * a - b^2) + 2 * v * (1 - a) * b # nolint
        z <- stats::qchisq(p = 1 - alpha / 2, df = 1)
        A <- sqrt(z * (V + z * u^2 * (2 - a) * a + z * v^2 * (1 - a)^2)) # nolint
        B <- (b + z * v * (1 - a)) / (1 + z * u) # nolint
        ci_lwr <- max(-1, B - A / (1 + z * u))
        ci_upr <- min(1, B + A / (1 + z * u))
      },
      hal = {
        # `psi` is the plain average of the two observed proportions.
        psi <- (p1_hat + p2_hat) / 2
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      },
      jp = {
        # Same formula as "hal", but `psi` averages proportions shifted by half a success.
        psi <- 0.5 * ((x1 + 0.5) / (n1 + 1) + (x2 + 0.5) / (n2 + 1))
        u <- (1 / n1 + 1 / n2) / 4
        v <- (1 / n1 - 1 / n2) / 4
        z <- kappa
        theta <- ((p1_hat - p2_hat) + z^2 * v * (1 - 2 * psi)) / (1 + z^2 * u)
        w <- z / (1 + z^2 * u) * sqrt(u * (4 * psi * (1 - psi) - (p1_hat - p2_hat)^2) + 2 * v * (1 - 2 * psi) *
          (p1_hat - p2_hat) + 4 * z^2 * u^2 * (1 - psi) * psi + z^2 * v^2 * (1 - 2 * psi)^2) # nolint
        ci_lwr <- max(-1, theta - w)
        ci_upr <- min(1, theta + w)
      }
    )
    # Guard against limits coming out in reversed order.
    ci <- c(
      est = est, lwr.ci = min(ci_lwr, ci_upr),
      upr.ci = max(ci_lwr, ci_upr)
    )
    # One-sided intervals: open the unused side up to the parameter-space boundary.
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- -1
    }
    ci
  }

  # Validate the (possibly vector-valued) choices against the formal defaults.
  method <- match.arg(arg = method, several.ok = TRUE)
  sides <- match.arg(arg = sides, several.ok = TRUE)

  # Recycle every argument to the common length and compute one interval per position.
  lst <- h_recycle(
    x1 = x1, n1 = n1, x2 = x2, n2 = n2, conf.level = conf.level,
    sides = sides, method = method
  )
  res <- t(vapply(seq_len(attr(lst, "maxdim")), function(i) {
    iBinomDiffCI(
      x1 = lst$x1[i],
      n1 = lst$n1[i], x2 = lst$x2[i], n2 = lst$n2[i], conf.level = lst$conf.level[i],
      sides = lst$sides[i], method = lst$method[i]
    )
  }, numeric(3)))

  # Row labels: for every argument that actually varies across rows, use its names
  # (or "<arg>.<position>" when unnamed) and join the varying parts with ":".
  label_or_default <- function(values, prefix) {
    if (is.null(names(values))) {
      paste(prefix, seq_along(values), sep = ".")
    } else {
      names(values)
    }
  }
  lgn <- h_recycle(
    x1 = label_or_default(x1, "x1"),
    n1 = label_or_default(n1, "n1"),
    x2 = label_or_default(x2, "x2"),
    n2 = label_or_default(n2, "n2"),
    conf.level = conf.level, sides = sides, method = method
  )
  varies <- vapply(lgn, function(x) length(unique(x)) != 1, logical(1))
  xn <- apply(as.data.frame(lgn[varies]), 1, paste, collapse = ":")
  rownames(res) <- xn
  return(res)
}

#' @describeIn desctools_binom Compute confidence intervals for binomial proportions.
#'
#' @param x (`count`)\cr number of successes
#' @param n (`count`)\cr number of trials
#' @param conf.level (`proportion`)\cr confidence level, defaults to 0.95.
#' @param sides (`character`)\cr side of the confidence interval to compute. Must be one of `"two.sided"` (default),
#'   `"left"`, or `"right"`.
#' @param method (`character`)\cr method to use. Can be one out of: `"wald"`, `"waldcc"`, `"wilson"`, `"wilsoncc"`,
#' `"agresti-coull"`, `"jeffreys"`, `"modified wilson"`, `"modified jeffreys"`, `"clopper-pearson"`, `"arcsine"`,
#' `"logit"`, `"witting"`, `"pratt"`, `"midp"`, `"lik"`, and `"blaker"`.
#'
#' @return A `matrix` with 3 columns containing:
#'   * `est`: estimate of the proportion.
#'   * `lwr.ci`: lower end of the confidence interval.
#'   * `upr.ci`: upper end of the confidence interval.
#'
#' @keywords internal
desctools_binomci <- function(x,
                              n,
                              conf.level = 0.95, # nolint
                              sides = c("two.sided", "left", "right"),
                              method = c(
                                "wilson", "wald", "waldcc", "agresti-coull",
                                "jeffreys", "modified wilson", "wilsoncc", "modified jeffreys",
                                "clopper-pearson", "arcsine", "logit", "witting", "pratt",
                                "midp", "lik", "blaker"
                              ),
                              rand = 123,
                              tol = 1e-05) {
  if (missing(method)) {
    method <- "wilson"
  }
  if (missing(sides)) {
    sides <- "two.sided"
  }
  iBinomCI <- function(x, n, conf.level = 0.95, sides = c("two.sided", "left", "right"), # nolint
                       method = c(
                         "wilson", "wilsoncc", "wald",
                         "waldcc", "agresti-coull", "jeffreys", "modified wilson",
                         "modified jeffreys", "clopper-pearson", "arcsine", "logit",
                         "witting", "pratt", "midp", "lik", "blaker"
                       ),
                       rand = 123,
                       tol = 1e-05) {
    if (length(x) != 1) {
      stop("'x' has to be of length 1 (number of successes)")
    }
    if (length(n) != 1) {
      stop("'n' has to be of length 1 (number of trials)")
    }
    if (length(conf.level) != 1) {
      stop("'conf.level' has to be of length 1 (confidence level)")
    }
    if (conf.level < 0.5 || conf.level > 1) {
      stop("'conf.level' has to be in [0.5, 1]")
    }
    sides <- match.arg(sides, choices = c(
      "two.sided", "left",
      "right"
    ), several.ok = FALSE)
    if (sides != "two.sided") {
      conf.level <- 1 - 2 * (1 - conf.level) # nolint
    }
    alpha <- 1 - conf.level
    kappa <- stats::qnorm(1 - alpha / 2)
    p_hat <- x / n
    q_hat <- 1 - p_hat
    est <- p_hat
    switch(match.arg(arg = method, choices = c(
      "wilson",
      "wald", "waldcc", "wilsoncc", "agresti-coull", "jeffreys",
      "modified wilson", "modified jeffreys", "clopper-pearson",
      "arcsine", "logit", "witting", "pratt", "midp", "lik",
      "blaker"
    )),
    wald = {
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    waldcc = {
      term2 <- kappa * sqrt(p_hat * q_hat) / sqrt(n)
      term2 <- term2 + 1 / (2 * n)
      ci_lwr <- max(0, p_hat - term2)
      ci_upr <- min(1, p_hat + term2)
    },
    wilson = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
      ci_lwr <- max(0, term1 - term2)
      ci_upr <- min(1, term1 + term2)
    },
    wilsoncc = {
      lci <- (
        2 * x + kappa^2 - 1 - kappa * sqrt(kappa^2 - 2 - 1 / n + 4 * p_hat * (n * q_hat + 1))
      ) / (2 * (n + kappa^2))
      uci <- (
        2 * x + kappa^2 + 1 + kappa * sqrt(kappa^2 + 2 - 1 / n + 4 * p_hat * (n * q_hat - 1))
      ) / (2 * (n + kappa^2))
      ci_lwr <- max(0, ifelse(p_hat == 0, 0, lci))
      ci_upr <- min(1, ifelse(p_hat == 1, 1, uci))
    },
    `agresti-coull` = {
      x_tilde <- x + kappa^2 / 2
      n_tilde <- n + kappa^2
      p_tilde <- x_tilde / n_tilde
      q_tilde <- 1 - p_tilde
      est <- p_tilde
      term2 <- kappa * sqrt(p_tilde * q_tilde) / sqrt(n_tilde)
      ci_lwr <- max(0, p_tilde - term2)
      ci_upr <- min(1, p_tilde + term2)
    },
    jeffreys = {
      if (x == 0) {
        ci_lwr <- 0
      } else {
        ci_lwr <- stats::qbeta(
          alpha / 2,
          x + 0.5, n - x + 0.5
        )
      }
      if (x == n) {
        ci_upr <- 1
      } else {
        ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
      }
    },
    `modified wilson` = {
      term1 <- (x + kappa^2 / 2) / (n + kappa^2)
      term2 <- kappa * sqrt(n) / (n + kappa^2) * sqrt(p_hat * q_hat + kappa^2 / (4 * n))
      if ((n <= 50 & x %in% c(1, 2)) | (n >= 51 & x %in% c(1:3))) {
        ci_lwr <- 0.5 * stats::qchisq(alpha, 2 * x) / n
      } else {
        ci_lwr <- max(0, term1 - term2)
      }
      if ((n <= 50 & x %in% c(n - 1, n - 2)) | (n >= 51 & x %in% c(n - (1:3)))) {
        ci_upr <- 1 - 0.5 * stats::qchisq(
          alpha,
          2 * (n - x)
        ) / n
      } else {
        ci_upr <- min(1, term1 + term2)
      }
    },
    `modified jeffreys` = {
      if (x == n) {
        ci_lwr <- (alpha / 2)^(1 / n)
      } else {
        if (x <= 1) {
          ci_lwr <- 0
        } else {
          ci_lwr <- stats::qbeta(
            alpha / 2,
            x + 0.5, n - x + 0.5
          )
        }
      }
      if (x == 0) {
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else {
        if (x >= n - 1) {
          ci_upr <- 1
        } else {
          ci_upr <- stats::qbeta(1 - alpha / 2, x + 0.5, n - x + 0.5)
        }
      }
    },
    `clopper-pearson` = {
      ci_lwr <- stats::qbeta(alpha / 2, x, n - x + 1)
      ci_upr <- stats::qbeta(1 - alpha / 2, x + 1, n - x)
    },
    arcsine = {
      p_tilde <- (x + 0.375) / (n + 0.75)
      est <- p_tilde
      ci_lwr <- sin(asin(sqrt(p_tilde)) - 0.5 * kappa / sqrt(n))^2
      ci_upr <- sin(asin(sqrt(p_tilde)) + 0.5 * kappa / sqrt(n))^2
    },
    logit = {
      lambda_hat <- log(x / (n - x))
      V_hat <- n / (x * (n - x)) # nolint
      lambda_lower <- lambda_hat - kappa * sqrt(V_hat)
      lambda_upper <- lambda_hat + kappa * sqrt(V_hat)
      ci_lwr <- exp(lambda_lower) / (1 + exp(lambda_lower))
      ci_upr <- exp(lambda_upper) / (1 + exp(lambda_upper))
    },
    witting = {
      set.seed(rand)
      x_tilde <- x + stats::runif(1, min = 0, max = 1)
      pbinom_abscont <- function(q, size, prob) {
        v <- trunc(q)
        term1 <- stats::pbinom(v - 1, size = size, prob = prob)
        term2 <- (q - v) * stats::dbinom(v, size = size, prob = prob)
        return(term1 + term2)
      }
      qbinom_abscont <- function(p, size, x) {
        fun <- function(prob, size, x, p) {
          pbinom_abscont(x, size, prob) - p
        }
        stats::uniroot(fun,
          interval = c(0, 1), size = size,
          x = x, p = p
        )$root
      }
      ci_lwr <- qbinom_abscont(1 - alpha, size = n, x = x_tilde)
      ci_upr <- qbinom_abscont(alpha, size = n, x = x_tilde)
    },
    pratt = {
      if (x == 0) {
        ci_lwr <- 0
        ci_upr <- 1 - alpha^(1 / n)
      } else if (x == 1) {
        ci_lwr <- 1 - (1 - alpha / 2)^(1 / n)
        ci_upr <- 1 - (alpha / 2)^(1 / n)
      } else if (x == (n - 1)) {
        ci_lwr <- (alpha / 2)^(1 / n)
        ci_upr <- (1 - alpha / 2)^(1 / n)
      } else if (x == n) {
        ci_lwr <- alpha^(1 / n)
        ci_upr <- 1
      } else {
        z <- stats::qnorm(1 - alpha / 2)
        A <- ((x + 1) / (n - x))^2 # nolint
        B <- 81 * (x + 1) * (n - x) - 9 * n - 8 # nolint
        C <- (0 - 3) * z * sqrt(9 * (x + 1) * (n - x) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * (x + 1)^2 - 9 * (x + 1) * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_upr <- 1 / E
        A <- (x / (n - x - 1))^2 # nolint
        B <- 81 * x * (n - x - 1) - 9 * n - 8 # nolint
        C <- 3 * z * sqrt(9 * x * (n - x - 1) * (9 * n + 5 - z^2) + n + 1) # nolint
        D <- 81 * x^2 - 9 * x * (2 + z^2) + 1 # nolint
        E <- 1 + A * ((B + C) / D)^3 # nolint
        ci_lwr <- 1 / E
      }
    },
    midp = {
      f_low <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x,
          size = n, prob = pi, lower.tail = FALSE
        ) -
          (1 - conf.level) / 2
      }
      f_up <- function(pi, x, n) {
        1 / 2 * stats::dbinom(x, size = n, prob = pi) + stats::pbinom(x - 1, size = n, prob = pi) - (1 - conf.level) / 2
      }
      ci_lwr <- 0
      ci_upr <- 1
      if (x != 0) {
        ci_lwr <- stats::uniroot(f_low,
          interval = c(0, p_hat),
          x = x, n = n
        )$root
      }
      if (x != n) {
        ci_upr <- stats::uniroot(f_up, interval = c(
          p_hat,
          1
        ), x = x, n = n)$root
      }
    },
    lik = {
      ci_lwr <- 0
      ci_upr <- 1
      z <- stats::qnorm(1 - alpha * 0.5)
      tol <- .Machine$double.eps^0.5
      BinDev <- function(y, x, mu, wt, bound = 0, tol = .Machine$double.eps^0.5, # nolint
                         ...) {
        ll_y <- ifelse(y %in% c(0, 1), 0, stats::dbinom(x, wt,
          y,
          log = TRUE
        ))
        ll_mu <- ifelse(mu %in% c(0, 1), 0, stats::dbinom(x,
          wt, mu,
          log = TRUE
        ))
        res <- ifelse(abs(y - mu) < tol, 0, sign(y - mu) * sqrt(-2 * (ll_y - ll_mu)))
        return(res - bound)
      }
      if (x != 0 && tol < p_hat) {
        ci_lwr <- if (BinDev(
          tol, x, p_hat, n, -z,
          tol
        ) <= 0) {
          stats::uniroot(
            f = BinDev, interval = c(tol, if (p_hat < tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }), bound = -z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
      if (x != n && p_hat < (1 - tol)) {
        ci_upr <- if (
          BinDev(y = 1 - tol, x = x, mu = ifelse(p_hat > 1 - tol, tol, p_hat), wt = n, bound = z, tol = tol) < 0) { # nolint
          ci_lwr <- if (BinDev(
            tol, x, if (p_hat < tol || p_hat == 1) {
              1 - tol
            } else {
              p_hat
            }, n,
            -z, tol
          ) <= 0) {
            stats::uniroot(
              f = BinDev, interval = c(tol, p_hat),
              bound = -z, x = x, mu = p_hat, wt = n
            )$root
          }
        } else {
          stats::uniroot(
            f = BinDev, interval = c(if (p_hat > 1 - tol) {
              tol
            } else {
              p_hat
            }, 1 - tol), bound = z,
            x = x, mu = p_hat, wt = n
          )$root
        }
      }
    },
    blaker = {
      acceptbin <- function(x, n, p) {
        p1 <- 1 - stats::pbinom(x - 1, n, p)
        p2 <- stats::pbinom(x, n, p)
        a1 <- p1 + stats::pbinom(stats::qbinom(p1, n, p) - 1, n, p)
        a2 <- p2 + 1 - stats::pbinom(
          stats::qbinom(1 - p2, n, p), n,
          p
        )
        return(min(a1, a2))
      }
      ci_lwr <- 0
      ci_upr <- 1
      if (x != 0) {
        ci_lwr <- stats::qbeta((1 - conf.level) / 2, x, n - x + 1)
        while (acceptbin(x, n, ci_lwr + tol) < (1 - conf.level)) {
          ci_lwr <- ci_lwr + tol
        }
      }
      if (x != n) {
        ci_upr <- stats::qbeta(1 - (1 - conf.level) / 2, x + 1, n - x)
        while (acceptbin(x, n, ci_upr - tol) < (1 - conf.level)) {
          ci_upr <- ci_upr - tol
        }
      }
    }
    )
    ci <- c(est = est, lwr.ci = max(0, ci_lwr), upr.ci = min(
      1,
      ci_upr
    ))
    if (sides == "left") {
      ci[3] <- 1
    } else if (sides == "right") {
      ci[2] <- 0
    }
    return(ci)
  }
  lst <- list(
    x = x, n = n, conf.level = conf.level, sides = sides,
    method = method, rand = rand
  )
  maxdim <- max(unlist(lapply(lst, length)))
  lgp <- lapply(lst, rep, length.out = maxdim)
  lgn <- h_recycle(x = if (is.null(names(x))) {
    paste("x", seq_along(x), sep = ".")
  } else {
    names(x)
  }, n = if (is.null(names(n))) {
    paste("n", seq_along(n), sep = ".")
  } else {
    names(n)
  }, conf.level = conf.level, sides = sides, method = method)
  xn <- apply(as.data.frame(lgn[sapply(lgn, function(x) {
    length(unique(x)) !=
      1
  })]), 1, paste, collapse = ":")
  res <- t(sapply(1:maxdim, function(i) {
    iBinomCI(
      x = lgp$x[i],
      n = lgp$n[i], conf.level = lgp$conf.level[i], sides = lgp$sides[i],
      method = lgp$method[i], rand = lgp$rand[i]
    )
  }))
  colnames(res)[1] <- c("est")
  rownames(res) <- xn
  return(res)
}

#' Survival Time Point Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param time_point (`number`)\cr survival time point of interest.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'     see more in [survival::survfit()]. Note option "none" is no longer supported.
#'   * `time_point` (`number`)\cr survival time point of interest.
#' @param method (`string`)\cr either `surv` (survival estimations),
#'   `surv_diff` (difference in survival with the control) or `both`.
#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
#'   avoid warnings from duplicate table names.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("surv_timepoint")`
#'   to see available statistics for this function.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @name survival_timepoint
#' @order 1
NULL

#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
#'
#' @return
#' * `s_surv_timepoint()` returns the statistics:
#'   * `pt_at_risk`: Patients remaining at risk.
#'   * `event_free_rate`: Event-free rate (%).
#'   * `rate_se`: Standard error of event free rate.
#'   * `rate_ci`: Confidence interval for event free rate.
#'
#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  # Validate inputs: `.var` names a complete numeric column, `is_event` a complete logical
  # column, and `time_point` is a single number.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level

  # Intercept-only survival fit over all rows of `df`.
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )

  # Read the fit at the requested time; `extend = TRUE` is passed so that a summary row is
  # requested even for times past the last observed one.
  wanted_cols <- c("time", "n.risk", "surv", "lower", "upper", "std.err")
  at_time_point <- as.data.frame(summary(km_fit, times = time_point, extend = TRUE)[wanted_cols])

  # With nobody left at risk, all statistics are reported as missing.
  estimates <- if (at_time_point[["n.risk"]] == 0) {
    list(
      n_risk = NA_real_,
      rate = NA_real_,
      se = NA_real_,
      ci = c(NA_real_, NA_real_)
    )
  } else {
    list(
      n_risk = at_time_point$n.risk,
      rate = at_time_point$surv,
      se = at_time_point$std.err,
      ci = c(at_time_point$lower, at_time_point$upper)
    )
  }

  # Rates, standard error and interval bounds are returned on the percentage scale.
  list(
    pt_at_risk = formatters::with_label(estimates$n_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(estimates$rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(estimates$se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(estimates$ci * 100, f_conf_level(conf_level))
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv"`.
#'
#' @return
#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_timepoint <- make_afun(
  # Statistics function being wrapped; the names used in the vectors below are the names of
  # the list elements it returns.
  s_surv_timepoint,
  # Default indent modifier per statistic: `rate_se` and `rate_ci` get 1, the others 0.
  .indent_mods = c(
    pt_at_risk = 0L,
    event_free_rate = 0L,
    rate_se = 1L,
    rate_ci = 1L
  ),
  # Default format string per statistic; `rate_ci` is a two-value format for the interval bounds.
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
#'
#' @return
#' * `s_surv_timepoint_diff()` returns the statistics:
#'   * `rate_diff`: Event-free rate difference between two groups.
#'   * `rate_diff_ci`: Confidence interval for the difference.
#'   * `ztest_pval`: p-value to test the difference is 0.
#'
#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # The reference column is not compared with itself: return empty, labelled placeholders.
  if (.in_ref_col) {
    return(
      list(
        rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
        rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
        ztest_pval = formatters::with_label("", "p-value (Z-test)")
      )
    )
  }

  # Stack reference and current group, then estimate the survival rate separately in each.
  # Fixing the factor levels guarantees that element 1 is the reference and element 2 the
  # current group after `split()`.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))
  res_per_group <- lapply(split(data, group), function(x) {
    s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...)
  })

  res_x <- res_per_group[[2]]
  res_ref <- res_per_group[[1]]
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  # Standard error of the difference, combining the two groups' standard errors.
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Normal-approximation confidence interval around the difference.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided Z-test p-value. The upper tail is requested directly instead of computing
  # `1 - pnorm(z)`, which cancels to exactly 0 for large `z`. The missing case uses
  # `NA_real_` so the statistic is a double in both branches.
  ztest_pval <- if (is.na(rate_diff)) {
    NA_real_
  } else {
    2 * stats::pnorm(abs(rate_diff) / se_diff, lower.tail = FALSE)
  }

  list(
    rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
  )
}

#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
#'   when `method = "surv_diff"`.
#'
#' @return
#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_surv_timepoint_diff <- make_afun(
  # Statistics function being wrapped; the names used below are the names of the list
  # elements it returns.
  s_surv_timepoint_diff,
  # Default format string per statistic. No indent modifiers are set here.
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    ztest_pval = "x.xxxx | (<0.0001)"
  )
)

#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
#'   the value of `method`.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' # Survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 7
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "surv_diff",
#'     .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' # Survival and difference in survival at given time points.
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   surv_timepoint(
#'     vars = "AVAL",
#'     var_labels = "Months",
#'     is_event = "is_event",
#'     time_point = 9,
#'     method = "both"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
surv_timepoint <- function(lyt,
                           vars,
                           time_point,
                           is_event,
                           control = control_surv_timepoint(),
                           method = c("surv", "surv_diff", "both"),
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` must be resolved before `.indent_mods` is first touched, because the lazily
  # evaluated default of `.indent_mods` reads it.
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  extra_args <- list(time_point = time_point, is_event = is_event, control = control, ...)

  # Statistics belonging to each of the two analysis functions; every user-facing
  # per-statistic argument is split along these groups.
  stat_groups <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = stat_groups)
  .formats <- h_split_param(.formats, names(.formats), f = stat_groups)
  .labels <- h_split_param(.labels, names(.labels), f = stat_groups)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = stat_groups)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )

  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  want_surv <- method %in% c("surv", "both")
  want_diff <- method %in% c("surv_diff", "both")
  all_time_points <- extra_args$time_point

  # One analysis (or a pair, for "both") is appended per requested time point; each gets
  # the single time point it is responsible for via `extra_args`.
  for (idx in seq_along(all_time_points)) {
    current_tp <- all_time_points[idx]
    extra_args[["time_point"]] <- current_tp

    if (want_surv) {
      lyt <- analyze(
        lyt,
        vars,
        afun = afun_surv,
        var_labels = paste(current_tp, var_labels),
        show_labels = show_labels,
        table_names = paste0("surv_", current_tp, table_names_suffix),
        na_str = na_str,
        nested = nested,
        extra_args = extra_args
      )
    }

    if (want_diff) {
      lyt <- analyze(
        lyt,
        vars,
        afun = afun_surv_diff,
        var_labels = paste(current_tp, var_labels),
        # In "both" mode the label is hidden for the difference rows.
        show_labels = ifelse(method == "both", "hidden", show_labels),
        table_names = paste0("surv_diff_", current_tp, table_names_suffix),
        na_str = na_str,
        nested = nested,
        extra_args = extra_args
      )
    }
  }

  lyt
}

#' Survival Time Analysis
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize median survival time and CIs, percentiles of survival times, survival
#' time range of censored/event patients.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_surv_time()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
#'   * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
#'     see more in [survival::survfit()]. Note option "none" is not supported.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
#' @param ref_fn_censor (`flag`)\cr whether referential footnotes indicating censored observations should be printed
#'   when the `range` statistic is included.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("surv_time")`
#'   to see available statistics for this function.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#'
#' @name survival_time
#' @order 1
NULL

#' @describeIn survival_time Statistics function which analyzes survival times.
#'
#' @return
#' * `s_surv_time()` returns the statistics:
#'   * `median`: Median survival time.
#'   * `median_ci`: Confidence interval for median time.
#'   * `quantiles`: Survival time for two specified quantiles.
#'   * `range_censor`: Survival time range for censored observations.
#'   * `range_event`: Survival time range for observations with events.
#'   * `range`: Survival time range for all observations.
#'
#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        is_event,
                        control = control_surv_time()) {
  # Validate inputs: `.var` names a complete numeric column and `is_event` a complete
  # logical column of `df`.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level
  probs <- control$quantiles

  # Intercept-only survival fit over all rows of `df`.
  km_formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )
  fit_table <- summary(km_fit, extend = TRUE)$table
  fit_quantiles <- stats::quantile(km_fit, probs = probs)$quantile

  # Observed time ranges: censored rows, event rows, and all rows.
  times <- df[[.var]]
  had_event <- df[[is_event]]
  rng_censored <- range_noinf(times[!had_event], na.rm = TRUE)
  rng_event <- range_noinf(times[had_event], na.rm = TRUE)
  rng_all <- range_noinf(times, na.rm = TRUE)

  # The confidence limits of the median sit in entries of the summary table whose names
  # are prefixed by the fit's confidence level (e.g. "<level>LCL" / "<level>UCL").
  ci_names <- paste0(km_fit$conf.int, c("LCL", "UCL"))
  quantile_label <- paste0(probs[1] * 100, "% and ", probs[2] * 100, "%-ile")

  list(
    median = formatters::with_label(unname(fit_table["median"]), "Median"),
    median_ci = formatters::with_label(unname(fit_table[ci_names]), f_conf_level(conf_level)),
    quantiles = formatters::with_label(unname(fit_quantiles), quantile_label),
    range_censor = formatters::with_label(rng_censored, "Range (censored)"),
    range_event = formatters::with_label(rng_event, "Range (event)"),
    range = formatters::with_label(rng_all, "Range")
  )
}

#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
#'
#' @return
#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_surv_time(
#'   df,
#'   .df_row = df,
#'   .var = "AVAL",
#'   is_event = "is_event"
#' )
#'
#' @export
a_surv_time <- function(df,
                        labelstr = "",
                        .var = NULL,
                        .df_row = NULL,
                        is_event,
                        control = control_surv_time(),
                        ref_fn_censor = TRUE,
                        .stats = NULL,
                        .formats = NULL,
                        .labels = NULL,
                        .indent_mods = NULL,
                        na_str = default_na_str()) {
  x_stats <- s_surv_time(
    df = df, .var = .var, is_event = is_event, control = control
  )
  # Keep the censored range bounds now: `range_censor` may be dropped from `x_stats` below
  # when it is not among the requested statistics.
  rng_censor_lwr <- x_stats[["range_censor"]][1]
  rng_censor_upr <- x_stats[["range_censor"]][2]

  # Use method-specific defaults
  fmts <- c(median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x")
  lbls <- c(median_ci = "95% CI", range = "Range", range_censor = "Range (censored)", range_event = "Range (event)")
  lbls_custom <- .labels
  .formats <- c(.formats, fmts[setdiff(names(fmts), names(.formats))])
  .labels <- c(.labels, lbls[setdiff(names(lbls), names(lbls_custom))])

  # Fill in with formatting defaults if needed
  .stats <- get_stats("surv_time", stats_in = .stats)
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels) %>% labels_use_control(control, lbls_custom)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  x_stats <- x_stats[.stats]

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var)

  # One (initially empty) footnote slot per output row, keyed by row label.
  cell_fns <- stats::setNames(vector("list", length = length(x_stats)), .labels)
  if ("range" %in% names(x_stats) && ref_fn_censor) {
    # The censored range may contain NA (presumably when no observation is censored).
    # A bare `==` would then yield NA and make `if` fail with "missing value where
    # TRUE/FALSE needed", so an NA comparison is treated as "not a censored bound".
    min_is_censored <- isTRUE(x_stats[["range"]][1] == rng_censor_lwr)
    max_is_censored <- isTRUE(x_stats[["range"]][2] == rng_censor_upr)

    if (min_is_censored && max_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observations: range minimum & maximum"
    } else if (min_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observation: range minimum"
    } else if (max_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observation: range maximum"
    }
  }

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = na_str,
    .cell_footnotes = cell_fns
  )
}

#' @describeIn survival_time Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_surv_time()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD") %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'     control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
surv_time <- function(lyt,
                      vars,
                      is_event,
                      control = control_surv_time(),
                      ref_fn_censor = TRUE,
                      na_str = default_na_str(),
                      nested = TRUE,
                      ...,
                      var_labels = "Time to Event",
                      show_labels = "visible",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range"),
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = c(median_ci = 1L)) {
  # Arguments forwarded to `a_surv_time()` for every cell: the per-statistic display
  # settings first, then the analysis settings, then anything supplied through `...`.
  afun_args <- list(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    na_str = na_str,
    is_event = is_event,
    control = control,
    ref_fn_censor = ref_fn_censor,
    ...
  )

  # `na_str` is handed both to the analysis function (above) and to the layout itself.
  analyze(
    lyt,
    vars,
    afun = a_surv_time,
    extra_args = afun_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested
  )
}

#' Kaplan-Meier Plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' From a survival model, a graphic is rendered along with tabulated annotations
#' including the number of patients at risk at given times and the median survival
#' time per group.
#'
#' @inheritParams grid::gTree
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param variables (named `list`)\cr variable names. Details are:
#'   * `tte` (`numeric`)\cr variable indicating time-to-event duration values.
#'   * `is_event` (`logical`)\cr event variable. `TRUE` if event, `FALSE` if time to event is censored.
#'   * `arm` (`factor`)\cr the treatment group variable.
#'   * `strat` (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control_surv (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_surv_timepoint()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
#'   * `conf_type` (`string`)\cr `"plain"` (default), `"log"`, `"log-log"` for confidence interval type,
#'     see more in [survival::survfit()]. Note that the option "none" is no longer supported.
#' @param xticks (`numeric`, `number`, or `NULL`)\cr numeric vector of ticks or single number with spacing
#'   between ticks on the x axis. If `NULL` (default), [labeling::extended()] is used to determine
#'   an optimal tick position on the x axis.
#' @param yval (`string`)\cr value of y-axis. Options are `Survival` (default) and `Failure` probability.
#' @param censor_show (`flag`)\cr whether to show censored.
#' @param xlab (`string`)\cr label of x-axis.
#' @param ylab (`string`)\cr label of y-axis.
#' @param ylim (`vector` of `numeric`)\cr vector of length 2 containing lower and upper limits for the y-axis.
#'   If `NULL` (default), the minimum and maximum y-values displayed are used as limits.
#' @param title (`string`)\cr title for plot.
#' @param footnotes (`string`)\cr footnotes for plot.
#' @param col (`character`)\cr line colors. Length of the vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lty (`numeric`)\cr line type. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param lwd (`numeric`)\cr line width. Length of a vector should be equal
#'   to number of strata from [survival::survfit()].
#' @param pch (`numeric`, `string`)\cr value or character of points symbol to indicate censored cases.
#' @param size (`numeric`)\cr size of censored point, a class of `unit`.
#' @param max_time (`numeric`)\cr maximum value to show on X axis. Only data values less than or up to
#'   this threshold value will be plotted (defaults to `NULL`).
#' @param font_size (`number`)\cr font size to be used.
#' @param ci_ribbon (`flag`)\cr draw the confidence interval around the Kaplan-Meier curve.
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control outlook of the Kaplan-Meier curve.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of patients at risk
#'   matching the main grid of the Kaplan-Meier curve.
#' @param annot_at_risk_title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
#'   table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
#' @param annot_surv_med (`flag`)\cr compute and add the annotation table on the Kaplan-Meier curve estimating the
#'   median survival time per group.
#' @param annot_coxph (`flag`)\cr add the annotation table from a [survival::coxph()] model.
#' @param annot_stats (`string`)\cr statistics annotations to add to the plot. Options are
#'   `median` (median survival follow-up time) and `min` (minimum survival follow-up time).
#' @param annot_stats_vlines (`flag`)\cr add vertical lines corresponding to each of the statistics
#'   specified by `annot_stats`. If `annot_stats` is `NULL` no lines will be added.
#' @param control_coxph_pw (`list`)\cr parameters for comparison details, specified by using
#'   the helper function [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1.
#'     Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#'   * `ties` (`string`)\cr method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()]
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param ref_group_coxph (`character`)\cr level of arm variable to use as reference group in calculations for
#'   `annot_coxph` table. If `NULL` (default), uses the first level of the arm variable.
#' @param annot_coxph_ref_lbls (`flag`)\cr whether the reference group should be explicitly printed in labels for the
#'   `annot_coxph` table. If `FALSE` (default), only comparison groups will be printed in `annot_coxph` table labels.
#' @param position_coxph (`numeric`)\cr x and y positions for plotting [survival::coxph()] model.
#' @param position_surv_med (`numeric`)\cr x and y positions for plotting annotation table estimating median survival
#'   time per group.
#' @param width_annots (named `list` of `unit`s)\cr a named list of widths for annotation tables with names `surv_med`
#'   (median survival time table) and `coxph` ([survival::coxph()] model table), where each value is the width
#'   (in units) to implement when printing the annotation table.
#'
#' @return A `grob` of class `gTree`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(ggplot2)
#' library(survival)
#' library(grid)
#' library(nestcolor)
#'
#' df <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' variables <- list(tte = "AVAL", is_event = "is_event", arm = "ARMCD")
#'
#' # 1. Example - basic option
#'
#' res <- g_km(df = df, variables = variables)
#' res <- g_km(df = df, variables = variables, yval = "Failure")
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   control_surv = control_surv_timepoint(conf_level = 0.9),
#'   col = c("grey25", "grey50", "grey75"),
#'   annot_at_risk_title = FALSE
#' )
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal())
#' res <- g_km(df = df, variables = variables, ggtheme = theme_minimal(), lty = 1:3)
#' res <- g_km(df = df, variables = variables, max = 2000)
#' res <- g_km(
#'   df = df,
#'   variables = variables,
#'   annot_stats = c("min", "median"),
#'   annot_stats_vlines = TRUE
#' )
#'
#' # 2. Example - Arrange several KM curve on a single graph device
#'
#' # 2.1 Use case: A general graph on the top, a zoom on the bottom.
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE, annot_surv_med = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max = 1000, newpage = FALSE, annot_surv_med = FALSE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # 2.2 Use case: No annotations on top, annotated graph on bottom
#' grid.newpage()
#' lyt <- grid.layout(nrow = 2, ncol = 1) %>%
#'   viewport(layout = .) %>%
#'   pushViewport()
#'
#' res <- g_km(
#'   df = df, variables = variables, newpage = FALSE,
#'   annot_surv_med = FALSE, annot_at_risk = FALSE,
#'   vp = viewport(layout.pos.row = 1, layout.pos.col = 1)
#' )
#' res <- g_km(
#'   df = df, variables = variables, max = 2000, newpage = FALSE, annot_surv_med = FALSE,
#'   annot_at_risk = TRUE,
#'   ggtheme = theme_dark(),
#'   vp = viewport(layout.pos.row = 2, layout.pos.col = 1)
#' )
#'
#' # Add annotation from a pairwise coxph analysis
#' g_km(
#'   df = df, variables = variables,
#'   annot_coxph = TRUE
#' )
#'
#' # Change widths/sizes of surv_med and coxph annotation tables.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   annot_coxph = TRUE,
#'   width_annots = list(surv_med = grid::unit(2, "in"), coxph = grid::unit(3, "in"))
#' )
#'
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_coxph = c(0.5, 0.5)
#' )
#'
#' # Change position of the treatment group annotation table.
#' g_km(
#'   df = df, variables = c(variables, list(strat = "SEX")),
#'   font_size = 15,
#'   annot_coxph = TRUE,
#'   control_coxph = control_coxph(pval_method = "wald", ties = "exact", conf_level = 0.99),
#'   position_surv_med = c(1, 0.7)
#' )
#' }
#'
#' @export
g_km <- function(df,
                 variables,
                 control_surv = control_surv_timepoint(),
                 col = NULL,
                 lty = NULL,
                 lwd = .5,
                 censor_show = TRUE,
                 pch = 3,
                 size = 2,
                 max_time = NULL,
                 xticks = NULL,
                 xlab = "Days",
                 yval = c("Survival", "Failure"),
                 ylab = paste(yval, "Probability"),
                 ylim = NULL,
                 title = NULL,
                 footnotes = NULL,
                 draw = TRUE,
                 newpage = TRUE,
                 gp = NULL,
                 vp = NULL,
                 name = NULL,
                 font_size = 12,
                 ci_ribbon = FALSE,
                 ggtheme = nestcolor::theme_nest(),
                 annot_at_risk = TRUE,
                 annot_at_risk_title = TRUE,
                 annot_surv_med = TRUE,
                 annot_coxph = FALSE,
                 annot_stats = NULL,
                 annot_stats_vlines = FALSE,
                 control_coxph_pw = control_coxph(),
                 ref_group_coxph = NULL,
                 annot_coxph_ref_lbls = FALSE,
                 position_coxph = c(-0.03, -0.02),
                 position_surv_med = c(0.95, 0.9),
                 width_annots = list(surv_med = grid::unit(0.3, "npc"), coxph = grid::unit(0.4, "npc"))) {
  # Builds a Kaplan-Meier plot as a grid `gTree`, optionally draws it, and returns it invisibly.
  # Steps: validate inputs -> fit the KM model -> build the ggplot -> (optionally) re-assemble the
  # decomposed plot with annotation tables in a custom grid layout.

  # ---- Argument checks ----
  checkmate::assert_list(variables)
  checkmate::assert_subset(c("tte", "arm", "is_event"), names(variables))
  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(footnotes, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_subset(annot_stats, c("median", "min"))
  checkmate::assert_logical(annot_stats_vlines)
  checkmate::assert_true(all(sapply(width_annots, grid::is.unit)))

  tte <- variables$tte
  is_event <- variables$is_event
  arm <- variables$arm

  # Check that the columns exist before dereferencing any of them.
  assert_df_with_variables(df, list(tte = tte, is_event = is_event, arm = arm))
  assert_valid_factor(df[[arm]])
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(df[[tte]], min.len = 1, any.missing = FALSE)

  # `armval` is only kept when a single arm is present (it then names the lone curve).
  armval <- as.character(unique(df[[arm]]))
  if (annot_coxph && length(armval) < 2) {
    stop(paste(
      "When `annot_coxph` = TRUE, `df` must contain at least 2 levels of `variables$arm`",
      "in order to calculate the hazard ratio."
    ))
  } else if (length(armval) > 1) {
    armval <- NULL
  }
  yval <- match.arg(yval)

  # ---- Kaplan-Meier fit and base plot ----
  formula <- stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", arm))
  fit_km <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = control_surv$conf_level,
    conf.type = control_surv$conf_type
  )
  data_plot <- h_data_plot(
    fit_km = fit_km,
    armval = armval,
    max_time = max_time
  )

  xticks <- h_xticks(data = data_plot, xticks = xticks, max_time = max_time)
  gg <- h_ggkm(
    data = data_plot,
    censor_show = censor_show,
    pch = pch,
    size = size,
    xticks = xticks,
    xlab = xlab,
    yval = yval,
    ylab = ylab,
    ylim = ylim,
    title = title,
    footnotes = footnotes,
    max_time = max_time,
    lwd = lwd,
    lty = lty,
    col = col,
    ggtheme = ggtheme,
    ci_ribbon = ci_ribbon
  )

  # ---- Optional follow-up annotations (median / minimum follow-up) ----
  if (!is.null(annot_stats)) {
    if ("median" %in% annot_stats) {
      # Median is taken from a KM fit that ignores the arm (intercept-only formula).
      fit_km_all <- survival::survfit(
        formula = stats::as.formula(paste0("survival::Surv(", tte, ", ", is_event, ") ~ ", 1)),
        data = df,
        conf.int = control_surv$conf_level,
        conf.type = control_surv$conf_type
      )
      median_fu <- stats::median(fit_km_all)
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = median_fu + 0.065 * max(data_plot$time),
          y = if (yval == "Survival") 0.62 else 0.38,
          label = paste("Median F/U:\n", round(median_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = median_fu, xend = median_fu, y = -Inf, yend = Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    if ("min" %in% annot_stats) {
      min_fu <- min(df[[tte]])
      gg <- gg +
        ggplot2::geom_text(
          size = 8 / ggplot2::.pt, col = 1,
          x = min_fu + max(data_plot$time) * (if (yval == "Survival") 0.05 else 0.07),
          y = if (yval == "Survival") 1.0 else 0.05,
          label = paste("Min. F/U:\n", round(min_fu, 1), tolower(df$AVALU[1]))
        )
      if (annot_stats_vlines) {
        gg <- gg +
          ggplot2::geom_segment(
            ggplot2::aes(x = min_fu, xend = min_fu, y = Inf, yend = -Inf),
            linetype = 2, col = "darkgray"
          )
      }
    }
    gg <- gg + ggplot2::guides(fill = ggplot2::guide_legend(override.aes = list(shape = NA, label = "")))
  }

  g_el <- h_decompose_gg(gg)

  # ---- Patients-at-risk table (content shown below the graph) ----
  if (annot_at_risk) {
    # `times` is spelled out in full instead of relying on partial argument matching.
    annot_tbl <- summary(fit_km, times = xticks)
    annot_tbl <- if (is.null(fit_km$strata)) {
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = as.factor(armval)
      )
    } else {
      # Strata are labelled "<variable>=<level>"; keep only what follows the first "=".
      # An anchored regex stays correct even when a label contains the word "equals".
      levels(annot_tbl$strata) <- sub("^[^=]*=", "", levels(annot_tbl$strata))
      data.frame(
        n.risk = annot_tbl$n.risk,
        time = annot_tbl$time,
        strata = annot_tbl$strata
      )
    }

    grobs_patient <- h_grob_tbl_at_risk(
      data = data_plot,
      annot_tbl = annot_tbl,
      xlim = max(max_time, data_plot$time, xticks),
      title = annot_at_risk_title
    )
  }

  # ---- Assemble the final grob ----
  if (annot_at_risk || annot_surv_med || annot_coxph) {
    lyt <- h_km_layout(
      data = data_plot, g_el = g_el, title = title, footnotes = footnotes,
      annot_at_risk = annot_at_risk, annot_at_risk_title = annot_at_risk_title
    )
    # 0/1 offsets used to shift layout rows when the optional rows are present.
    at_risk_ttl <- as.numeric(annot_at_risk_title)
    ttl_row <- as.numeric(!is.null(title))
    foot_row <- as.numeric(!is.null(footnotes))
    km_grob <- grid::gTree(
      vp = grid::viewport(layout = lyt, height = .95, width = .95),
      children = grid::gList(
        # Title.
        if (ttl_row == 1) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 2),
            children = grid::gList(grid::textGrob(label = title, x = grid::unit(0, "npc"), hjust = 0))
          )
        },

        # The Kaplan - Meier curve (top-right corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$panel)
        ),

        # Survfit summary table (top-right corner).
        if (annot_surv_med) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_median_surv(
              fit_km = fit_km,
              armval = armval,
              x = position_surv_med[1],
              y = position_surv_med[2],
              width = if (!is.null(width_annots[["surv_med"]])) width_annots[["surv_med"]] else grid::unit(0.3, "npc"),
              ttheme = gridExtra::ttheme_default(base_size = font_size)
            )
          )
        },

        # Cox-PH summary table, drawn over the curve panel.
        if (annot_coxph) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 2),
            children = h_grob_coxph(
              df = df,
              variables = variables,
              control_coxph_pw = control_coxph_pw,
              ref_group_coxph = ref_group_coxph,
              annot_coxph_ref_lbls = annot_coxph_ref_lbls,
              x = position_coxph[1],
              y = position_coxph[2],
              width = if (!is.null(width_annots[["coxph"]])) width_annots[["coxph"]] else grid::unit(0.4, "npc"),
              ttheme = gridExtra::ttheme_default(
                base_size = font_size,
                padding = grid::unit(c(1, .5), "lines"),
                core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
              )
            )
          )
        },

        # Add the y-axis annotation (top-left corner).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 1 + ttl_row, layout.pos.col = 1),
          children = h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis)
        ),

        # Add the x-axis annotation (second row below the Kaplan Meier Curve).
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 2 + ttl_row, layout.pos.col = 2),
          children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
        ),

        # Add the legend.
        grid::gTree(
          vp = grid::viewport(layout.pos.row = 3 + ttl_row, layout.pos.col = 2),
          children = grid::gList(g_el$guide)
        ),

        # Add the table with patient-at-risk numbers.
        if (annot_at_risk && annot_at_risk_title) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + ttl_row, layout.pos.col = 1),
            children = grobs_patient$title
          )
        },
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + at_risk_ttl + ttl_row, layout.pos.col = 2),
            children = grobs_patient$at_risk
          )
        },
        if (annot_at_risk) {
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 4 + at_risk_ttl + ttl_row, layout.pos.col = 1),
            children = grobs_patient$label
          )
        },
        if (annot_at_risk) {
          # Add the x-axis for the table.
          grid::gTree(
            vp = grid::viewport(layout.pos.row = 5 + at_risk_ttl + ttl_row, layout.pos.col = 2),
            children = grid::gList(rbind(g_el$xaxis, g_el$xlab))
          )
        },

        # Footnotes (last layout row; its index depends on whether the at-risk rows exist).
        if (foot_row == 1) {
          grid::gTree(
            vp = grid::viewport(
              layout.pos.row = if (annot_at_risk) 6 + at_risk_ttl + ttl_row else 4 + ttl_row,
              layout.pos.col = 2
            ),
            children = grid::gList(grid::textGrob(label = footnotes, x = grid::unit(0, "npc"), hjust = 0))
          )
        }
      )
    )

    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(km_grob)
    )
  } else {
    # No annotation requested: wrap the plain ggplot grob.
    result <- grid::gTree(
      vp = vp,
      gp = gp,
      name = name,
      children = grid::gList(ggplot2::ggplotGrob(gg))
    )
  }

  if (newpage && draw) grid::grid.newpage()
  if (draw) grid::grid.draw(result)
  invisible(result)
}

#' Helper function: tidy survival fit
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Convert the survival fit data into a data frame designed for plotting
#' within `g_km`.
#'
#' This starts from the [broom::tidy()] result, and then:
#'   * Post-processes the `strata` column into a factor.
#'   * Extends each stratum by an additional first row with time 0 and probability 1 so that
#'     downstream plot lines start at those coordinates.
#'   * Adds a `censor` column.
#'   * Filters the rows before `max_time`.
#'
#' @inheritParams g_km
#' @param fit_km (`survfit`)\cr result of [survival::survfit()].
#' @param armval (`string`)\cr used as strata name when treatment arm variable only has one level. Default is `"All"`.
#'
#' @return A `tibble` with columns `time`, `n.risk`, `n.event`, `n.censor`, `estimate`, `std.error`, `conf.high`,
#'   `conf.low`, `strata`, and `censor`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' # Test with multiple arms
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' # Test with single arm
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS", ARMCD == "ARM B") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot(armval = "ARM B")
#' }
#'
#' @export
h_data_plot <- function(fit_km,
                        armval = "All",
                        max_time = NULL) {
  # Tidies a `survfit` object into one row per stratum and time point, ready for plotting.
  y <- broom::tidy(fit_km)

  if (!is.null(fit_km$strata)) {
    # Strata are labelled "<variable>=<level>"; keep only what follows the first "=".
    # The anchored regex stays correct even when a label itself contains "=" or the word "equals".
    strata_levels <- sub("^[^=]*=", "", names(fit_km$strata))
    y$strata <- factor(sub("^[^=]*=", "", y$strata), levels = strata_levels)
  } else {
    # Single curve: label it with the caller-supplied arm value.
    y$strata <- armval
  }

  # Prepend a synthetic "time 0" row to each stratum so that every curve starts at (0, 1).
  y_by_strata <- split(y, y$strata)
  y_by_strata_extended <- lapply(
    y_by_strata,
    FUN = function(tbl) {
      first_row <- tbl[1L, ]
      first_row$time <- 0
      # The time-0 at-risk count is set to the sum of the first row's at-risk, event and censor counts.
      first_row$n.risk <- sum(first_row[, c("n.risk", "n.event", "n.censor")])
      first_row$n.event <- first_row$n.censor <- 0
      first_row$estimate <- first_row$conf.high <- first_row$conf.low <- 1
      first_row$std.error <- 0
      rbind(
        first_row,
        tbl
      )
    }
  )
  y <- do.call(rbind, y_by_strata_extended)

  # `censor` holds the estimate at time points with censoring, NA elsewhere.
  y$censor <- ifelse(y$n.censor > 0, y$estimate, NA)
  if (!is.null(max_time)) {
    y <- y[y$time <= max(max_time), ]
  }
  y
}

#' Helper function: x tick positions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Calculate the positions of ticks on the x-axis. However, if `xticks` already
#' exists it is kept as is. It is based on the same function `ggplot2` relies on,
#' and is required in the graphic and the patient-at-risk annotation table.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#'
#' @return A vector of positions to use for x-axis ticks on a `ggplot` object.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_data_plot()
#'
#' h_xticks(data)
#' h_xticks(data, xticks = seq(0, 3000, 500))
#' h_xticks(data, xticks = 500)
#' h_xticks(data, xticks = 500, max_time = 6000)
#' h_xticks(data, xticks = c(0, 500), max_time = 300)
#' h_xticks(data, xticks = 500, max_time = 300)
#' }
#'
#' @export
h_xticks <- function(data, xticks = NULL, max_time = NULL) {
  # `max()` ignores a `NULL` `max_time`, so the same expressions serve both the
  # "with" and "without" `max_time` cases.

  # Nothing supplied: let `labeling::extended()` choose the breaks over the time range.
  if (is.null(xticks)) {
    time_range <- range(data$time)
    return(labeling::extended(time_range[1], max(time_range[2], max_time), m = 5))
  }

  # A single number is the interval between ticks, starting at 0.
  if (checkmate::test_number(xticks)) {
    return(seq(0, max(data$time, max_time), xticks))
  }

  # Anything else must already be a numeric vector of tick positions.
  if (!is.numeric(xticks)) {
    stop(
      paste(
        "xticks should be either `NULL`",
        "or a single number (interval between x ticks)",
        "or a numeric vector (position of ticks on the x axis)"
      )
    )
  }
  xticks
}

#' Helper function: KM plot
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw the Kaplan-Meier plot using `ggplot2`.
#'
#' @inheritParams g_km
#' @param data (`data.frame`)\cr survival data as pre-processed by `h_data_plot`.
#'
#' @return A `ggplot` object.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks,
#'   xlab = "Days",
#'   yval = "Survival",
#'   ylab = "Survival Probability",
#'   title = "Survival"
#' )
#' gg
#' }
#'
#' @export
h_ggkm <- function(data,
                   xticks = NULL,
                   yval = "Survival",
                   censor_show,
                   xlab,
                   ylab,
                   ylim = NULL,
                   title,
                   footnotes = NULL,
                   max_time = NULL,
                   lwd = 1,
                   lty = NULL,
                   pch = 3,
                   size = 2,
                   col = NULL,
                   ci_ribbon = FALSE,
                   ggtheme = nestcolor::theme_nest()) {
  # Draws the Kaplan-Meier step curves (one per stratum) from pre-processed survival data and
  # returns the `ggplot` object.
  checkmate::assert_numeric(lty, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Default y-limits: range of the plotted estimates (only time points before `max_time`, if given).
  if (is.null(ylim)) {
    data_lims <- data
    if (yval == "Failure") data_lims[["estimate"]] <- 1 - data_lims[["estimate"]]
    if (!is.null(max_time)) {
      y_lwr <- min(data_lims[data_lims$time < max_time, ][["estimate"]])
      y_upr <- max(data_lims[data_lims$time < max_time, ][["estimate"]])
    } else {
      y_lwr <- min(data_lims[["estimate"]])
      y_upr <- max(data_lims[["estimate"]])
    }
    ylim <- c(y_lwr, y_upr)
  }
  checkmate::assert_numeric(ylim, finite = TRUE, any.missing = FALSE, len = 2, sorted = TRUE)

  # change estimates of survival to estimates of failure (1 - survival)
  if (yval == "Failure") {
    data$estimate <- 1 - data$estimate
    # The confidence bounds swap roles once complemented.
    data[c("conf.high", "conf.low")] <- list(1 - data$conf.low, 1 - data$conf.high)
    data$censor <- 1 - data$censor
  }

  gg <- ggplot2::ggplot(
    data = data,
    mapping = ggplot2::aes(
      x = .data[["time"]],
      y = .data[["estimate"]],
      ymin = .data[["conf.low"]],
      ymax = .data[["conf.high"]],
      color = .data[["strata"]],
      fill = .data[["strata"]]
    )
  ) +
    ggplot2::geom_hline(yintercept = 0)

  if (ci_ribbon) {
    gg <- gg + ggplot2::geom_ribbon(alpha = .3, lty = 0)
  }

  # Line type: default, one type for all curves, or one type per stratum.
  gg <- if (is.null(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd)
  } else if (checkmate::test_number(lty)) {
    gg +
      ggplot2::geom_step(linewidth = lwd, lty = lty)
  } else if (is.numeric(lty)) {
    gg +
      ggplot2::geom_step(mapping = ggplot2::aes(linetype = .data[["strata"]]), linewidth = lwd) +
      ggplot2::scale_linetype_manual(values = lty)
  }

  gg <- gg +
    ggplot2::coord_cartesian(ylim = ylim) +
    ggplot2::labs(x = xlab, y = ylab, title = title, caption = footnotes)

  if (!is.null(col)) {
    gg <- gg +
      ggplot2::scale_color_manual(values = col) +
      ggplot2::scale_fill_manual(values = col)
  }

  if (censor_show) {
    dt <- data[data$n.censor != 0, ]
    # Build the label at full length: assigning a length-1 value to a zero-row base `data.frame`
    # (no censored observations) raises an error.
    dt$censor_lbl <- factor(rep("Censored", nrow(dt)), levels = "Censored")

    gg <- gg + ggplot2::geom_point(
      data = dt,
      ggplot2::aes(
        x = .data[["time"]],
        y = .data[["censor"]],
        shape = .data[["censor_lbl"]]
      ),
      size = size,
      show.legend = TRUE,
      inherit.aes = TRUE
    ) +
      ggplot2::scale_shape_manual(name = NULL, values = pch) +
      ggplot2::guides(
        shape = ggplot2::guide_legend(override.aes = list(linetype = NA)),
        fill = ggplot2::guide_legend(override.aes = list(shape = NA))
      )
  }

  # x-axis breaks and limits, depending on which of `xticks` / `max_time` were supplied.
  if (!is.null(max_time) && !is.null(xticks)) {
    gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, xticks), max(c(xticks, max_time))))
  } else if (!is.null(xticks)) {
    if (max(data$time) <= max(xticks)) {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks, limits = c(min(0, min(xticks)), max(xticks)))
    } else {
      gg <- gg + ggplot2::scale_x_continuous(breaks = xticks)
    }
  } else if (!is.null(max_time)) {
    gg <- gg + ggplot2::scale_x_continuous(limits = c(0, max_time))
  }

  if (!is.null(ggtheme)) {
    gg <- gg + ggtheme
  }

  # These theme settings are added last so they take precedence over `ggtheme`.
  gg + ggplot2::theme(
    legend.position = "bottom",
    legend.title = ggplot2::element_blank(),
    legend.key.height = grid::unit(0.02, "npc"),
    panel.grid.major.x = ggplot2::element_line(linewidth = 2)
  )
}

#' `ggplot` Decomposition
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The elements composing the `ggplot` are extracted and organized in a `list`.
#'
#' @param gg (`ggplot`)\cr a graphic to decompose.
#'
#' @return A named `list` with elements:
#'   * `panel`: The panel.
#'   * `yaxis`: The y-axis.
#'   * `xaxis`: The x-axis.
#'   * `xlab`: The x-axis label.
#'   * `ylab`: The y-axis label.
#'   * `guide`: The legend.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   yval = "Survival",
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt",
#'   footnotes = "ff"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "red", fill = "gray85", lwd = 5))
#' grid::grid.draw(g_el$panel)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "royalblue", fill = "gray85", lwd = 5))
#' grid::grid.draw(with(g_el, cbind(ylab, yaxis)))
#' }
#'
#' @export
h_decompose_gg <- function(gg) {
  # Convert the plot to its grob table once, then filter out each component by name pattern.
  plot_gtable <- ggplot2::ggplotGrob(gg)
  component_patterns <- c(
    panel = "panel",
    yaxis = "axis-l",
    xaxis = "axis-b",
    xlab = "xlab-b",
    ylab = "ylab-l",
    guide = "guide"
  )
  extract_component <- function(pattern) gtable::gtable_filter(plot_gtable, pattern)
  # `Map()` keeps the names of `component_patterns`, giving a named list of gtables.
  Map(extract_component, component_patterns)
}

#' Helper: KM Layout
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares a (5 rows) x (2 cols) layout for the Kaplan-Meier curve.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param g_el (`list` of `gtable`)\cr list as obtained by `h_decompose_gg()`.
#' @param annot_at_risk (`flag`)\cr compute and add the annotation table reporting the number of
#'   patient at risk matching the main grid of the Kaplan-Meier curve.
#'
#' @return A grid layout.
#'
#' @details The layout corresponds to a grid of two columns and five rows of unequal dimensions. Most of the
#'   dimensions are fixed; only the curve is flexible and will adapt to the remaining free space.
#'   * The left column gets the annotation of the `ggplot` (y-axis) and the names of the strata for the
#'     patients-at-risk tabulation. The main constraint is the width of this column, which must be wide
#'     enough to fit the strata names.
#'   * The right column receives the `ggplot`, the legend, the x-axis and the patients-at-risk table.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#' grid.show.layout(lyt)
#' }
#'
#' @export
h_km_layout <- function(data, g_el, title, footnotes, annot_at_risk = TRUE, annot_at_risk_title = TRUE) {
  # Builds the two-column `grid.layout()` used to assemble the KM plot. Candidate rows, top to bottom:
  #   [title] | curve | x-axis | legend | at-risk title | at-risk table | at-risk x-axis | [footnotes]
  # Rows in brackets exist only when `title` / `footnotes` are not `NULL`.
  txtlines <- levels(as.factor(data$strata))
  nlines <- nlevels(as.factor(data$strata))

  # Left column width (pt): the wider of (y-axis + y-label) and the longest strata label plus 7pt.
  col_annot_width <- max(
    c(
      as.numeric(grid::convertX(g_el$yaxis$width + g_el$ylab$width, "pt")),
      as.numeric(
        grid::convertX(
          grid::stringWidth(txtlines) + grid::unit(7, "pt"), "pt"
        )
      )
    )
  )

  ttl_row <- as.numeric(!is.null(title))
  foot_row <- as.numeric(!is.null(footnotes))

  # NOTE(review): the y-label *width* is added to the x-axis height, presumably as a stand-in
  # for the height of the x-axis label -- confirm.
  xaxis_height <- grid::convertX(with(g_el, xaxis$height + ylab$width), "pt")

  # Row heights and their units, one entry per candidate row. The leading element is never a
  # unit object, so `c()` yields plain numbers that are paired with `ht_units` below.
  ht_x <- c(
    if (ttl_row == 1) 2,
    1,
    xaxis_height + grid::unit(5, "pt"),
    grid::convertX(g_el$guide$heights, "pt") + grid::unit(2, "pt"),
    1,
    nlines + 0.5,
    xaxis_height,
    if (foot_row == 1) 1
  )
  ht_units <- c(
    if (ttl_row == 1) "lines",
    "null",
    "pt",
    "pt",
    "lines",
    "lines",
    "pt",
    if (foot_row == 1) "lines"
  )

  # Rows kept when there is no at-risk table: everything except the three at-risk rows
  # (title, table, x-axis). The mask must have one entry per candidate row; a shorter mask
  # would be recycled and assign the footnote row the height of the at-risk x-axis row.
  no_tbl_ind <- c(
    if (ttl_row == 1) TRUE,
    rep(TRUE, 3),
    rep(FALSE, 3),
    if (foot_row == 1) TRUE
  )

  if (annot_at_risk) {
    no_at_risk_tbl <- rep(TRUE, 6 + ttl_row + foot_row)
    if (!annot_at_risk_title) {
      # Drop only the "at-risk title" row (third from the end, not counting the footnote row).
      no_at_risk_tbl[length(no_at_risk_tbl) - 2 - foot_row] <- FALSE
    }
  } else {
    no_at_risk_tbl <- no_tbl_ind
  }

  grid::grid.layout(
    nrow = sum(no_at_risk_tbl), ncol = 2,
    widths = grid::unit(c(col_annot_width, 1), c("pt", "null")),
    heights = grid::unit(
      x = ht_x[no_at_risk_tbl],
      units = ht_units[no_at_risk_tbl]
    )
  )
}

#' Helper: Patient-at-Risk Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Two graphical objects are obtained, one corresponding to row labeling and the second to the table of
#' numbers of patients at risk. If `title = TRUE`, a third object corresponding to the table title is
#' also obtained.
#'
#' @inheritParams g_km
#' @inheritParams h_ggkm
#' @param annot_tbl (`data.frame`)\cr annotation as prepared by [survival::summary.survfit()] which
#'   includes the number of patients at risk at given time points.
#' @param xlim (`numeric`)\cr the maximum value on the x-axis (used to
#'   ensure the at risk table aligns with the KM graph).
#' @param title (`flag`)\cr whether the "Patients at Risk" title should be added above the `annot_at_risk`
#'   table. Has no effect if `annot_at_risk` is `FALSE`. Defaults to `TRUE`.
#'
#' @return A named `list` of two `gTree` objects if `title = FALSE`: `at_risk` and `label`, or three
#'   `gTree` objects if `title = TRUE`: `at_risk`, `label`, and `title`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#'
#' data_plot <- h_data_plot(fit_km = fit_km)
#'
#' xticks <- h_xticks(data = data_plot)
#'
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "tt", footnotes = "ff", yval = "Survival"
#' )
#'
#' # The annotation table reports the patient at risk for a given strata and
#' # time (`xticks`).
#' annot_tbl <- summary(fit_km, time = xticks)
#' if (is.null(fit_km$strata)) {
#'   annot_tbl <- with(annot_tbl, data.frame(n.risk = n.risk, time = time, strata = "All"))
#' } else {
#'   strata_lst <- strsplit(sub("=", "equals", levels(annot_tbl$strata)), "equals")
#'   levels(annot_tbl$strata) <- matrix(unlist(strata_lst), ncol = 2, byrow = TRUE)[, 2]
#'   annot_tbl <- data.frame(
#'     n.risk = annot_tbl$n.risk,
#'     time = annot_tbl$time,
#'     strata = annot_tbl$strata
#'   )
#' }
#'
#' # The annotation table is transformed into a grob.
#' tbl <- h_grob_tbl_at_risk(data = data_plot, annot_tbl = annot_tbl, xlim = max(xticks))
#'
#' # For the representation, the layout is estimated for which the decomposition
#' # of the graphic element is necessary.
#' g_el <- h_decompose_gg(gg)
#' lyt <- h_km_layout(data = data_plot, g_el = g_el, title = "t", footnotes = "f")
#'
#' grid::grid.newpage()
#' pushViewport(viewport(layout = lyt, height = .95, width = .95))
#' grid.rect(gp = grid::gpar(lty = 1, col = "purple", fill = "gray85", lwd = 1))
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 2))
#' grid.rect(gp = grid::gpar(lty = 1, col = "orange", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$at_risk)
#' popViewport()
#' pushViewport(viewport(layout.pos.row = 3:4, layout.pos.col = 1))
#' grid.rect(gp = grid::gpar(lty = 1, col = "green3", fill = "gray85", lwd = 1))
#' grid::grid.draw(tbl$label)
#' }
#'
#' @export
h_grob_tbl_at_risk <- function(data, annot_tbl, xlim, title = TRUE) {
  strata_fct <- as.factor(data$strata)
  strata_labels <- levels(strata_fct)
  n_strata <- nlevels(strata_fct)

  # Complete the table on a regular time grid: the step is the gap between the first two time
  # points, and (time, strata) combinations absent from `annot_tbl` are filled with 0.
  # NOTE(review): this presumes evenly spaced time points -- confirm for custom tick positions.
  tick_step <- annot_tbl$time[2] - annot_tbl$time[1]
  tick_grid <- expand.grid(
    time = seq(0, xlim, tick_step),
    strata = unique(annot_tbl$strata)
  )
  annot_tbl <- dplyr::left_join(tick_grid, annot_tbl, by = c("time", "strata"))
  annot_tbl[is.na(annot_tbl)] <- 0
  strata_pos <- as.numeric(annot_tbl$strata)

  # One background stripe per stratum, alternating between two shades of gray.
  stripe_rects <- function() {
    grid::rectGrob(
      x = 0, y = grid::unit((1:n_strata) - 1, "lines"),
      gp = grid::gpar(fill = c("gray95", "gray90"), alpha = 1, col = "white"),
      height = grid::unit(1, "lines"), just = "bottom", hjust = 0
    )
  }
  # Viewport as wide as the longest strata label (used for the row labels and the title).
  label_viewport <- function() {
    grid::viewport(width = max(grid::stringWidth(strata_labels)))
  }

  vp_table <- grid::plotViewport(margins = grid::unit(c(0, 0, 0, 0), "lines"))

  if (title) {
    title_grobs <- grid::gList(
      grid::textGrob(
        label = "Patients at Risk:",
        x = 1,
        y = grid::unit(0.2, "native"),
        gp = grid::gpar(fontface = "bold", fontsize = 10)
      )
    )
  }

  # Left-hand side: strata names, first stratum on top.
  label_grobs <- grid::gList(
    stripe_rects(),
    grid::textGrob(
      label = unique(annot_tbl$strata),
      x = 0.5,
      y = grid::unit(
        (max(unique(strata_pos)) - unique(strata_pos)) + 0.75,
        "native"
      ),
      gp = grid::gpar(fontface = "italic", fontsize = 10)
    )
  )

  # Right-hand side: at-risk counts positioned at their time on the x scale.
  count_grobs <- grid::gList(
    stripe_rects(),
    grid::textGrob(
      label = annot_tbl$n.risk,
      x = grid::unit(annot_tbl$time, "native"),
      # NOTE(review): the original author was unsure whether this should be "native" instead.
      y = grid::unit(
        (max(strata_pos) - strata_pos) + .5,
        "line"
      )
    )
  )

  at_risk_tree <- grid::gTree(
    vp = vp_table,
    children = grid::gList(
      grid::gTree(
        vp = grid::dataViewport(
          xscale = c(0, xlim) + c(-0.05, 0.05) * xlim,
          yscale = c(0, n_strata + 1),
          extension = c(0.05, 0)
        ),
        children = grid::gList(count_grobs)
      )
    )
  )
  label_tree <- grid::gTree(
    vp = label_viewport(),
    children = grid::gList(
      grid::gTree(
        vp = grid::dataViewport(
          xscale = 0:1,
          yscale = c(0, n_strata + 1),
          extension = c(0.0, 0)
        ),
        children = grid::gList(label_grobs)
      )
    )
  )

  grobs <- list(
    at_risk = grid::gList(at_risk_tree),
    label = grid::gList(label_tree)
  )

  if (title) {
    title_tree <- grid::gTree(
      vp = label_viewport(),
      children = grid::gList(
        grid::gTree(
          vp = grid::dataViewport(
            xscale = 0:1,
            yscale = c(0, 1),
            extension = c(0, 0)
          ),
          children = grid::gList(title_grobs)
        )
      )
    )
    grobs[["title"]] <- grid::gList(title_tree)
  }

  grobs
}

#' Helper Function: Survival Estimations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Transform a survival fit to a table with groups in rows characterized by N, median and confidence interval.
#'
#' @inheritParams h_data_plot
#'
#' @return A summary table with statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "OS")
#' fit <- survfit(
#'   form = Surv(AVAL, 1 - CNSR) ~ ARMCD,
#'   data = adtte
#' )
#' h_tbl_median_surv(fit_km = fit)
#' }
#'
#' @export
h_tbl_median_surv <- function(fit_km, armval = "All") {
  # Summarise the fit once; the statistics table and the confidence level are both read from it.
  fit_summary <- summary(fit_km)
  tbl <- fit_summary$table

  y <- if (is.null(fit_km$strata)) {
    # Single curve: `tbl` is a named vector, turned into a one-row data frame named after `armval`.
    as.data.frame(t(tbl), row.names = armval)
  } else {
    # Row names are "<variable>=<level>"; keep only what follows the first "=".
    # The anchored regex stays correct even when a label contains the word "equals".
    rownames(tbl) <- sub("^[^=]*=", "", rownames(tbl))
    as.data.frame(tbl)
  }

  conf.int <- fit_summary$conf.int # nolint
  y$records <- round(y$records)
  y$median <- signif(y$median, 4)
  # The confidence-limit columns are named "<conf.int>LCL" / "<conf.int>UCL".
  y$CI <- paste0(
    "(", signif(y[[paste0(conf.int, "LCL")]], 4), ", ", signif(y[[paste0(conf.int, "UCL")]], 4), ")"
  )
  stats::setNames(
    y[c("records", "median", "CI")],
    c("N", "Median", f_conf_level(conf.int))
  )
}

#' Helper Function: Survival Estimation Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The survival fit is transformed in a grob containing a table with groups in
#' rows characterized by N, median and 95% confidence interval.
#'
#' @inheritParams g_km
#' @inheritParams h_data_plot
#' @param ttheme (`list`)\cr see [gridExtra::ttheme_default()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `N`, `Median`, and `XX% CI` (`XX` taken from `fit_km`).
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .) %>%
#'   h_grob_median_surv() %>%
#'   grid::grid.draw()
#' }
#'
#' @export
h_grob_median_surv <- function(fit_km,
                               armval = "All",
                               x = 0.9,
                               y = 0.9,
                               width = grid::unit(0.3, "npc"),
                               ttheme = gridExtra::ttheme_default()) {
  data <- h_tbl_median_surv(fit_km, armval = armval)

  # Work in inches; the height is derived from the width and the number of table rows (header included).
  width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in")
  height <- width * (nrow(data) + 1) / 12

  # Longest entry among the row names and within each column (header included), left-padded with spaces.
  # Their rendered widths determine the relative column widths of the table.
  w <- paste(" ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col) {
        entries <- c(col, data[[col]])
        entries[which.max(nchar(entries))]
      },
      FUN.VALUE = character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)
  col_widths <- (w_unit / sum(w_unit)) * width
  row_height <- as.numeric(height) / 4

  # Probing font sizes below changes the base graphics point size; restore the caller's value on exit so
  # that the function does not leak graphics state.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1-64) at which the 4th entry (last table column) still fits its column width.
  w_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, FUN.VALUE = numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric(col_widths)[4]])

  # Largest point size (1-64) at which a line of text still fits the row height.
  # NOTE(review): `strheight()` is handed a grid unit rather than a string here - confirm this is intended.
  h_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, FUN.VALUE = numeric(1))
  f_size_h <- which.max(h_txt[h_txt < row_height])

  # Only auto-size when the theme's core font size is 12; any other value is left untouched.
  if (ttheme$core$fg_params$fontsize == 12) {
    f_size <- min(f_size_w, f_size_h)
    ttheme$core$fg_params$fontsize <- f_size
    ttheme$colhead$fg_params$fontsize <- f_size
    ttheme$rowhead$fg_params$fontsize <- f_size
  }

  gt <- gridExtra::tableGrob(
    d = data,
    theme = ttheme
  )
  gt$widths <- col_widths
  gt$heights <- rep(grid::unit(row_height, grid::unitType(height)), nrow(gt))

  # (x, y) locate the top-right corner of the table, nudged by 1 line horizontally and 1.5 lines vertically.
  vp <- grid::viewport(
    x = grid::unit(x, "npc") + grid::unit(1, "lines"),
    y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
    height = height,
    width = width,
    just = c("right", "top")
  )

  grid::gList(
    grid::gTree(
      vp = vp,
      children = grid::gList(gt)
    )
  )
}

#' Helper: Grid Object with y-axis Annotation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Build the y-axis annotation from a decomposed `ggplot`.
#'
#' @param ylab (`gtable`)\cr the y-lab as a graphical object derived from a `ggplot`.
#' @param yaxis (`gtable`)\cr the y-axis as a graphical object derived from a `ggplot`.
#'
#' @return a `gTree` object containing the y-axis annotation from a `ggplot`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' fit_km <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   survfit(form = Surv(AVAL, 1 - CNSR) ~ ARMCD, data = .)
#' data_plot <- h_data_plot(fit_km = fit_km)
#' xticks <- h_xticks(data = data_plot)
#' gg <- h_ggkm(
#'   data = data_plot,
#'   censor_show = TRUE,
#'   xticks = xticks, xlab = "Days", ylab = "Survival Probability",
#'   title = "title", footnotes = "footnotes", yval = "Survival"
#' )
#'
#' g_el <- h_decompose_gg(gg)
#'
#' grid::grid.newpage()
#' pvp <- grid::plotViewport(margins = c(5, 4, 2, 20))
#' pushViewport(pvp)
#' grid::grid.draw(h_grob_y_annot(ylab = g_el$ylab, yaxis = g_el$yaxis))
#' grid.rect(gp = grid::gpar(lty = 1, col = "gray35", fill = NA))
#' }
#'
#' @export
h_grob_y_annot <- function(ylab, yaxis) {
  # The annotation viewport is as wide as the axis and its label together, expressed in points.
  annot_width <- grid::convertX(yaxis$width + ylab$width, "pt")

  # Right-justified at the right-hand edge of the parent viewport.
  annot_vp <- grid::viewport(
    width = annot_width,
    x = grid::unit(1, "npc"),
    just = "right"
  )

  # Column-bind the label and the axis, in that order, into a single graphical object.
  annot <- cbind(ylab, yaxis)

  grid::gList(grid::gTree(vp = annot_vp, children = grid::gList(annot)))
}

#' Helper Function: Pairwise `CoxPH` table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create a `data.frame` of pairwise stratified or unstratified `CoxPH` analysis results.
#'
#' @inheritParams g_km
#'
#' @return A `data.frame` containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' h_tbl_coxph_pairwise(
#'   df = adtte,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARM"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9)
#' )
#' }
#'
#' @export
h_tbl_coxph_pairwise <- function(df,
                                 variables,
                                 ref_group_coxph = NULL,
                                 control_coxph_pw = control_coxph(),
                                 annot_coxph_ref_lbls = FALSE) {
  assert_df_with_variables(df, variables)
  checkmate::assert_flag(annot_coxph_ref_lbls)

  arm <- variables$arm
  # Coerce to factor *before* validating the reference group, so that the check runs against the levels
  # actually used below (and also works when the arm column is a character vector).
  df[[arm]] <- factor(df[[arm]])
  arm_levels <- levels(df[[arm]])
  checkmate::assert_choice(ref_group_coxph, arm_levels, null.ok = TRUE)

  # The reference arm defaults to the first level; every other level is compared against it.
  ref_group <- if (!is.null(ref_group_coxph)) ref_group_coxph else arm_levels[1]
  comp_group <- setdiff(arm_levels, ref_group)

  results <- lapply(comp_group, function(comp) {
    res <- s_coxph_pairwise(
      df = df[df[[arm]] == comp, , drop = FALSE],
      .ref_group = df[df[[arm]] == ref_group, , drop = FALSE],
      .in_ref_col = FALSE,
      .var = variables$tte,
      is_event = variables$is_event,
      strat = variables$strat,
      control = control_coxph_pw
    )
    res_df <- data.frame(
      hr = format(round(res$hr, 2), nsmall = 2),
      hr_ci = paste0(
        "(", format(round(res$hr_ci[1], 2), nsmall = 2), ", ",
        format(round(res$hr_ci[2], 2), nsmall = 2), ")"
      ),
      # The p-value is rounded to 4 decimals but not zero-padded. `trim = TRUE` spells out what the
      # previous positional `4` was actually bound to, so the printed value is unchanged.
      pvalue = if (res$pvalue < 0.0001) "<0.0001" else format(round(res$pvalue, 4), trim = TRUE),
      stringsAsFactors = FALSE
    )
    colnames(res_df) <- c("HR", vapply(res[c("hr_ci", "pvalue")], obj_label, FUN.VALUE = "character"))
    row.names(res_df) <- comp
    res_df
  })

  # The list names become the row names of the combined table. Only build the "<comp> vs. <ref>" labels
  # when there is something to label: `paste()` on a zero-length vector returns a length-one string, which
  # cannot be assigned as names of an empty list.
  row_labels <- comp_group
  if (annot_coxph_ref_lbls && length(comp_group) > 0) {
    row_labels <- paste(comp_group, "vs.", ref_group)
  }
  names(results) <- row_labels

  # With no comparison arm this is `rbind()` of nothing, i.e. `NULL`.
  do.call(rbind, results)
}

#' Helper Function: `CoxPH` Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Grob of the `data.frame` output from [h_tbl_coxph_pairwise()].
#'
#' @inheritParams h_grob_median_surv
#' @param ... arguments will be passed to [h_tbl_coxph_pairwise()].
#' @param x (`numeric`)\cr a value between 0 and 1 specifying x-location.
#' @param y (`numeric`)\cr a value between 0 and 1 specifying y-location.
#' @param width (`unit`)\cr width (as a unit) to use when printing the grob.
#'
#' @return A `grob` of a table containing statistics `HR`, `XX% CI` (`XX` taken from `control_coxph_pw`),
#'   and `p-value (log-rank)`.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(survival)
#' library(grid)
#'
#' grid::grid.newpage()
#' grid.rect(gp = grid::gpar(lty = 1, col = "pink", fill = "gray85", lwd = 1))
#' data <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#' tbl_grob <- h_grob_coxph(
#'   df = data,
#'   variables = list(tte = "AVAL", is_event = "is_event", arm = "ARMCD"),
#'   control_coxph_pw = control_coxph(conf_level = 0.9), x = 0.5, y = 0.5
#' )
#' grid::grid.draw(tbl_grob)
#' }
#'
#' @export
h_grob_coxph <- function(...,
                         x = 0,
                         y = 0,
                         width = grid::unit(0.4, "npc"),
                         ttheme = gridExtra::ttheme_default(
                           padding = grid::unit(c(1, .5), "lines"),
                           core = list(bg_params = list(fill = c("grey95", "grey90"), alpha = .5))
                         )) {
  data <- h_tbl_coxph_pairwise(...)

  # Fallback used whenever the table cannot be drawn: inform the user and return an empty grob list.
  empty_grob <- function() {
    message(paste(
      "Warning: Cox table will not be displayed as there is",
      "not any level to be compared in the arm variable."
    ))
    grid::gList(
      grid::gTree(
        vp = NULL,
        children = NULL
      )
    )
  }

  # Without any comparison arm the pairwise table is `NULL`. Bail out up front instead of running the
  # sizing code on `NULL` and relying on `tableGrob()` to fail later.
  if (is.null(data)) {
    return(empty_grob())
  }

  # Work in inches; the height is derived from the width and the number of table rows (header included).
  width <- grid::convertUnit(grid::unit(as.numeric(width), grid::unitType(width)), "in")
  height <- width * (nrow(data) + 1) / 12

  # Longest entry among the row names and within each column (header included), left-padded with spaces.
  # Their rendered widths determine the relative column widths of the table.
  w <- paste("    ", c(
    rownames(data)[which.max(nchar(rownames(data)))],
    vapply(
      names(data),
      function(col) {
        entries <- c(col, data[[col]])
        entries[which.max(nchar(entries))]
      },
      FUN.VALUE = character(1)
    )
  ))
  w_unit <- grid::convertWidth(grid::stringWidth(w), "in", valueOnly = TRUE)
  col_widths <- (w_unit / sum(w_unit)) * width
  row_height <- as.numeric(height) / 4

  # Probing font sizes below changes the base graphics point size; restore the caller's value on exit so
  # that the function does not leak graphics state.
  old_ps <- graphics::par("ps")
  on.exit(graphics::par(ps = old_ps), add = TRUE)

  # Largest point size (1-64) at which the 4th entry (last table column) still fits its column width.
  w_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strwidth(w[4], units = "in")
  }, FUN.VALUE = numeric(1))
  f_size_w <- which.max(w_txt[w_txt < as.numeric(col_widths)[4]])

  # Largest point size (1-64) at which a line of text still fits the row height.
  # NOTE(review): `strheight()` is handed a grid unit rather than a string here - confirm this is intended.
  h_txt <- vapply(1:64, function(ps) {
    graphics::par(ps = ps)
    graphics::strheight(grid::stringHeight("X"), units = "in")
  }, FUN.VALUE = numeric(1))
  f_size_h <- which.max(h_txt[h_txt < row_height])

  # Only auto-size when the theme's core font size is 12; any other value is left untouched.
  if (ttheme$core$fg_params$fontsize == 12) {
    f_size <- min(f_size_w, f_size_h)
    ttheme$core$fg_params$fontsize <- f_size
    ttheme$colhead$fg_params$fontsize <- f_size
    ttheme$rowhead$fg_params$fontsize <- f_size
  }

  # Drawing stays best-effort: any failure while building the table falls back to the empty grob.
  tryCatch(
    expr = {
      gt <- gridExtra::tableGrob(
        d = data,
        theme = ttheme
      )
      gt$widths <- col_widths
      gt$heights <- rep(grid::unit(row_height, grid::unitType(height)), nrow(gt))
      # (x, y) locate the bottom-left corner of the table, nudged by 1 line horizontally and 1.5 lines
      # vertically.
      vp <- grid::viewport(
        x = grid::unit(x, "npc") + grid::unit(1, "lines"),
        y = grid::unit(y, "npc") + grid::unit(1.5, "lines"),
        height = height,
        width = width,
        just = c("left", "bottom")
      )
      grid::gList(
        grid::gTree(
          vp = vp,
          children = grid::gList(gt)
        )
      )
    },
    error = function(e) empty_grob()
  )
}

#' Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_coxreg")`
#'   to see available statistics for this function.
#'
#' @details Cox models are the most commonly used methods to estimate the magnitude of
#'   the effect in survival analysis. It assumes proportional hazards: the ratio
#'   of the hazards between groups (e.g., two arms) is constant over time.
#'   This ratio is referred to as the "hazard ratio" (HR) and is one of the
#'   most commonly reported metrics to describe the effect size in survival
#'   analysis (NEST Team, 2020).
#'
#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
#'   helper functions, and [tidy_coxreg] for custom tidy methods.
#'
#' @examples
#' library(survival)
#'
#' # Testing dataset [survival::bladder].
#' set.seed(1, kind = "Mersenne-Twister")
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   tibble::tibble(
#'     TIME = stop,
#'     STATUS = event,
#'     ARM = as.factor(rx),
#'     COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
#'     COVAR2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     ) %>% formatters::with_label("Sex (F/M)")
#'   )
#' )
#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#' dta_bladder$STUDYID <- factor("X")
#'
#' u1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#'
#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#'
#' m1_variables <- list(
#'   time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
#' )
#'
#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
#'
#' @name cox_regression
#' @order 1
NULL

#' @describeIn cox_regression Statistics function that transforms results tabulated
#'   from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
#'
#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
#'   function with tidying applied via [broom::tidy()].
#' @param .stats (`character`)\cr the name of statistics to be reported among:
#'   * `n`: number of observations (univariate only)
#'   * `hr`: hazard ratio
#'   * `ci`: confidence interval
#'   * `pval`: p-value of the treatment effect
#'   * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
#'   Defaults to "all". Other options include "var_main" for main effects, `"inter"` for interaction effects,
#'   and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is "all" specific
#'   variables can be selected by specifying `.var_nms`.
#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
#'   Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
#'   desired variable name and the variable label in that order to see all `.stats` related to that variable. When
#'   `.which_vars` is `"var_main"`, `.var_nms` should be only the variable name.
#'
#' @return
#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
#'
#' @examples
#' # s_coxreg
#'
#' # Univariate
#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
#' df1 <- broom::tidy(univar_model)
#'
#' s_coxreg(model_df = df1, .stats = "hr")
#'
#' # Univariate with interactions
#' univar_model_inter <- fit_coxreg_univar(
#'   variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
#' )
#' df1_inter <- broom::tidy(univar_model_inter)
#'
#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
#'
#' # Univariate without treatment arm - only "COVAR2" covariate effects
#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
#' df1_covs <- broom::tidy(univar_covs_model)
#'
#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
#'
#' # Multivariate.
#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
#' df2 <- broom::tidy(multivar_model)
#'
#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
#' s_coxreg(
#'   model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
#'   .var_nms = c("COVAR1", "A Covariate Label")
#' )
#'
#' # Multivariate without treatment arm - only "COVAR1" main effect
#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
#' df2_covs <- broom::tidy(multivar_covs_model)
#'
#' s_coxreg(model_df = df2_covs, .stats = "hr")
#'
#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Restrict to the requested terms, if any were given.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  # For multivariate level rows, collapse all remaining rows into one group named after the last
  # entry of `.var_nms`.
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of each group that has further rows. The decision is made per
    # group, not from the size of the first group only, so the result does not depend on which term
    # happens to sort first.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # One list per group: the requested statistic for each remaining row, named by the row's term label.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}

#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
#'   and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
#'
#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
#' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
#'   avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
#'   as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
#'   from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
#'   effect estimates will be tabulated later.
#'
#' @return
#' * `a_coxreg()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "Label 1",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR1"),
#'   .stats = "n",
#'   .formats = "xx"
#' )
#'
#' a_coxreg(
#'   df = dta_bladder,
#'   labelstr = "",
#'   variables = u1_variables,
#'   .spl_context = list(value = "COVAR2"),
#'   .stats = "pval",
#'   .formats = "xx.xxxx"
#' )
#'
#' @export
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_level = lifecycle::deprecated(),
                     na_str = "",
                     cache_env = NULL) {
  # `na_level` is deprecated in favour of `na_str`; if it is supplied, warn and use its value.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "a_coxreg(na_level)", "a_coxreg(na_str)")
    na_str <- na_level
  }

  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  # Resolve the row label: a vector of labels is looked up by covariate name (falling back to the data
  # label); a single label equal to the covariate name is replaced by the data label when one exists.
  if (length(labelstr) > 1) {
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    labelstr <- var_lbl
  }
  # Treatment-effect, multivariate and no-arm calls are fitted without interaction terms. Otherwise the
  # model is fitted for the current covariate only, with interactions switched on for main-effect rows.
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Fit and tidy the model only if nothing is stored under this covariate's name; otherwise reuse the
  # stored model. With `cache_env = NULL` the lookup is always `NULL`, so the model is fitted on every call.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  # For univariate calls that are not main-effect rows, blank out the interaction p-value column.
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # From here on, models without a treatment arm (the no-arm special case, or a numeric covariate with
  # no arm) follow the multivariate row-selection and labelling rules; in the numeric case the row is
  # additionally flagged as a main effect.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Decide which rows of the tidied model are handed to `s_coxreg()`.
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  # Only the first group returned by `s_coxreg()` is used.
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]
  # Choose the row names/labels: a "(reference = ...)" label rebuilt around `labelstr`, `labelstr` itself,
  # the fixed label "All", or the names that came back with the values.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) { # nolint
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }
  # One row per selected value; every row gets the same format and the same NA replacement string.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names)
  )
}

#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
#'   layout. This function is a wrapper for several `rtables` layouting functions, including
#'   [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
#'
#' @inheritParams fit_coxreg_univar
#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
#'   univariate Cox regression will run.
#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
#'   for all rows. This should be created during pre-processing if no such variable currently exists.
#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
#'   Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
#'   treatment and covariate sections and the second between different covariates.
#'
#' @return
#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
#'   containing the chosen statistics to the table layout.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
#'   `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
#'   Cox regression models, respectively.
#'
#' @examples
#' # summarize_coxreg
#'
#' result_univar <- basic_table() %>%
#'   summarize_coxreg(variables = u1_variables) %>%
#'   build_table(dta_bladder)
#' result_univar
#'
#' result_univar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = u2_variables,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_univar_covs
#'
#' result_multivar <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m1_variables,
#'     multivar = TRUE,
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar
#'
#' result_multivar_covs <- basic_table() %>%
#'   summarize_coxreg(
#'     variables = m2_variables,
#'     multivar = TRUE,
#'     varlabels = c("Covariate 1", "Covariate 2") # custom labels
#'   ) %>%
#'   build_table(dta_bladder)
#' result_multivar_covs
#'
#' @export
#' @order 2
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = lifecycle::deprecated(),
                             na_str = "",
                             .section_div = NA_character_) {
  # `na_level` is deprecated in favour of `na_str`; if it is supplied, warn and use its value.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "summarize_coxreg(na_level)", "summarize_coxreg(na_str)")
    na_str <- na_level
  }

  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }
  if (control$interaction && !"arm" %in% names(variables)) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Keep only the statistics that are valid for the requested model, always in the canonical column order.
  .stats <- if (!"arm" %in% names(variables) || multivar) { # only valid statistics
    intersect(c("hr", "ci", "pval"), .stats)
  } else if (control$interaction) {
    intersect(c("n", "hr", "ci", "pval", "pval_inter"), .stats)
  } else {
    intersect(c("n", "hr", "ci", "pval"), .stats)
  }
  stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- stat_labels[names(stat_labels) %in% .stats]
  # `.stats` and `.formats` are forwarded below as parallel per-column vectors, so the formats must follow
  # the order of `.stats` rather than the order in which the caller happened to list them.
  .formats <- .formats[intersect(.stats, names(.formats))]
  env <- new.env() # create caching environment

  # One column per statistic. Each entry of `extra_args` holds one value per column; every column is
  # handed the same caching environment.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  if ("arm" %in% names(variables)) { # treatment effect
    # Splitting on the constant `common_var` yields a single "Treatment:" section.
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        child_labels = "hidden",
        section_div = head(.section_div, 1)
      )
    if (!multivar) {
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar,
            labelstr = ""
          )
        )
    } else { # treatment level effects
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
          )
        ) %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    }
  }

  if ("covariates" %in% names(variables)) { # covariate main effects
    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (multivar || control$interaction || !"arm" %in% names(variables)) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )
    if (multivar || control$interaction || !"arm" %in% names(variables)) {
      # Main effects are produced as content rows of each covariate group.
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      # Plain univariate model with an arm: one analysis row per covariate. Custom labels are matched to
      # covariates by position and passed on by name.
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    if (!"arm" %in% names(variables)) control$interaction <- TRUE # special case: univar no arm
    if (multivar || control$interaction) { # covariate level effects
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""),
          indent_mod = if (!"arm" %in% names(variables) || multivar) 0L else -1L
        )
    }
  }

  lyt
}

#' Helper Functions for Tabulating Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as median survival
#' time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @inheritParams survival_duration_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_duration_subgroups
NULL

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
#'
#' @return
#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
#'
#' @examples
#' # Extract median survival time for one group.
#' h_survtime_df(
#'   tte = adtte_f$AVAL,
#'   is_event = adtte_f$is_event,
#'   arm = adtte_f$ARM
#' )
#'
#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  obs <- data.frame(
    tte = tte,
    is_event = is_event,
    stringsAsFactors = FALSE
  )

  # Drop observations with a missing time or event flag, keeping `arm` aligned with the remaining rows.
  is_complete <- stats::complete.cases(obs)
  obs <- obs[is_complete, ]
  arm <- arm[is_complete]

  obs_by_arm <- split(obs, arm)

  # Build the one-row summary for a single arm; arms without observations get `NA` statistics.
  summarize_arm <- function(arm_obs, arm_name) {
    median_est <- NA
    n_events <- NA
    if (nrow(arm_obs) > 0) {
      surv_stats <- s_surv_time(arm_obs, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(arm_obs$is_event)
    }

    data.frame(
      arm = arm_name,
      n = nrow(arm_obs),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }
  arm_rows <- mapply(summarize_arm, obs_by_arm, names(obs_by_arm), SIMPLIFY = FALSE)

  # Stack the per-arm rows without row names and restore the original level order of `arm`.
  rbind_args <- c(arm_rows, make.row.names = FALSE)
  out <- do.call(rbind, args = rbind_args)
  out$arm <- factor(out$arm, levels = levels(arm))
  out
}

#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract median survival time for multiple groups.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_survtime_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Median survival time by arm for one data subset, using the columns named in `variables`.
  survtime_for <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall ("All Patients") rows are always produced and flagged as content rows.
  overall <- survtime_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall rows are the complete result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of rows per subgroup level (or per user-defined grouping).
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    estimates <- survtime_for(grp$df)
    # Repeat the single label row so that every arm row carries the subgroup metadata.
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
#'   treatment hazard ratio.
#'
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
#'   `conf_level`, `pval` and `pval_label`.
#'
#' @examples
#' # Extract hazard ratio for one group.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
#'
#' # Extract hazard ratio for one group with stratification factor.
#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
#'
#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_cols <- NULL

  # Optional stratification: either a data frame of factors (one column per
  # stratification variable) or a single factor stored as `strata_data`.
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_cols <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_cols, strata_cols)))
    } else {
      assert_valid_factor(strata_data, len = nrow(df_tte))
      strata_cols <- "strata_data"
    }
    df_tte[strata_cols] <- strata_data
  }

  # Builds the single-row result. Estimates default to `NA` for the cases where
  # no model can be fitted.
  make_result <- function(n_tot, n_tot_events, hr = NA, lcl = NA, ucl = NA, pval = NA, pval_label = NA) {
    data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      n_tot_events = n_tot_events,
      hr = hr,
      lcl = lcl,
      ucl = ucl,
      conf_level = control[["conf_level"]],
      pval = pval,
      pval_label = pval_label,
      stringsAsFactors = FALSE
    )
  }

  # First level of `arm` is the reference group, second level is the comparison group.
  data_by_arm <- split(df_tte, arm)
  n_ref <- nrow(data_by_arm[[1]])
  n_comp <- nrow(data_by_arm[[2]])

  if (n_ref > 0 && n_comp > 0) {
    # Both arms have data: hazard ratio, CI and p-value from the pairwise Cox model.
    result <- s_coxph_pairwise(
      df = data_by_arm[[2]],
      .ref_group = data_by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_cols,
      control = control
    )
    make_result(
      n_tot = unname(as.numeric(result$n_tot)),
      n_tot_events = unname(as.numeric(result$n_tot_events)),
      hr = unname(as.numeric(result$hr)),
      lcl = unname(result$hr_ci[1]),
      ucl = unname(result$hr_ci[2]),
      pval = as.numeric(result$pvalue),
      pval_label = obj_label(result$pvalue)
    )
  } else if (n_ref == 0 && n_comp == 0) {
    # Neither arm has data: zero counts, no estimates.
    make_result(n_tot = 0L, n_tot_events = 0L)
  } else {
    # Exactly one arm has data: report counts over the complete cases only.
    df_tte_complete <- df_tte[stats::complete.cases(df_tte), ]
    make_result(
      n_tot = nrow(df_tte_complete),
      n_tot_events = sum(df_tte_complete$is_event)
    )
  }
}

#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
#'   across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
#'   optionally `subgroups` and `strat`. `groups_lists` optionally specifies
#'   groupings for `subgroups` variables.
#'
#' @return
#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
#'   `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Extract hazard ratio for multiple groups.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' # Extract hazard ratio for multiple groups with stratification factors.
#' h_coxph_subgroups_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#'
#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio estimates for one data subset. Stratification columns are only
  # handed over when `variables$strat` is given.
  coxph_for <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall ("All Patients") row is always produced and flagged as a content row.
  overall <- coxph_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall row is the complete result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of rows per subgroup level (or per user-defined grouping).
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subsets, function(grp) {
    estimates <- coxph_for(grp$df)
    # Repeat the single label row so that it lines up with the estimate rows.
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' Split Dataframe by Subgroups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Split a dataframe into a non-nested list of subsets.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @param data (`data.frame`)\cr dataset to split.
#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
#'   Unused levels not present in `data` are dropped. Note that the order in this vector
#'   determines the order in the downstream table.
#'
#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
#'
#' @details Main functionality is to prepare data for use in forest plot layouts.
#'
#' @examples
#' df <- data.frame(
#'   x = c(1:5),
#'   y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
#'   z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
#' )
#' formatters::var_labels(df) <- paste("label for", names(df))
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z")
#' )
#'
#' h_split_by_subgroups(
#'   data = df,
#'   subgroups = c("y", "z"),
#'   groups_lists = list(
#'     y = list("AB" = c("A", "B"), "C" = "C")
#'   )
#' )
#'
#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Labels of all columns are saved so they can be re-applied to every subset below.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroups that will get their own subset:
  # either the levels present in the data, or the names of the user-defined
  # groupings that contain at least one level present in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  # Levels are given explicitly so that `split()` keeps the order of `subgroups`.
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # Factor levels that belong to this subgroup: the levels of the user-defined
    # grouping if there is one, otherwise the single level itself.
    subgroup_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns `NA`, so rows with a missing subgroup value are simply
    # left out. Comparing with `==` would give `NA` for them, and indexing a data
    # frame with an `NA` logical inserts rows consisting only of `NA`s.
    which_row <- data[[row_i$var]] %in% subgroup_levels
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

#' Helper Function for Deriving Analysis Datasets for `LBT13` and `LBT14`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
#' output dataset. Remember that `na_level` must match the needed pre-processing
#' done with [df_explicit_na()] to have the desired output.
#'
#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
#'   If worst grade per patient per visit is specified for `worst_flag`, then
#'   `by_visit` should be `TRUE` to generate worst grade patient per visit.
#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
#'   grade. Defaults to `c("SCREENING", "BASELINE")`.
#'
#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
#'   `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
#'   `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
#'
#' @details In the result data missing records will be created for the following situations:
#'   * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
#'   * Patients who do not have any post-baseline lab values.
#'   * Patients without any post-baseline values flagged as the worst.
#'
#' @examples
#' # `h_adsl_adlb_merge_using_worst_flag`
#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRHIFL" = "Y")
#' )
#'
#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
#'   tern_ex_adsl,
#'   tern_ex_adlb,
#'   worst_flag = c("WGRLOVFL" = "Y"),
#'   by_visit = TRUE
#' )
#'
#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  # Overview: keep the `adlb` records that satisfy every `worst_flag` condition
  # (excluding `no_fillin_visits`), build the full grid of subject x parameter
  # (x visit when `by_visit = TRUE`) from `adsl`/`adlb`, and outer-merge the two so
  # that grid combinations without a flagged record show up with missing values.

  # Names of `worst_flag` are column names in `adlb`, values are the required entries.
  col_names <- names(worst_flag)
  filter_values <- worst_flag

  # Per flag column: positions of the rows holding the required value
  # (`which()` drops comparisons that evaluate to `NA`).
  temp <- Map(
    function(x, y) which(adlb[[x]] == y),
    col_names,
    filter_values
  )

  # Rows that satisfy all flag conditions simultaneously.
  position_satisfy_filters <- Reduce(intersect, temp)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged records outside the excluded visits, reduced to the analysis columns.
  adlb_f <- adlb[position_satisfy_filters, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  # Distinct visits in `adlb` that are eligible for fill-in (only used when `by_visit = TRUE`).
  avisits_grid <- adlb %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
    dplyr::pull(.data[["AVISIT"]]) %>%
    unique()

  if (by_visit) {
    # Full grid: every subject in `adsl` x every eligible visit x every parameter in `adlb`.
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )

    # Attach the visit number and parameter description that go with each code.
    adsl_lb <- adsl_lb %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT") %>%
      dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

    # Add the subject-level columns that `adsl` and `adlb` have in common.
    adsl1 <- adsl[, adsl_adlb_common_columns]
    adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID")

    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")

    # Lookup of `BTOXGR` per subject and parameter, taken from the unfiltered `adlb`.
    adlb_btoxgr <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    # Outer merge: grid rows without a flagged record are kept with `NA` in the `adlb` columns.
    adlb_out <- merge(
      adlb_f,
      adsl_lb,
      by = by_variables_from_adlb,
      all = TRUE,
      sort = FALSE
    )
    # Overwrite `BTOXGR` on every row (including the filled-in ones) with the looked-up value.
    adlb_out <- adlb_out %>%
      dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")

    # Labels are collected in the order: merge keys, remaining `adlb` columns, remaining
    # `adsl` columns. NOTE(review): this relies on `merge()` returning its columns in
    # that same order, since the labels are assigned to `adlb_out` as a whole below.
    adlb_var_labels <- c(
      formatters::var_labels(adlb[by_variables_from_adlb]),
      formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
      formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
    )
  } else {
    # Full grid: every subject in `adsl` x every parameter in `adlb` (no visit dimension).
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )

    # Attach the parameter description that goes with each parameter code.
    adsl_lb <- adsl_lb %>% dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

    # Add the subject-level columns that `adsl` and `adlb` have in common.
    adsl1 <- adsl[, adsl_adlb_common_columns]
    adsl_lb <- adsl1 %>% merge(adsl_lb, by = "USUBJID")

    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")

    # Outer merge: grid rows without a flagged record are kept with `NA` in the `adlb` columns.
    adlb_out <- merge(
      adlb_f,
      adsl_lb,
      by = by_variables_from_adlb,
      all = TRUE,
      sort = FALSE
    )

    # Same label ordering as in the `by_visit` branch; here `AVISIT` and `AVISITN`
    # are not merge keys and therefore fall into the "remaining `adlb` columns" part.
    adlb_var_labels <- c(
      formatters::var_labels(adlb[by_variables_from_adlb]),
      formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
      formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
    )
  }

  # Toxicity grades are returned as factors.
  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

# Utility functions to cooperate with {rtables} package

#' Convert Table into Matrix of Strings
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to use mostly within tests. The `with_spaces` parameter allows
#' to test not only for content but also indentation and table structure.
#' `print_txt_to_copy` instead facilitates test development by returning a well
#' formatted text that only needs to be copied and pasted into the expected output.
#'
#' @inheritParams formatters::toString
#' @param x `rtables` table.
#' @param with_spaces (`logical`)\cr should the tested table keep the indentation and other relevant spaces?
#' @param print_txt_to_copy  (`logical`)\cr utility to have a way to copy the input table directly
#'   into the expected variable instead of copying it too manually.
#'
#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the
#'   table will be printed to console, ready to be copied as an expected value.
#'
#' @examples
#' tbl <- basic_table() %>%
#'   split_rows_by("SEX") %>%
#'   split_cols_by("ARM") %>%
#'   analyze("AGE") %>%
#'   build_table(tern_ex_adsl)
#'
#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2))
#'
#' @export
to_string_matrix <- function(x, widths = NULL, max_width = NULL,
                             hsep = formatters::default_hsep(),
                             with_spaces = TRUE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)
  checkmate::assert_int(max_width, null.ok = TRUE)

  # A ready-made `MatrixPrintForm` is used as is; anything else is converted first.
  mpf <- if (inherits(x, "MatrixPrintForm")) x else matrix_form(x, TRUE)

  # Text wrapping is switched on exactly when a maximum width is supplied.
  wrap_text <- !is.null(max_width)

  # Producing the matrix to test: either the rendered table split into lines, or
  # the raw cell strings without any layout.
  out <- if (with_spaces) {
    rendered <- toString(mpf, widths = widths, tf_wrap = wrap_text, max_width = max_width, hsep = hsep)
    strsplit(rendered, "\\n")[[1]]
  } else {
    # NOTE(review): `$string` presumably resolves to the `strings` element through
    # partial matching of `$` - confirm against the `MatrixPrintForm` structure.
    mpf$string
  }

  # Printing to console formatted output that needs to be copied in "expected".
  if (print_txt_to_copy) {
    lines_to_print <- if (with_spaces) {
      out
    } else {
      # One printed line per matrix row, cells separated as quoted strings.
      apply(out, 1, paste0, collapse = '", "')
    }
    cat(paste0('c(\n  "', paste0(lines_to_print, collapse = '",\n  "'), '"\n)'))
  }

  out
}

#' Blank for Missing Input
#'
#' Helper function to use in tabulating model results.
#'
#' @param x (`vector`)\cr input for a cell.
#'
#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
#'   the unlisted version of `x`.
#'
#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Anything other than "all missing" is passed through in its flattened form.
  if (!all(is.na(flat))) {
    return(flat)
  }
  # All entries missing (this includes empty input): signal a blank cell.
  character()
}

#' Constructor for Content Functions given Data Frame with Flag Input
#'
#' This can be useful for tabulating model results.
#'
#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
#'   content function.
#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
#' @param format (`string`)\cr `rtables` format to use.
#'
#' @return A content function which gives `df[[analysis_var]]` at the row identified by
#'   `df[[flag_var]]` in the given format.
#'
#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)

  # The returned content function picks the `analysis_var` value(s) from the rows
  # where the logical column `flag_var` is `TRUE`.
  function(df, labelstr) {
    flagged_rows <- which(df[[flag_var]])
    # An all-`NA` selection is turned into an empty character vector.
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}

#' Content Row Function to Add Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
#'
#' @inheritParams argument_convention
#'
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
#'   the former is already split by columns and will refer to the first column of the data only.
#'
#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
#'   `alt_counts_df` instead of `df`.
#'
#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # Row label with the row total appended, e.g. "<label> (N=<count>)".
  row_label <- paste0(labelstr, " (N=", .N_row, ")")
  # The count is stored in the cell (twice), but the format renders it as an empty string.
  count_cell <- formatters::with_label(c(.N_row, .N_row), row_label)
  blank_format <- function(x, ...) ""
  in_rows(
    .list = list(row_count = count_cell),
    .formats = c(row_count = blank_format)
  )
}

#' Content Row Function to Add `alt_counts_df` Row Total to Labels
#'
#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
#'
#' @inheritParams argument_convention
#'
#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
#'
#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
#'   of `alt_counts_df`.
#'
#' @keywords internal
c_label_n_alt <- function(df,
                          labelstr,
                          .alt_df_row) {
  # Row total is the number of rows of the alternative counts data for this row split.
  n_alt <- nrow(.alt_df_row)
  row_label <- paste0(labelstr, " (N=", n_alt, ")")
  # The count is stored in the cell (twice), but the format renders it as an empty string.
  count_cell <- formatters::with_label(c(n_alt, n_alt), row_label)
  blank_format <- function(x, ...) ""
  in_rows(
    .list = list(row_count = count_cell),
    .formats = c(row_count = blank_format)
  )
}

#' Layout Creating Function to Add Row Total Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
#'  is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
#'   or from `df` (`FALSE`). Defaults to `FALSE`.
#'
#' @return A modified layout where the latest row split labels now have the row-wise
#'   total counts (i.e. without column-based subsetting) attached in parentheses.
#'
#' @note Row count values are contained in these row count rows but are not displayed
#'   so that they are not considered zero rows by default when pruning.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'   add_rowcounts() %>%
#'   analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
#'   build_table(DM)
#'
#' @export
add_rowcounts <- function(lyt, alt_counts = FALSE) {
  # Choose the content function according to where the row counts come from:
  # `alt_counts_df` (`c_label_n_alt`) or `df` (`c_label_n`).
  count_cfun <- if (alt_counts) c_label_n_alt else c_label_n
  summarize_row_groups(lyt, cfun = count_cfun)
}

#' Obtain Column Indices
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to extract column indices from a `VTableTree` for a given
#' vector of column names.
#'
#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
#' @param col_names (`character`)\cr vector of column names.
#'
#' @return A vector of column indices.
#'
#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are read from the names of the `cextra_args` attribute of the column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)
  # Position of each requested name among the available columns.
  match(col_names, available_cols)
}

#' Labels or Names of List Elements
#'
#' Internal helper function for working with nested statistic function results which typically
#' don't have labels but names that we can use.
#'
#' @param x a list.
#'
#' @return A `character` vector with the labels or names for the list elements.
#'
#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))
  # Label of every element; presumably `NULL` where no label is set (see `obj_label()`).
  elem_labels <- sapply(x, obj_label)
  # Element names, as returned by `rlang::names2()`.
  elem_names <- rlang::names2(x)
  # Fall back to the element name wherever the label is `NULL`.
  label_missing <- sapply(elem_labels, is.null)
  unlist(ifelse(label_missing, elem_names, elem_labels))
}

#' Convert to `rtable`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a new generic function to convert objects to `rtable` tables.
#'
#' @param x the object which should be converted to an `rtable`.
#' @param ... additional arguments for methods.
#'
#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
#'
#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of the first argument, `x`.
  UseMethod("as.rtable")
}

#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
#'
#' @param format the format which should be used for the columns.
#'
#' @method as.rtable data.frame
#'
#' @examples
#' x <- data.frame(
#'   a = 1:10,
#'   b = rnorm(10)
#' )
#' as.rtable(x)
#'
#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  # Only data frames consisting of numeric values are supported.
  checkmate::assert_numeric(unlist(x))

  # Header (column labels, or names where no label is set) and the common cell format.
  table_args <- list(
    header = labels_or_names(x),
    format = format
  )

  # Builds one table row from the values of a data frame row and its row name.
  build_row <- function(row, row_name) {
    row_args <- c(as.list(unname(row)), row.name = row_name)
    do.call(rrow, row_args)
  }

  # Transposing turns the rows of `x` into columns, so `Map()` walks over them one by one.
  table_rows <- Map(
    build_row,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(table_args, table_rows))
}

#' Split parameters
#'
#' @description `r lifecycle::badge("stable")`
#'
#' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant
#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
#' specific analysis function.
#'
#' @param param (`vector`)\cr the parameter to be split.
#' @param value (`vector`)\cr the value used to split.
#' @param f (`list` of `vectors`)\cr the reference to make the split
#'
#' @return A named `list` with the same element names as `f`, each containing the elements of `param` whose
#'   `value` is found in the corresponding element of `f`, or `NULL` if there are none.
#'
#' @examples
#' f <- list(
#'   surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
#'   surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
#' )
#'
#' .stats <- c("pt_at_risk", "rate_diff")
#' h_split_param(.stats, .stats, f = f)
#'
#' # $surv
#' # [1] "pt_at_risk"
#' #
#' # $surv_diff
#' # [1] "rate_diff"
#'
#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
#' h_split_param(.formats, names(.formats), f = f)
#'
#' # $surv
#' # pt_at_risk event_free_rate
#' # "xx"           "xxx"
#' #
#' # $surv_diff
#' # NULL
#'
#' @export
h_split_param <- function(param,
                          value,
                          f) {
  # For every group in `f`, keep the entries of `param` whose `value` belongs to
  # that group. Groups without any match are returned as `NULL`.
  lapply(f, function(group_values) {
    selected <- param[value %in% group_values]
    if (length(selected) == 0) {
      NULL
    } else {
      selected
    }
  })
}

#' Get Selected Statistics Names
#'
#' Helper function to be used for creating `afun`.
#'
#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
#'   in this context that all default statistics should be used.
#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
#'
#' @return A `character` vector with the selected statistics.
#'
#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)

  # `NULL` means "no selection made": every available statistic is used.
  if (is.null(.stats)) {
    return(all_stats)
  }

  # Otherwise keep the requested statistics that are actually available.
  intersect(.stats, all_stats)
}

#' Add Variable Labels to Top Left Corner in Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper layout creating function to just append the variable labels of a given variables vector
#' from a given dataset in the top left corner. If a variable label is not found then the
#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
#'
#' @inheritParams argument_convention
#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
#'   1L means two spaces indent, 2L means four spaces indent and so on.
#'
#' @return A modified layout with the new variable label(s) added to the top-left material.
#'
#' @note This is not an optimal implementation of course, since we are using here the data set
#'   itself during the layout creation. When we have a more mature `rtables` implementation then
#'   this will also be improved or not necessary anymore.
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX") %>%
#'   append_varlabels(DM, "SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, "AGE", indent = 1)
#' build_table(lyt, DM)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("SEX") %>%
#'   analyze("AGE", afun = mean) %>%
#'   append_varlabels(DM, c("SEX", "AGE"))
#' build_table(lyt, DM)
#'
#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # Logical `indent` values are still accepted, with a warning, and converted to integer.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  # Labels of the requested variables, joined with slashes.
  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  joined_labs <- paste(var_labs, collapse = " / ")

  # Each indent level adds two leading spaces.
  padding <- strrep(" ", indent * 2)

  append_topleft(lyt, paste0(padding, joined_labs))
}

#' Default string replacement for `NA` values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The default string used to represent `NA` values. This value is used as the default
#' value for the `na_str` argument throughout the `tern` package, and printed in place
#' of `NA` values in output tables. If not specified for each `tern` function by the user
#' via the `na_str` argument, or in the R environment options via [set_default_na_str()],
#' then `NA` is used.
#'
#' @param na_str (`string`)\cr Single string value to set in the R environment options as
#'   the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
#'   current value set in the R environment (defaults to `NULL` if not set).
#'
#' @name default_na_str
NULL

#' @describeIn default_na_str Getter for default `NA` value replacement string.
#'
#' @return
#' * `default_na_str` returns the current value if an R environment option has been set
#'   for `"tern_default_na_str"`, or `NA_character_` otherwise.
#'
#' @examples
#' # Default settings
#' default_na_str()
#' getOption("tern_default_na_str")
#'
#' # Set custom value
#' set_default_na_str("<Missing>")
#'
#' # Settings after value has been set
#' default_na_str()
#' getOption("tern_default_na_str")
#'
#' @export
default_na_str <- function() {
  # Fall back to a missing string when the option has not been set.
  current <- getOption("tern_default_na_str")
  if (is.null(current)) {
    NA_character_
  } else {
    current
  }
}

#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the
#'   option `"tern_default_na_str"` within the R environment.
#'
#' @return
#' * `set_default_na_str` has no return value.
#'
#' @export
set_default_na_str <- function(na_str) {
  # Either a single string or `NULL` is accepted.
  checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
  options(tern_default_na_str = na_str)
}

#' Helper Functions for Tabulating Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that tabulate in a data frame statistics such as response rate
#' and odds ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param arm (`factor`)\cr the treatment group variable.
#'
#' @details Main functionality is to prepare data for use in a layout-creating function.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_subgroups
NULL

#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
#'
#' @return
#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
#'
#' @examples
#' h_proportion_df(
#'   c(TRUE, FALSE, FALSE),
#'   arm = factor(c("A", "A", "B"), levels = c("A", "B"))
#' )
#'
#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Only observations with a non-missing response contribute to the counts.
  keep <- !is.na(rsp)
  rsp <- rsp[keep]
  arm <- arm[keep]

  rsp_by_arm <- split(rsp, arm)

  # Builds the single-row summary for one arm; arms without data get `NA` statistics.
  summarize_arm <- function(arm_rsp, arm_name) {
    if (length(arm_rsp) == 0) {
      return(data.frame(arm = arm_name, n = 0L, n_rsp = NA, prop = NA, stringsAsFactors = FALSE))
    }
    n_prop <- s_proportion(df = arm_rsp)$n_prop
    data.frame(
      arm = arm_name,
      n = length(arm_rsp),
      n_rsp = unname(n_prop[1]),
      prop = unname(n_prop[2]),
      stringsAsFactors = FALSE
    )
  }
  rows <- Map(summarize_arm, rsp_by_arm, names(rsp_by_arm))

  # Stack the per-arm rows and restore the original factor levels of `arm`.
  out <- do.call(rbind, args = c(rows, make.row.names = FALSE))
  out$arm <- factor(out$arm, levels = levels(arm))
  out
}

#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
#'    in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
#'    requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
#'    groupings for `subgroups` variables.
#'
#' @return
#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
#'   `var`, `var_label`, and `row_type`.
#'
#' @examples
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Define groupings for BMRKR2 levels.
#' h_proportion_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  rsp_var <- variables$rsp
  arm_var <- variables$arm

  # Overall ("All Patients") rows come first and are flagged as content rows.
  overall <- h_proportion_df(data[[rsp_var]], data[[arm_var]])
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One result per subgroup level, each row annotated with the subgroup labels.
  splits <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(splits, function(grp) {
    props <- h_proportion_df(grp$df[[rsp_var]], grp$df[[arm_var]])
    labels <- grp$df_labels[rep(1, times = nrow(props)), ]
    cbind(props, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
#'   the odds ratio between a treatment and a control arm.
#'
#' @inheritParams response_subgroups
#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
#'
#' @return
#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
#'   optionally `pval` and `pval_label`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_df(
#'   c(TRUE, FALSE, FALSE, TRUE),
#'   arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#' )
#'
#' # Include p-value.
#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
#'
#' # Stratified analysis.
#' h_odds_ratio_df(
#'   rsp = adrs_f$rsp,
#'   arm = adrs_f$ARM,
#'   strata_data = adrs_f[, c("STRATA1", "STRATA2")],
#'   method = "cmh"
#' )
#'
#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(rsp = rsp, arm = arm)

  # All stratification variables are collapsed into one factor column named "strata".
  strata_name <- NULL
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    assert_valid_factor(strata_var, len = nrow(df_rsp))
    strata_name <- "strata"
    df_rsp[[strata_name]] <- strata_var
  }

  # First level of `arm` is the reference group, second level the comparison group.
  by_arm <- split(df_rsp, arm)
  ref_df <- by_arm[[1]]
  trt_df <- by_arm[[2]]
  n_ref <- nrow(ref_df)
  n_trt <- nrow(trt_df)
  with_test <- !is.null(method)

  if (n_ref > 0 && n_trt > 0) {
    # Odds ratio and CI.
    or_res <- s_odds_ratio(
      df = trt_df,
      .var = "rsp",
      .ref_group = ref_df,
      .in_ref_col = FALSE,
      .df_row = df_rsp,
      variables = list(arm = "arm", strata = strata_name),
      conf_level = conf_level
    )

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(or_res$n_tot["n_tot"]),
      or = unname(or_res$or_ci["est"]),
      lcl = unname(or_res$or_ci["lcl"]),
      ucl = unname(or_res$or_ci["ucl"]),
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (with_test) {
      # Test for difference.
      test_res <- s_test_proportion_diff(
        df = trt_df,
        .var = "rsp",
        .ref_group = ref_df,
        .in_ref_col = FALSE,
        variables = list(strata = strata_name),
        method = method
      )

      df$pval <- as.numeric(test_res$pval)
      df$pval_label <- obj_label(test_res$pval)
    }

    return(df)
  }

  # At least one arm is empty: the model cannot be fitted, so only `n_tot` is reported.
  # It is taken from the data when exactly one arm is empty and is zero when both are.
  n_tot <- if (n_ref == 0 && n_trt == 0) {
    0L
  } else {
    sum(stats::complete.cases(df_rsp))
  }

  df <- data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    or = NA,
    lcl = NA,
    ucl = NA,
    conf_level = conf_level,
    stringsAsFactors = FALSE
  )
  if (with_test) {
    df$pval <- NA
    df$pval_label <- NA
  }

  df
}

#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
#'   arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
#'   `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
#'   and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @return
#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
#'   `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @examples
#' # Unstratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#'
#' # Stratified analysis.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2"),
#'     strat = c("STRATA1", "STRATA2")
#'   ),
#'   data = adrs_f
#' )
#'
#' # Define groupings of BMRKR2 levels.
#' h_odds_ratio_subgroups_df(
#'   variables = list(
#'     rsp = "rsp",
#'     arm = "ARM",
#'     subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#'
#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Odds ratio results for one data set; stratified only if `strat` variables were given.
  odds_ratio_for <- function(d) {
    strata_data <- if (is.null(variables$strat)) {
      NULL
    } else {
      d[, variables$strat, drop = FALSE]
    }
    h_odds_ratio_df(
      rsp = d[[variables$rsp]],
      arm = d[[variables$arm]],
      strata_data = strata_data,
      conf_level = conf_level,
      method = method
    )
  }

  # Overall ("All Patients") row comes first and is flagged as a content row.
  overall <- odds_ratio_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One result per subgroup level, each row annotated with the subgroup labels.
  splits <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(splits, function(grp) {
    est <- odds_ratio_for(grp$df)
    labels <- grp$df_labels[rep(1, times = nrow(est)), ]
    cbind(est, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' Re-implemented [range()] Default S3 method for numerical objects
#'
#' This function returns `c(NA, NA)` instead of `c(Inf, -Inf)` for zero-length data
#' without any warnings.
#'
#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
#'
#' @return A 2-element vector of class `numeric`.
#'
#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`: dropping non-finite values also drops `NA`s.
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left to summarize: return missing bounds with the same storage type as `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  empty_range
}

#' Utility function to create label for confidence interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#'
#' @return A `string`.
#'
#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 becomes "95% CI".
  pct <- conf_level * 100
  paste0(pct, "% CI")
}

#' Utility function to create label for p-value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
#'
#' @return A `string`.
#'
#' @export
f_pval <- function(test_mean) {
  checkmate::assert_numeric(test_mean, len = 1)
  # Assemble the label from its pieces; the tested mean is coerced to character.
  label_parts <- c("p-value (H0: mean = ", test_mean, ")")
  paste(label_parts, collapse = "")
}

#' Utility function to return a named list of covariate names.
#'
#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
#'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'
#' @return A named `list` of `character` vector.
#'
#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Interaction terms such as "X1 * X2" are broken up at the literal `*`.
  term_parts <- strsplit(covariates, "*", fixed = TRUE)
  cov_vars <- unique(trimws(unlist(term_parts)))
  stats::setNames(as.list(cov_vars), cov_vars)
}

#' Replicate Entries of a Vector if Required
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Replicate entries of a vector if required.
#'
#' @inheritParams argument_convention
#' @param n (`count`)\cr how many entries we need.
#'
#' @return `x` if it has the required length already or is `NULL`,
#'   otherwise if it is scalar the replicated version of it with `n` entries.
#'
#' @note This function will fail if `x` is not `NULL` and is neither a scalar nor of length `n`.
#'
#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # Scalars are recycled to the requested length.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}

#' Check Element Dimension
#'
#' Checks if the elements in `...` have the same dimension.
#'
#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors. Elements may be named or
#'   unnamed; unnamed elements are referred to by their position (e.g. `..2`) in error messages.
#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
#'
#' @return `TRUE` if all checked elements have the same number of rows (data frames) or the same
#'   length (atomic vectors). Otherwise an error is thrown.
#'
#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # `names(dots)` is `NULL` when no element is named and contains "" for unnamed elements.
  # Positional labels are used in that case so that unnamed elements are still checked.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- is.na(arg_names) | !nzchar(arg_names)
  if (any(unnamed)) {
    arg_names[unnamed] <- paste0("..", which(unnamed))
  }

  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          # Marked as missing here and dropped before the comparison below.
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, arg_names
  )
  names(n_list) <- arg_names

  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every element whose size differs from the size of the first checked element.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}

#' Make Names Without Dots
#'
#' @param nams (`character`)\cr vector of original names.
#'
#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
#'
#' @keywords internal
make_names <- function(nams) {
  # Strip every literal dot from the syntactically valid names produced by `make.names()`.
  gsub(pattern = ".", replacement = "", x = make.names(nams), fixed = TRUE)
}

#' Conversion of Months to Days
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Conversion of Months to Days. This is an approximate calculation because it
#' considers each month as having an average of 30.4375 days.
#'
#' @param x (`numeric`)\cr time in months.
#'
#' @return A `numeric` vector with the time in days.
#'
#' @examples
#' x <- c(13.25, 8.15, 1, 2.834)
#' month2day(x)
#'
#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days (365.25 / 12).
  days_per_month <- 30.4375
  x * days_per_month
}

#' Conversion of Days to Months
#'
#' @param x (`numeric`)\cr time in days.
#'
#' @return A `numeric` vector with the time in months.
#'
#' @examples
#' x <- c(403, 248, 30, 86)
#' day2month(x)
#'
#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Average month length in days (365.25 / 12).
  days_per_month <- 30.4375
  x / days_per_month
}

#' Return an empty numeric if all elements are `NA`.
#'
#' @param x (`numeric`)\cr vector.
#'
#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
#'
#' @examples
#' x <- c(NA, NA, NA)
#' # Internal function - empty_vector_if_na
#' @keywords internal
empty_vector_if_na <- function(x) {
  # `all()` is `TRUE` for zero-length input, so empty vectors also map to `numeric()`.
  only_missing <- all(is.na(x))
  if (!only_missing) {
    return(x)
  }
  numeric()
}

#' Combine Two Vectors Element Wise
#'
#' @param x (`vector`)\cr first vector to combine.
#' @param y (`vector`)\cr second vector to combine.
#'
#' @return A `list` where each element combines corresponding elements of `x` and `y`.
#'
#' @examples
#' combine_vectors(1:3, 4:6)
#'
#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack both vectors as rows, so that each column holds one pair of elements.
  stacked <- as.data.frame(rbind(x, y))
  pairs <- lapply(stacked, `c`)
  unname(pairs)
}

#' Extract Elements by Name
#'
#' This utility function extracts elements from a vector `x` by `names`.
#' Differences to the standard `[` function are:
#'
#' - If `x` is `NULL`, then `NULL` is always returned (same as in base function).
#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
#'   elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
#'
#' @param x (named `vector`)\cr where to extract named elements from.
#' @param names (`character`)\cr vector of names to extract.
#'
#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
#'
#' @keywords internal
extract_by_name <- function(x, names) {
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Keep only the requested names that actually exist in `x`, in the order they appear in `x`.
  found <- intersect(names(x), names)
  if (length(found) == 0) {
    return(NULL)
  }
  x[found]
}

#' Labels for Adverse Event Baskets
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param aesi (`character`)\cr with standardized `MedDRA` query name (e.g. `SMQzzNAM`) or customized query
#'   name (e.g. `CQzzNAM`).
#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
#'
#' @return A `string` with the standard label for the `AE` basket.
#'
#' @examples
#' adae <- tern_ex_adae
#'
#' # Standardized query label includes scope.
#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
#'
#' # Customized query label.
#' aesi_label(adae$CQ01NAM)
#'
#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Capture the variable label before the values themselves are reduced.
  aesi_label <- obj_label(aesi)
  aesi_vals <- unique(sas_na(aesi))
  aesi_vals <- aesi_vals[!is.na(aesi_vals)]

  # Without exactly one distinct query name, fall back to the variable label.
  if (length(aesi_vals) != 1) {
    return(aesi_label)
  }
  if (is.null(scope)) {
    return(aesi_vals)
  }

  # With a scope, exactly one distinct non-missing scope value is required.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi_vals, " (", scope, ")")
}

#' Indicate Study Arm Variable in Formula
#'
#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
#'
#' @param x arm information
#'
#' @return `x`
#'
#' @keywords internal
study_arm <- function(x) {
  # Record the expression that was passed as `x` in the `varname` attribute.
  arm_expr <- substitute(x)
  structure(x, varname = deparse(arm_expr))
}

#' Smooth Function with Optional Grouping
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param x (`character`)\cr value with x column name.
#' @param y (`character`)\cr value with y column name.
#' @param groups (`character`)\cr vector with optional grouping variables names.
#' @param level (`numeric`)\cr level of the two-sided confidence interval to use (0.95 by default).
#'
#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
#'   optional `groups` variables formatted as `factor` type. Rows with missing values in
#'   any of the used variables are dropped. With `groups`, the rows are returned group by group.
#'
#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Fits a loess curve and returns the fitted values with two-sided Student confidence limits.
  smooth_with_ci <- function(x_val, y_val) {
    plx <- stats::predict(stats::loess(y_val ~ x_val), se = TRUE)
    # A two-sided interval with coverage `level` needs the (1 + level) / 2 quantile.
    half_width <- stats::qt((1 + level) / 2, plx$df) * plx$se.fit
    data.frame(
      x = x_val,
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y)])
    df_c <- df[cc, ]
    return(smooth_with_ci(df_c[[x]], df_c[[y]]))
  }

  cc <- stats::complete.cases(df[c(x, y, groups)])
  df_c <- df[cc, c(x, y, groups)]
  df_c[groups] <- lapply(df_c[groups], as.factor)
  # `unname()` so that group column names cannot be mistaken for arguments of `order()`.
  row_order <- do.call(order, unname(as.list(df_c[groups])))
  df_c_ordered <- df_c[row_order, , drop = FALSE]

  df_smooth_raw <- by(df_c_ordered, df_c_ordered[groups], function(d) {
    res <- smooth_with_ci(d[[x]], d[[y]])
    # Attach the group values inside each cell: `by()` returns the cells with the first
    # grouping factor varying fastest, which differs from the row order of the sorted data
    # as soon as there is more than one grouping variable.
    res[groups] <- d[groups]
    res
  })

  do.call(rbind, df_smooth_raw)
}

#' Number of Available (Non-Missing Entries) in a Vector
#'
#' Small utility function for better readability.
#'
#' @param x (`any`)\cr vector in which to count non-missing values.
#'
#' @return Number of non-missing values.
#'
#' @keywords internal
n_available <- function(x) {
  # Flag the non-missing entries, then count them.
  not_missing <- !is.na(x)
  sum(not_missing)
}

#' Reapply Variable Labels
#'
#' This is a helper function that is used in tests.
#'
#' @param x (`vector`)\cr vector of elements that needs new labels.
#' @param varlabels (`character`)\cr vector of labels for `x`.
#' @param ... further parameters to be added to the list.
#'
#' @return `x` with variable labels reapplied.
#'
#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels passed via `...` are appended to those in `varlabels`.
  named_labels <- c(as.list(varlabels), list(...))

  # Update the existing labels by name and write them back.
  current_labels <- formatters::var_labels(x)
  current_labels[names(named_labels)] <- as.character(named_labels)
  formatters::var_labels(x) <- current_labels
  x
}

# Wrapper function of survival::clogit so that when model fitting failed, a more useful message would show.
# The message of the underlying error is appended so that the actual cause of the failure is not lost.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop(
        "model not built successfully with survival::clogit: ", conditionMessage(e),
        # The call would otherwise point at the internal handler, which is not informative.
        call. = FALSE
      )
    }
  )
}

#' Tabulate Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' on a binary response endpoint across population subgroups.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot`: Total number of patients per group.
#'   * `n_rsp`: Total number of responses per group.
#'   * `prop`: Total response proportion per group.
#'   * `or`: Odds ratio.
#'   * `ci`: Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#'
#' \donttest{
#' ## Table with default columns.
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#'   df = df,
#'   vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    na_str = default_na_str(),
                                    .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))

  # Builds the sub-table of one biomarker and labels its first row with the biomarker label.
  make_biomarker_tab <- function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(
      df = df_sub,
      vars = vars,
      na_str = na_str,
      .indent_mods = .indent_mods
    )
    first_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_path) <- df_sub$biomarker_label[1]
    tab_sub
  }

  tabs <- lapply(split(df, f = df$biomarker), FUN = make_biomarker_tab)
  result <- do.call(rbind, tabs)

  # Attach the column positions of the odds ratio, its CI and the total count as attributes.
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}

#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#'   confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#'   `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#'   `response_definition` control to convert that internally to a logical
#'   variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `SEX`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 500).
#' df_grouped <- extract_rsp_biomarkers(
#'   variables = list(
#'     rsp = "EOSDY",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     subgroups = "BMRKR2",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   ),
#'   control = control_logistic(
#'     response_definition = "I(response > 500)"
#'   )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates for all biomarkers on one data set.
  estimate_for <- function(d) {
    h_logistic_mult_cont_df(
      variables = variables,
      data = d,
      control = control
    )
  }

  # Start with all patients; these rows are flagged as content rows.
  overall <- estimate_for(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    # Only return result for all patients.
    return(overall)
  }

  # Add subgroups results, each row annotated with the subgroup labels.
  splits <- h_split_by_subgroups(
    data,
    variables$subgroups,
    groups_lists = groups_lists
  )
  per_subgroup <- lapply(splits, function(grp) {
    est <- estimate_for(grp$df)
    labels <- grp$df_labels[rep(1, times = nrow(est)), ]
    cbind(est, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' Helper Functions for Multivariate Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in calculations for logistic regression.
#'
#' @inheritParams argument_convention
#' @param fit_glm (`glm`)\cr logistic regression model fitted by [stats::glm()] with "binomial" family.
#'   Limited functionality is also available for conditional logistic regression models fitted by
#'   [survival::clogit()], currently this is used only by [extract_rsp_biomarkers()].
#' @param x (`string` or `character`)\cr a variable or interaction term in `fit_glm` (depending on the
#'   helper function).
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @name h_logistic_regression
NULL

#' @describeIn h_logistic_regression Helper function to extract interaction variable names from a fitted
#'   model assuming only one interaction term.
#'
#' @return Vector of names of interaction variables.
#'
#' @export
h_get_interaction_vars <- function(fit_glm) {
  checkmate::assert_class(fit_glm, "glm")

  # Fetch the terms object once and read both attributes from it.
  model_terms <- stats::terms(fit_glm)
  is_second_order <- attr(model_terms, "order") == 2
  interaction_term <- attr(model_terms, "term.labels")[is_second_order]

  # Exactly one second-order term is supported.
  checkmate::assert_string(interaction_term)
  strsplit(interaction_term, split = ":")[[1]]
}

#' @describeIn h_logistic_regression Helper function to get the right coefficient name from the
#'   interaction variable names and the given levels. The main value here is that the order
#'   of first and second variable is checked in the `interaction_vars` input.
#'
#' @param interaction_vars (`character` of length 2)\cr interaction variable names.
#' @param first_var_with_level (`character` of length 2)\cr the first variable name with
#'   the interaction level.
#' @param second_var_with_level (`character` of length 2)\cr the second variable name with
#'   the interaction level.
#'
#' @return Name of coefficient.
#'
#' @export
h_interaction_coef_name <- function(interaction_vars,
                                    first_var_with_level,
                                    second_var_with_level) {
  checkmate::assert_character(interaction_vars, len = 2, any.missing = FALSE)
  checkmate::assert_character(first_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_character(second_var_with_level, len = 2, any.missing = FALSE)
  checkmate::assert_subset(c(first_var_with_level[1], second_var_with_level[1]), interaction_vars)

  # Coefficient fragments are the variable name directly followed by its level.
  first_piece <- paste0(first_var_with_level[1], first_var_with_level[2])
  second_piece <- paste0(second_var_with_level[1], second_var_with_level[2])

  # The fragment whose variable comes first in `interaction_vars` leads the name.
  leading_var <- interaction_vars[1]
  if (first_var_with_level[1] == leading_var) {
    paste0(first_piece, ":", second_piece)
  } else if (second_var_with_level[1] == leading_var) {
    paste0(second_piece, ":", first_piece)
  }
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when both the odds ratio and the interaction variable are categorical.
#'
#' @param odds_ratio_var (`string`)\cr the odds ratio variable.
#' @param interaction_var (`string`)\cr the interaction variable.
#'
#' @return Odds ratio.
#'
#' @export
h_or_cat_interaction <- function(odds_ratio_var,
                                 interaction_var,
                                 fit_glm,
                                 conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)

  or_levels <- fit_glm$xlevels[[odds_ratio_var]]
  inter_levels <- fit_glm$xlevels[[interaction_var]]
  model_coefs <- stats::coef(fit_glm)
  model_vcov <- stats::vcov(fit_glm)

  # Odds ratio and Wald confidence interval of `var_level` (vs. the reference
  # level of the odds ratio variable) at level `ref_level` of the interaction variable.
  or_at_level <- function(var_level, ref_level) {
    coef_names <- paste0(odds_ratio_var, var_level)
    # The interaction coefficient only enters away from the interaction reference level.
    if (ref_level != inter_levels[1]) {
      coef_names <- c(
        coef_names,
        h_interaction_coef_name(
          interaction_vars,
          c(odds_ratio_var, var_level),
          c(interaction_var, ref_level)
        )
      )
    }
    if (length(coef_names) > 1) {
      # Sum of main effect and interaction coefficient, with the matching variance.
      ones <- t(c(1, 1))
      est <- as.numeric(ones %*% model_coefs[coef_names])
      se <- sqrt(as.numeric(ones %*% model_vcov[coef_names, coef_names] %*% t(ones)))
    } else {
      est <- model_coefs[coef_names]
      se <- sqrt(as.numeric(model_vcov[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  # Outer list: non-reference levels of the odds ratio variable.
  # Inner list: all levels of the interaction variable.
  y <- list()
  for (var_level in or_levels[-1]) {
    x <- list()
    for (ref_level in inter_levels) {
      x[[ref_level]] <- or_at_level(var_level, ref_level)
    }
    y[[var_level]] <- x
  }
  y
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   for the case when either the odds ratio or the interaction variable is continuous.
#'
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. If `NULL`,
#'   the median of the interaction variable, rounded up to the next integer, is used.
#'
#' @return Odds ratio.
#'
#' @note We don't provide a function for the case when both variables are continuous because
#'   this does not arise in this table, as the treatment arm variable will always be involved
#'   and categorical.
#'
#' @export
h_or_cont_interaction <- function(odds_ratio_var,
                                  interaction_var,
                                  fit_glm,
                                  at = NULL,
                                  conf_level = 0.95) {
  interaction_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_string(odds_ratio_var)
  checkmate::assert_string(interaction_var)
  checkmate::assert_subset(c(odds_ratio_var, interaction_var), interaction_vars)
  checkmate::assert_vector(interaction_vars, len = 2)
  checkmate::assert_numeric(at, min.len = 1, null.ok = TRUE, any.missing = FALSE)

  model_coefs <- stats::coef(fit_glm)
  model_vcov <- stats::vcov(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")
  # Explicit `at` values only make sense for a numeric interaction variable.
  if (!is.null(at)) {
    checkmate::assert_set_equal(xs_class[interaction_var], "numeric")
  }

  # Odds ratio and Wald confidence interval for the linear combination of the
  # named coefficients; `weight` multiplies the second (interaction) coefficient.
  or_with_ci <- function(coef_names, weight) {
    if (length(coef_names) > 1) {
      xvec <- t(c(1, weight))
      est <- as.numeric(xvec %*% model_coefs[coef_names])
      se <- sqrt(as.numeric(xvec %*% model_vcov[coef_names, coef_names] %*% t(xvec)))
    } else {
      est <- model_coefs[coef_names]
      se <- sqrt(as.numeric(model_vcov[coef_names, coef_names]))
    }
    list(
      or = exp(est),
      ci = exp(est + c(lcl = -1, ucl = 1) * stats::qnorm((1 + conf_level) / 2) * se)
    )
  }

  y <- list()

  if (xs_class[interaction_var] == "numeric") {
    # Categorical odds ratio variable evaluated at given values of the
    # continuous interaction variable.
    if (is.null(at)) {
      at <- ceiling(stats::median(fit_glm$model[[interaction_var]]))
    }
    for (var_level in fit_glm$xlevels[[odds_ratio_var]][-1]) {
      x <- list()
      for (increment in at) {
        coef_names <- paste0(odds_ratio_var, var_level)
        # At a value of 0 the interaction coefficient does not contribute.
        if (increment != 0) {
          coef_names <- c(
            coef_names,
            h_interaction_coef_name(
              interaction_vars,
              c(odds_ratio_var, var_level),
              c(interaction_var, "")
            )
          )
        }
        x[[as.character(increment)]] <- or_with_ci(coef_names, increment)
      }
      y[[var_level]] <- x
    }
    return(y)
  }

  # Continuous odds ratio variable evaluated within each level of the
  # categorical interaction variable.
  checkmate::assert_set_equal(xs_class[odds_ratio_var], "numeric")
  checkmate::assert_set_equal(xs_class[interaction_var], "factor")
  inter_levels <- fit_glm$xlevels[[interaction_var]]
  for (var_level in inter_levels) {
    coef_names <- odds_ratio_var
    # The interaction coefficient only enters away from the reference level.
    if (var_level != inter_levels[1]) {
      coef_names <- c(
        coef_names,
        h_interaction_coef_name(
          interaction_vars,
          c(odds_ratio_var, ""),
          c(interaction_var, var_level)
        )
      )
    }
    y[[var_level]] <- or_with_ci(coef_names, 1)
  }
  y
}

#' @describeIn h_logistic_regression Helper function to calculate the odds ratio estimates
#'   in case of an interaction. This is a wrapper for [h_or_cont_interaction()] and
#'   [h_or_cat_interaction()].
#'
#' @return Odds ratio.
#'
#' @export
h_or_interaction <- function(odds_ratio_var,
                             interaction_var,
                             fit_glm,
                             at = NULL,
                             conf_level = 0.95) {
  # Data classes of the two variables involved, looked up once.
  var_classes <- attr(fit_glm$terms, "dataClasses")[c(odds_ratio_var, interaction_var)]

  # At least one numeric variable: continuous helper.
  if (any(var_classes == "numeric")) {
    return(
      h_or_cont_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        at = at,
        conf_level = conf_level
      )
    )
  }

  # Both variables are factors: categorical helper.
  if (all(var_classes == "factor")) {
    return(
      h_or_cat_interaction(
        odds_ratio_var,
        interaction_var,
        fit_glm,
        conf_level = conf_level
      )
    )
  }

  stop("wrong interaction variable class, the interaction variable is not a numeric nor a factor")
}

#' @describeIn h_logistic_regression Helper function to construct term labels from simple terms and the table
#'   of numbers of patients.
#'
#' @param terms (`character`)\cr simple terms.
#' @param table (`table`)\cr table containing numbers for terms.
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_simple_term_labels <- function(terms,
                                 table) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_multi_class(terms, classes = c("factor", "character"))

  # Index the table by name so that factor input is looked up by its labels.
  term_names <- as.character(terms)
  paste0(term_names, ", n = ", table[term_names])
}

#' @describeIn h_logistic_regression Helper function to construct term labels from interaction terms and the table
#'   of numbers of patients.
#'
#' @param terms1 (`character`)\cr terms for first dimension (rows).
#' @param terms2 (`character`)\cr terms for second dimension (columns).
#' @param any (`flag`)\cr whether any of `terms1` and `terms2` can be fulfilled to count the
#'   number of patients. In that case they can only be scalar (strings).
#'
#' @return Term labels containing numbers of patients.
#'
#' @export
h_interaction_term_labels <- function(terms1,
                                      terms2,
                                      table,
                                      any = FALSE) {
  checkmate::assert_true(is.table(table))
  checkmate::assert_flag(any)
  checkmate::assert_multi_class(terms1, classes = c("factor", "character"))
  checkmate::assert_multi_class(terms2, classes = c("factor", "character"))
  terms1 <- as.character(terms1)
  terms2 <- as.character(terms2)

  # Default: one label per (terms1, terms2) pair, counting the matching table cell.
  if (!any) {
    cell_counts <- table[cbind(terms1, terms2)]
    return(paste0(terms1, " * ", terms2, ", n = ", cell_counts))
  }

  # "Either" count: everything in row `terms1` or column `terms2`.
  checkmate::assert_scalar(terms1)
  checkmate::assert_scalar(terms2)
  # The cell [terms1, terms2] is part of both the row and the column, so it is
  # subtracted once to avoid double counting.
  n_either <- sum(c(table[terms1, ], table[, terms2])) - table[terms1, terms2]
  paste0(terms1, " or ", terms2, ", n = ", n_either)
}

#' @describeIn h_logistic_regression Helper function to tabulate the main effect
#'   results of a (conditional) logistic regression model.
#'
#' @return Tabulated main effect results from a logistic regression model.
#'
#' @examples
#' h_glm_simple_term_extract("AGE", mod1)
#' h_glm_simple_term_extract("ARMCD", mod1)
#'
#' @export
h_glm_simple_term_extract <- function(x, fit_glm) {
  # Tabulates the main effect results for the single model variable `x`:
  # one row per coefficient of `x`, preceded by a variable summary row
  # (reference level and Wald test) when `x` is not numeric.
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  checkmate::assert_string(x)

  xs_class <- attr(fit_glm$terms, "dataClasses")
  xs_level <- fit_glm$xlevels
  xs_coef <- summary(fit_glm)$coefficients
  # Names are the output column names, values the columns of the coefficient
  # table to read; the latter differ between `glm` and non-`glm` fits.
  stats <- if (inherits(fit_glm, "glm")) {
    c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  } else {
    c("estimate" = "coef", "std_error" = "se(coef)", "pvalue" = "Pr(>|z|)")
  }
  # Make sure x is not an interaction term.
  checkmate::assert_subset(x, names(xs_class))
  # Coefficient rows to extract: `x` itself for a numeric variable, otherwise
  # `x` pasted with each of its levels except the first (reference) one.
  x_sel <- if (xs_class[x] == "numeric") x else paste0(x, xs_level[[x]][-1])
  x_stats <- as.data.frame(xs_coef[x_sel, stats, drop = FALSE], stringsAsFactors = FALSE)
  colnames(x_stats) <- names(stats)
  # The statistics are stored as list columns so that they can later be row-bound
  # with the summary row, which holds empty (`numeric(0)`) entries.
  x_stats$estimate <- as.list(x_stats$estimate)
  x_stats$std_error <- as.list(x_stats$std_error)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$df <- as.list(1)
  if (xs_class[x] == "numeric") {
    # Numeric variable: a single term row, no variable summary row.
    x_stats$term <- x
    x_stats$term_label <- if (inherits(fit_glm, "glm")) {
      formatters::var_labels(fit_glm$data[x], fill = TRUE)
    } else {
      # We just fill in here with the `term` itself as we don't have the data available.
      x
    }
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
  } else {
    checkmate::assert_class(fit_glm, "glm")
    # The reason is that we don't have the original data set in the `clogit` object
    # and therefore cannot determine the `x_numbers` here.
    x_numbers <- table(fit_glm$data[[x]])
    # One term row per non-reference level, labelled with its count.
    x_stats$term <- xs_level[[x]][-1]
    x_stats$term_label <- h_simple_term_labels(x_stats$term, x_numbers)
    x_stats$is_variable_summary <- FALSE
    x_stats$is_term_summary <- TRUE
    # Variable summary row: reference level plus the type 3 Wald test
    # p-value and degrees of freedom for `x`.
    main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
    x_main <- data.frame(
      pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE],
      term = xs_level[[x]][1],
      term_label = paste("Reference", h_simple_term_labels(xs_level[[x]][1], x_numbers)),
      df = main_effects[x, "Df", drop = TRUE],
      stringsAsFactors = FALSE
    )
    x_main$pvalue <- as.list(x_main$pvalue)
    x_main$df <- as.list(x_main$df)
    x_main$estimate <- list(numeric(0))
    x_main$std_error <- list(numeric(0))
    # With a single non-reference level the summary row's p-value and degrees
    # of freedom are left empty.
    if (length(xs_level[[x]][-1]) == 1) {
      x_main$pvalue <- list(numeric(0))
      x_main$df <- list(numeric(0))
    }
    x_main$is_variable_summary <- TRUE
    x_main$is_term_summary <- FALSE
    # Summary row first, then the term rows.
    x_stats <- rbind(x_main, x_stats)
  }
  x_stats$variable <- x
  x_stats$variable_label <- if (inherits(fit_glm, "glm")) {
    formatters::var_labels(fit_glm$data[x], fill = TRUE)
  } else {
    x
  }
  # Interaction and reference columns are empty for simple terms.
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL
  # Return the columns in a fixed order.
  x_stats[c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction term
#'   results of a logistic regression model.
#'
#' @return Tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_interaction_extract("ARMCD:AGE", mod2)
#'
#' @export
h_glm_interaction_extract <- function(x, fit_glm) {
  vars <- h_get_interaction_vars(fit_glm)
  xs_class <- attr(fit_glm$terms, "dataClasses")

  checkmate::assert_string(x)

  # Only two-way interactions are handled.
  checkmate::assert_vector(vars, len = 2)

  # The first interaction variable (the arm) must be categorical.
  checkmate::assert_disjunct(xs_class[vars[1]], "numeric")

  first_var <- vars[1]
  second_var <- vars[2]
  coef_table <- summary(fit_glm)$coefficients
  main_effects <- car::Anova(fit_glm, type = 3, test.statistic = "Wald")
  # Names are the output column names, values the coefficient table columns.
  stat_cols <- c("estimate" = "Estimate", "std_error" = "Std. Error", "pvalue" = "Pr(>|z|)")
  first_levels <- fit_glm$xlevels[[first_var]]
  first_ref <- first_levels[1]
  first_comp <- first_levels[-1]

  if (xs_class[second_var] == "numeric") {
    # Categorical * numeric: one interaction coefficient per non-reference
    # level of the first variable.
    coef_rows <- paste0(first_var, first_comp, ":", second_var)
    x_stats <- as.data.frame(
      coef_table[coef_rows, stat_cols, drop = FALSE],
      stringsAsFactors = FALSE
    )
    colnames(x_stats) <- names(stat_cols)
    x_stats$term <- first_comp
    level_counts <- table(fit_glm$data[[first_var]])
    x_stats$term_label <- h_simple_term_labels(first_comp, level_counts)
    term_main <- first_ref
    ref_label <- h_simple_term_labels(first_ref, level_counts)
  } else {
    # Categorical * categorical: one coefficient per combination of
    # non-reference levels of both variables.
    second_levels <- fit_glm$xlevels[[second_var]]
    level_grid <- expand.grid(v1 = first_comp, v2 = second_levels[-1])
    coef_rows <- paste(
      paste0(first_var, level_grid$v1),
      paste0(second_var, level_grid$v2),
      sep = ":"
    )
    x_stats <- as.data.frame(
      coef_table[coef_rows, stat_cols, drop = FALSE],
      stringsAsFactors = FALSE
    )
    colnames(x_stats) <- names(stat_cols)
    x_stats$term <- paste(level_grid$v1, "*", level_grid$v2)
    cell_counts <- table(fit_glm$data[[first_var]], fit_glm$data[[second_var]])
    x_stats$term_label <- h_interaction_term_labels(level_grid$v1, level_grid$v2, cell_counts)
    term_main <- paste(first_var, second_var, sep = " * ")
    ref_label <- h_interaction_term_labels(first_ref, second_levels[1], cell_counts, any = TRUE)
  }
  x_stats$df <- as.list(1)
  x_stats$pvalue <- as.list(x_stats$pvalue)
  x_stats$is_variable_summary <- FALSE
  x_stats$is_term_summary <- TRUE

  # Variable summary row carrying the type 3 Wald test of the interaction term.
  x_main <- data.frame(
    pvalue = main_effects[x, "Pr(>Chisq)", drop = TRUE],
    term = term_main,
    term_label = paste("Reference", ref_label),
    df = main_effects[x, "Df", drop = TRUE],
    stringsAsFactors = FALSE
  )
  x_main$pvalue <- as.list(x_main$pvalue)
  x_main$df <- as.list(x_main$df)
  x_main$estimate <- list(numeric(0))
  x_main$std_error <- list(numeric(0))
  x_main$is_variable_summary <- TRUE
  x_main$is_term_summary <- FALSE

  # Summary row first, then the coefficient rows.
  x_stats <- rbind(x_main, x_stats)
  x_stats$variable <- x
  x_stats$variable_label <- paste(
    "Interaction of",
    formatters::var_labels(fit_glm$data[first_var], fill = TRUE),
    "*",
    formatters::var_labels(fit_glm$data[second_var], fill = TRUE)
  )
  x_stats$interaction <- ""
  x_stats$interaction_label <- ""
  x_stats$reference <- ""
  x_stats$reference_label <- ""
  rownames(x_stats) <- NULL

  # Return the columns in a fixed order.
  output_cols <- c(
    "variable",
    "variable_label",
    "term",
    "term_label",
    "interaction",
    "interaction_label",
    "reference",
    "reference_label",
    "estimate",
    "std_error",
    "df",
    "pvalue",
    "is_variable_summary",
    "is_term_summary"
  )
  x_stats[output_cols]
}

#' @describeIn h_logistic_regression Helper function to tabulate the interaction
#'   results of a logistic regression model. This basically is a wrapper for
#'   [h_or_interaction()] and [h_glm_simple_term_extract()] which puts the results
#'   in the right data frame format.
#'
#' @return A `data.frame` of tabulated interaction term results from a logistic regression model.
#'
#' @examples
#' h_glm_inter_term_extract("AGE", "ARMCD", mod2)
#'
#' @export
h_glm_inter_term_extract <- function(odds_ratio_var,
                                     interaction_var,
                                     fit_glm,
                                     ...) {
  # Tabulates, for `odds_ratio_var`, its main effect rows followed by one
  # "reference summary" row per odds ratio estimated at the levels/values of
  # `interaction_var`. `...` is forwarded to `h_or_interaction()`.

  # First obtain the main effects; odds ratio columns do not apply to them.
  main_stats <- h_glm_simple_term_extract(odds_ratio_var, fit_glm)
  main_stats$is_reference_summary <- FALSE
  main_stats$odds_ratio <- NA
  main_stats$lcl <- NA
  main_stats$ucl <- NA

  # Then we get the odds ratio estimates and put into df form.
  or_numbers <- h_or_interaction(odds_ratio_var, interaction_var, fit_glm, ...)
  is_num_or_var <- attr(fit_glm$terms, "dataClasses")[odds_ratio_var] == "numeric"

  # Pulls element `pos` of statistic `name` out of every result in `l`, where
  # each result is a list with elements `or` and `ci`. The helper works on its
  # own argument rather than on a variable captured from the enclosing scope.
  extract_from_list <- function(l, name, pos = 1) {
    unname(unlist(
      lapply(l, function(res) {
        res[[name]][pos]
      })
    ))
  }

  if (is_num_or_var) {
    # Numeric OR variable case: `or_numbers` holds one result per level of the
    # interaction variable.
    or_results <- or_numbers
    term_values <- odds_ratio_var
    term_labels <- unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE))
    reference_values <- names(or_numbers)
  } else {
    # Categorical OR variable case: `or_numbers` is nested (OR variable level,
    # then reference). Flatten one level so that results are in row order.
    references <- names(or_numbers[[1]])
    n_ref <- length(references)
    or_results <- unlist(or_numbers, recursive = FALSE)
    term_values <- rep(names(or_numbers), each = n_ref)
    term_labels <- h_simple_term_labels(term_values, table(fit_glm$data[[odds_ratio_var]]))
    # Kept as the (named) result of `unlist()`: `data.frame()` below takes the
    # row names of `or_stats` from these names.
    reference_values <- unlist(lapply(or_numbers, names))
  }

  # Column order is kept fixed; estimate-type columns do not apply to these rows.
  or_stats <- data.frame(
    variable = odds_ratio_var,
    variable_label = unname(formatters::var_labels(fit_glm$data[odds_ratio_var], fill = TRUE)),
    term = term_values,
    term_label = term_labels,
    interaction = interaction_var,
    interaction_label = unname(formatters::var_labels(fit_glm$data[interaction_var], fill = TRUE)),
    reference = reference_values,
    reference_label = reference_values,
    estimate = NA,
    std_error = NA,
    odds_ratio = extract_from_list(or_results, "or"),
    lcl = extract_from_list(or_results, "ci", pos = "lcl"),
    ucl = extract_from_list(or_results, "ci", pos = "ucl"),
    df = NA,
    pvalue = NA,
    is_variable_summary = FALSE,
    is_term_summary = FALSE,
    is_reference_summary = TRUE
  )

  df <- rbind(
    main_stats[, names(or_stats)],
    or_stats
  )
  # Variable summary rows first; within each term, the term row precedes its
  # reference rows, which are sorted by reference.
  df[order(-df$is_variable_summary, df$term, -df$is_term_summary, df$reference), ]
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of simple terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_simple_terms("AGE", mod1)
#'
#' @export
h_logistic_simple_terms <- function(x, fit_glm, conf_level = 0.95) {
  checkmate::assert_multi_class(fit_glm, c("glm", "clogit"))
  if (inherits(fit_glm, "glm")) {
    checkmate::assert_set_equal(fit_glm$family$family, "binomial")
  }

  # Term labels that are not variables of the model are interaction terms.
  terms_name <- attr(stats::terms(fit_glm), "term.labels")
  variable_names <- names(attr(fit_glm$terms, "dataClasses"))
  interaction <- terms_name[!(terms_name %in% variable_names)]
  checkmate::assert_subset(x, terms_name)
  if (length(interaction) != 0) {
    # None of the requested terms may take part in an interaction.
    checkmate::assert_disjunct(x, unlist(strsplit(interaction, ":")))
  }

  # Stack the per-variable tables, then add odds ratios and Wald limits.
  x_stats <- do.call(rbind, lapply(x, h_glm_simple_term_extract, fit_glm))
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  lower_limit <- function(or, se) exp(log(or) - q_norm * se)
  upper_limit <- function(or, se) exp(log(or) + q_norm * se)
  x_stats$odds_ratio <- lapply(x_stats$estimate, exp)
  x_stats$lcl <- Map(lower_limit, x_stats$odds_ratio, x_stats$std_error)
  x_stats$ucl <- Map(upper_limit, x_stats$odds_ratio, x_stats$std_error)
  x_stats$ci <- Map(function(lcl, ucl) c(lcl, ucl), lcl = x_stats$lcl, ucl = x_stats$ucl)
  x_stats
}

#' @describeIn h_logistic_regression Helper function to tabulate the results including
#'   odds ratios and confidence intervals of interaction terms.
#'
#' @return Tabulated statistics for the given variable(s) from the logistic regression model.
#'
#' @examples
#' h_logistic_inter_terms(c("RACE", "AGE", "ARMCD", "AGE:ARMCD"), mod2)
#'
#' @export
h_logistic_inter_terms <- function(x,
                                   fit_glm,
                                   conf_level = 0.95,
                                   at = NULL) {
  # The two interaction variables, and the element(s) of `x` mentioning both.
  inter_vars <- h_get_interaction_vars(fit_glm)
  checkmate::assert_vector(inter_vars, len = 2)
  mentions_first <- grep(inter_vars[1], x)
  mentions_second <- grep(inter_vars[2], x)
  inter_term <- x[intersect(mentions_first, mentions_second)]

  # Everything else in `x` is an ordinary main effect term.
  normal_terms <- setdiff(x, union(inter_vars, inter_term))

  # Main effect terms: estimates plus odds ratios with Wald limits.
  normal_stats <- do.call(rbind, lapply(normal_terms, h_glm_simple_term_extract, fit_glm))
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  lower_limit <- function(or, se) exp(log(or) - q_norm * se)
  upper_limit <- function(or, se) exp(log(or) + q_norm * se)
  normal_stats$odds_ratio <- lapply(normal_stats$estimate, exp)
  normal_stats$lcl <- Map(lower_limit, normal_stats$odds_ratio, normal_stats$std_error)
  normal_stats$ucl <- Map(upper_limit, normal_stats$odds_ratio, normal_stats$std_error)
  normal_stats$is_reference_summary <- FALSE

  # The interaction term itself: no odds ratios are reported for it.
  inter_term_stats <- h_glm_interaction_extract(inter_term, fit_glm)
  inter_term_stats$odds_ratio <- NA
  inter_term_stats$lcl <- NA
  inter_term_stats$ucl <- NA
  inter_term_stats$is_reference_summary <- FALSE

  # `at` is passed only to the call in which the first interaction variable
  # acts as the interaction variable when it is numeric, and to the other call
  # otherwise.
  is_intervar1_numeric <- attr(fit_glm$terms, "dataClasses")[inter_vars[1]] == "numeric"
  if (is_intervar1_numeric) {
    at_one <- NULL
    at_two <- at
  } else {
    at_one <- at
    at_two <- NULL
  }

  # Odds ratios of each interaction variable given the other one.
  inter_stats_one <- h_glm_inter_term_extract(
    inter_vars[1],
    inter_vars[2],
    fit_glm,
    conf_level = conf_level,
    at = at_one
  )
  inter_stats_two <- h_glm_inter_term_extract(
    inter_vars[2],
    inter_vars[1],
    fit_glm,
    conf_level = conf_level,
    at = at_two
  )

  # Stack everything with a common column order.
  col_names <- c(
    "variable", "variable_label",
    "term", "term_label",
    "interaction", "interaction_label",
    "reference", "reference_label",
    "estimate", "std_error", "df", "pvalue",
    "odds_ratio", "lcl", "ucl",
    "is_variable_summary", "is_term_summary", "is_reference_summary"
  )
  df <- rbind(
    inter_stats_one[, col_names],
    inter_stats_two[, col_names],
    inter_term_stats[, col_names]
  )
  # Main effect rows, when there are any, come first.
  if (length(normal_terms) > 0) {
    df <- rbind(normal_stats[, col_names], df)
  }
  df$ci <- combine_vectors(df$lcl, df$ucl)
  df
}

#' Univariate Formula Special Term
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The special term `univariate` indicates that the model should be fitted individually for
#' every variable included in `univariate`.
#'
#' @param x A vector of variable name separated by commas.
#'
#' @return When used within a model formula, produces univariate models for each variable provided.
#'
#' @details
#' If provided alongside with pairwise specification, the model
#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
#' + `y ~ ARM`
#' + `y ~ ARM + SEX`
#' + `y ~ ARM + AGE`
#' + `y ~ ARM + RACE`
#'
#' @export
univariate <- function(x) {
  # Capture the expression supplied for `x` as text; it is stored alongside
  # the value in the `varname` attribute.
  varname <- deparse(substitute(x))
  structure(x, varname = varname)
}

# Get the right-hand-term of a formula
rht <- function(x) {
  checkmate::assert_formula(x)
  # The right-hand side is the last element of the formula, i.e. the first
  # element once reversed.
  rhs <- rev(x)[[1]]
  as.character(rhs)
}

#' Hazard Ratio Estimation in Interactions
#'
#' This function estimates the hazard ratios between arms when an interaction variable is given with
#' specific values.
#'
#' @param variable,given Names of the two variables in the interaction. We seek the estimation of the
#'   levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given Corresponding levels as given by `levels`.
#' @param mmat A named numeric vector filled with 0, used as a template to obtain the design matrix.
#' @param coef Numeric of estimated coefficients.
#' @param vcov Variance-covariance matrix of underlying model.
#' @param conf_level Single numeric for the confidence level of estimate intervals.
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
#'   will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
#'   therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
#'   as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$. The confidence limits of the hazard ratio
#'   are obtained by exponentiating the coefficient plus or minus $1.96$ times this standard error
#'   for a confidence level of 0.95.
#'
#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
#'   `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @seealso [s_cox_multivariate()].
#'
#' @examples
#' library(dplyr)
#' library(survival)
#'
#' ADSL <- tern_ex_adsl %>%
#'   filter(SEX %in% c("F", "M"))
#'
#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
#' adtte$ARMCD <- droplevels(adtte$ARMCD)
#' adtte$SEX <- droplevels(adtte$SEX)
#'
#' mod <- coxph(
#'   formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
#'   data = adtte
#' )
#'
#' mmat <- stats::model.matrix(mod)[1, ]
#' mmat[!mmat == 0] <- 0
#'
#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient names are the variable name pasted with the level.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)

  # One row per combination of a non-reference level of `variable` and a level
  # of `given`, sorted by `variable` and then `given`.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Add the interaction coefficient name in both possible orders, so that it is
  # found in `mmat` whichever order the model used.
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  # Keep the grouping and the labels before `design_mat` is overwritten below.
  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # For each combination, copy the template `mmat` and set to 1 the entries named
  # after the `variable` level and the interaction (either order); the `given`
  # main effect is excluded. The result has one column per combination.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      return(mmat)
    }
  )
  colnames(design_mat) <- interaction_names

  betas <- as.matrix(coef)

  # Linear combination of the coefficients for each combination.
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"

  # Standard error of each combination: square root of the sum of all entries of
  # the variance-covariance sub-matrix of the selected coefficients.
  coef_se <- apply(design_mat, 2, function(x) {
    vcov_el <- as.logical(x)
    y <- vcov[vcov_el, vcov_el]
    y <- sum(y)
    y <- sqrt(y)
    return(y)
  })

  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)

  # Append the hazard ratio and its confidence limits to each row.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])

    return(x)
  })

  # `apply()` returned one column per combination; transpose back to one row per
  # combination, then split into one matrix per level of `variable`.
  y <- t(y)
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)

  # Attach a description of what the estimates are compared against.
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  return(y)
}

#' `tryCatch` around `car::Anova`
#'
#' Captures warnings when executing [car::Anova].
#'
#' @inheritParams car::Anova
#'
#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
#'
#' @examples
#' # `car::Anova` on cox regression model including strata and expected
#' # a likelihood ratio test triggers a warning as only `Wald` method is
#' # accepted.
#'
#' library(survival)
#'
#' mod <- coxph(
#'   formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
#'   data = ovarian
#' )
#'
#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Runs a type III `car::Anova()` on `mod` with the requested `test.statistic`,
  # muffling warnings and keeping the text of the last one raised.
  # Returns a list with `aov` (the Anova result) and `warn_text` (the prefixed
  # warning message, or NULL when no warning was raised).
  warn_text <- NULL

  aov <- withCallingHandlers(
    expr = car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Record the message only: pasting the condition object itself would go
      # through `as.character()` and drag the condition class and call into the text.
      warn_text <<- trimws(paste0("Warning in `try_car_anova`: ", conditionMessage(w)))

      # A warning is sometimes expected, so resume the computation where it
      # was signalled instead of letting the warning propagate.
      invokeRestart("muffleWarning")
    }
  )

  list(aov = aov, warn_text = warn_text)
}

#' Fit the Cox Regression Model and `Anova`
#'
#' This function derives the effect p-values from the [survival::coxph()] results using [car::Anova()].
#' The latter provides more flexibility for obtaining the effect p-values.
#'
#' @inheritParams t_coxreg
#'
#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
#'   `aov` (result of [car::Anova()]).
#'
#' @noRd
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  # `data` is required and `conf_level` has a real default: the former
  # self-referential defaults (`data = data`, `conf_level = conf_level`) could
  # never be evaluated and failed with "promise already under evaluation".
  pval_method <- match.arg(pval_method)

  # Evaluate the formula in this frame, where `data` is defined.
  # NOTE(review): presumably needed so that a later re-evaluation of the model
  # call (e.g. inside `car::Anova()`) can still find `data` - confirm.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  # Map the user-facing method name onto the `test.statistic` value of `car::Anova()`.
  aov <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Surface a captured warning as a message and keep it as an attribute of the result.
  warn_attr <- aov$warn_text
  if (!is.null(warn_attr)) message(warn_attr)

  y <- list(mod = mod, msum = msum, aov = aov$aov)
  attr(y, "message") <- warn_attr

  y
}

# argument_checks
# Stops with an informative error unless `formula` inherits from "formula";
# returns NULL invisibly otherwise.
check_formula <- function(formula) {
  is_formula <- inherits(formula, "formula")
  if (!is_formula) {
    stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
  }

  invisible(NULL)
}

# Stops unless `covariates` is non-NULL and each of its elements inherits from
# "formula"; returns NULL invisibly otherwise.
check_covariate_formulas <- function(covariates) {
  all_formulas <- all(vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE))
  if (is.null(covariates) || !all_formulas) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible(NULL)
}

# Fills in missing names of `covariates`: entries without a name (or with an
# empty one) are named by applying `rht()` to the entry. Returns the named list.
name_covariate_names <- function(covariates) {
  current_names <- names(covariates)
  blank <- current_names == ""

  if (is.null(current_names)) {
    # No names at all: derive every name.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else if (any(blank)) {
    # Only some names are empty: derive just those.
    names(covariates)[blank] <- vapply(covariates[blank], FUN = rht, FUN.VALUE = "name")
  }

  covariates
}

# Warns once for every name in `increments` that is not among the values
# obtained by applying `rht()` to each element of `covariates`.
# Does nothing when `increments` is NULL. Always returns NULL invisibly.
check_increments <- function(increments, covariates) {
  if (is.null(increments)) {
    return(invisible())
  }

  covariate_vars <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  increment_names <- names(increments)
  unmatched <- increment_names[!increment_names %in% covariate_vars]

  lapply(
    X = unmatched, FUN = function(x) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  )

  invisible()
}

#' Multivariate Cox Model - Summarized Results
#'
#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
#' covariates included in the model.
#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
#' the p.values need to be interpreted with caution. (**Statistical Analysis of Clinical Trials Data with R**,
#' `NEST's bookdown`)
#'
#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
#'   including covariates.
#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
#'   `"wald"` (default) or `"likelihood"`.
#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
#'   method for tie handling, one of `efron` (the [survival::coxph()] default), `breslow`, or `exact`.
#'
#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
#'
#' @details The output is limited to single effect terms. Work is ongoing for the estimation of interaction terms
#'   but is out of scope as defined by the Global Data Standards Repository
#'   (**`GDS_Standard_TLG_Specs_Tables_2.doc`**).
#'
#' @seealso [estimate_coef()].
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
#' adtte_f <- filter(
#'   adtte_f,
#'   PARAMCD == "OS" &
#'     SEX %in% c("F", "M") &
#'     RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
#' )
#' adtte_f$SEX <- droplevels(adtte_f$SEX)
#' adtte_f$RACE <- droplevels(adtte_f$RACE)
#'
#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  pval_method <- match.arg(pval_method)

  # Covariates are the variables of the formula other than the response (first
  # row of the factors matrix) and the `strata()` special.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors so that `levels()` is
  # available for the interaction estimates below.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }

  # Fit the Cox model and derive the effect p-values.
  fit <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- fit$mod
  aov <- fit$aov
  msum <- fit$msum

  all_term_labs <- attr(mod$terms, "term.labels")
  term_order <- attr(mod$terms, "order")

  # Interaction terms (order > 1) get conditional hazard ratio estimates;
  # `coef_inter` stays NULL for a main-effects-only model.
  coef_inter <- NULL
  if (any(term_order > 1)) {
    for_inter <- all_term_labs[term_order > 1]
    names(for_inter) <- for_inter

    # All-zero template carrying the coefficient names of the design matrix.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    coef_inter <- lapply(
      for_inter, function(x) {
        # Variables involved in the interaction term `x`.
        involved <- attr(mod$terms, "factors")[, x]
        involved <- names(involved[involved > 0])
        # Each variable is estimated given the levels of the other one.
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

#' Tabulate Survival Duration by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_coxph_pairwise
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_survival_subgroups()].
#' @param vars (`character`)\cr the name of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_events`: Number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `n`: Number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
#'   are required.
#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. Tables typically used as part of forest plot.
#'
#' @seealso [extract_survival_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c(
#'   "ARM" = adtte_labels[["ARM"]],
#'   "SEX" = adtte_labels[["SEX"]],
#'   "AVALU" = adtte_labels[["AVALU"]],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' df_grouped <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name survival_duration_subgroups
#' @order 1
NULL

#' Prepares Survival Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams survival_duration_subgroups
#' @inheritParams survival_coxph_pairwise
#'
#' @return A named `list` of two elements:
#'   * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
#'     `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [survival_duration_subgroups]
#'
#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Both data frames are derived from the same `variables`, `data`, `groups_lists`
  # and `label_all`; only the hazard ratio helper takes `control`.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}

#' @describeIn survival_duration_subgroups  Formatted analysis function which is used as
#'   `afun` in `tabulate_survival_subgroups()`.
#'
#' @return
#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_survival_subgroups <- function(.formats = list( # nolint start
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)"
                                 ),
                                 na_str = default_na_str()) { # nolint end
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # Builds the analysis function of one statistic. Rows are labelled by subgroup.
  # "ci" is not a column of its own: its cells combine `lcl` and `ucl`.
  make_afun <- function(stat, fmt, na_str) {
    force(stat)
    force(fmt)
    force(na_str)
    function(df, labelstr = "", ...) {
      cell_values <- if (stat == "ci") {
        combine_vectors(df$lcl, df$ucl)
      } else {
        as.list(df[[stat]])
      }
      in_rows(
        .list = cell_values,
        .labels = as.character(df$subgroup),
        .formats = fmt,
        .format_na_strs = na_str
      )
    }
  }

  # One analysis function per statistic, named after the statistic.
  Map(
    make_afun,
    stat = names(.formats),
    fmt = .formats,
    na_str = na_str
  )
}

#' @describeIn survival_duration_subgroups Table-creating function which creates a table
#'   summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @return An `rtables` table summarizing survival by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#'
#' ## Table with a manually chosen set of columns: adding "pval".
#' basic_table() %>%
#'   tabulate_survival_subgroups(
#'     df = df,
#'     vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
#'     time_unit = adtte_f$AVALU[1]
#'   )
#'
#' @export
#' @order 2
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        groups_lists = list(),
                                        label_all = "All Patients",
                                        time_unit = NULL,
                                        na_str = default_na_str()) {
  # Confidence level and p-value label are read from the first row of the hazard ratio data.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  extra_args <- list(groups_lists = groups_lists, conf_level = conf_level, method = method, label_all = label_all)

  afun_lst <- a_survival_subgroups(na_str = na_str)
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Split the requested columns between the survival time table (per-arm
  # statistics) and the hazard ratio table.
  in_survtime <- names(colvars$labels) %in% c("n", "n_events", "median")
  in_hr <- names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")
  colvars_survtime <- list(
    vars = colvars$vars[in_survtime],
    labels = colvars$labels[in_survtime]
  )
  colvars_hr <- list(
    vars = colvars$vars[in_hr],
    labels = colvars$labels[in_hr]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)],
      na_str = na_str,
      extra_args = extra_args
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Subgroup rows are only laid out when the data contains "analysis" rows.
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)],
    na_str = na_str,
    extra_args = extra_args
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This layout is built against `df$hr`, so the presence of subgroup rows is
  # checked on `df$hr` itself rather than on `df$survtime`.
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      na_str = na_str,
      inclNAs = TRUE,
      extra_args = extra_args
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total counts first, then the survival time columns,
    # then the remaining hazard ratio columns.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    n_leading_cols <- length(n_tot_ids) + ncol(table_survtime)
    hr_id <- n_leading_cols + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- n_leading_cols + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  # The column positions are attached as attributes of the returned table.
  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}

#' Labels for Column Variables in Survival Duration by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
#'
#' @inheritParams tabulate_survival_subgroups
#' @inheritParams argument_convention
#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
#'
#' @return A `list` of variables and their labels to tabulate.
#'
#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
#'
#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  # `vars` must contain "hr" and "ci", at least one of the total counts, and
  # nothing outside the supported set of statistics.
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(
    vars,
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # The unit is appended to the median label only when one is supplied.
  median_label <- "Median"
  if (!is.null(time_unit)) {
    median_label <- paste0("Median (", time_unit, ")")
  }

  all_labels <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # "ci" is not a variable of the analysis data, so `lcl` stands in for it as
  # the column variable; the cell content itself is built from both `lcl` and
  # `ucl`, see `a_survival_subgroups` for details.
  list(
    vars = replace(vars, vars == "ci", "lcl"),
    labels = all_labels[vars]
  )
}

#' Summarize Variables in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This analyze function uses the S3 generic function [s_summary()] to summarize different variables
#' that are arranged in columns. Additional standard formatting arguments are available. It is a
#' minimal wrapper for [rtables::analyze_colvars()]. The latter function is meant to add different
#' analysis methods for each column variables as different rows. To have the analysis methods as
#' column labels, please refer to [analyze_vars_in_cols()].
#'
#' @inheritParams argument_convention
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`].
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9)),
#'   CHG = c(1:9, rep(NA, 9))
#' )
#'
#' ## Default output within a `rtables` pipeline.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars() %>%
#'   build_table(dta_test)
#'
#' ## Selection of statistics, formats and labels also work.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(
#'     .stats = c("n", "mean_sd"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD")
#'   ) %>%
#'   build_table(dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
#'   summarize_colvars(na.rm = FALSE) %>%
#'   build_table(dta_test)
#'
#' @export
summarize_colvars <- function(lyt,
                              ...,
                              na_level = lifecycle::deprecated(),
                              na_str = default_na_str(),
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # `na_level` is deprecated: when supplied it overrides `na_str`, with a warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "summarize_colvars(na_level)", "summarize_colvars(na_str)")
    na_str <- na_level
  }

  # Formatting arguments are forwarded only when they were actually supplied.
  optional_args <- list(.formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  optional_args <- optional_args[!vapply(optional_args, is.null, logical(1))]
  extra_args <- c(list(.stats = .stats, na_str = na_str, ...), optional_args)

  analyze_colvars(
    lyt,
    afun = a_summary,
    na_str = na_str,
    extra_args = extra_args
  )
}

#' Tabulate Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate statistics such as response rate and odds ratio for population subgroups.
#'
#' @inheritParams extract_rsp_subgroups
#' @inheritParams argument_convention
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. Tables typically used as part of forest plot.
#'
#' @seealso [extract_rsp_subgroups()]
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' # Unstratified analysis.
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f
#' )
#' df
#'
#' # Stratified analysis.
#' df_strat <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
#'   data = adrs_f
#' )
#' df_strat
#'
#' # Grouping of the BMRKR2 levels.
#' df_grouped <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
#'   data = adrs_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name response_subgroups
#' @order 1
NULL

#' Prepares Response Data for Population Subgroups in Data Frames
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param label_all (`string`)\cr label for the total population analysis.
#'
#' @return A named list of two elements:
#'   * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
#'     `var_label`, and `row_type`.
#'   * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
#'     `subgroup`, `var`, `var_label`, and `row_type`.
#'
#' @seealso [response_subgroups]
#'
#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Both data frames are derived from the same `variables`, `data`, `groups_lists`
  # and `label_all`; only the odds ratio helper takes `conf_level` and `method`.
  list(
    prop = h_proportion_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    or = h_odds_ratio_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      conf_level = conf_level,
      method = method,
      label_all = label_all
    )
  )
}

#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
#'
#' @return
#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx", # nolint start
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   pval = "x.xxxx | (<0.0001)" # nolint end
                                 ),
                                 na_str = default_na_str()) {
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Builds the analysis function of one statistic. Rows are labelled by subgroup.
  # "ci" is not a column of its own: its cells combine `lcl` and `ucl`.
  make_afun <- function(stat, fmt, na_str) {
    force(stat)
    force(fmt)
    force(na_str)
    function(df, labelstr = "", ...) {
      cell_values <- if (stat == "ci") {
        combine_vectors(df$lcl, df$ucl)
      } else {
        as.list(df[[stat]])
      }
      in_rows(
        .list = cell_values,
        .labels = as.character(df$subgroup),
        .formats = fmt,
        .format_na_strs = na_str
      )
    }
  }

  # One analysis function per statistic, named after the statistic.
  Map(
    make_afun,
    stat = names(.formats),
    fmt = .formats,
    na_str = na_str
  )
}

#' @describeIn response_subgroups Table-creating function which creates a table
#'   summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
#'   and [rtables::summarize_row_groups()].
#'
#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
#'   created using [extract_rsp_subgroups()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n`: Total number of observations per group.
#'   * `n_rsp`: Number of responders per group.
#'   * `prop`: Proportion of responders.
#'   * `n_tot`: Total number of observations.
#'   * `or`: Odds ratio.
#'   * `ci` : Confidence interval of odds ratio.
#'   * `pval`: p-value of the effect.
#'   Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing binary response by subgroup.
#'
#' @examples
#' ## Table with default columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#'
#' ## Table with selected columns.
#' basic_table() %>%
#'   tabulate_rsp_subgroups(
#'     df = df,
#'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
#'   )
#'
#' @export
#' @order 2
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci"),
                                   groups_lists = list(),
                                   label_all = "All Patients",
                                   na_str = default_na_str()) {
  # The confidence level and (optional) p-value label are read from the first row of the odds ratio data.
  or_data <- df$or
  conf_level <- or_data$conf_level[1]
  method <- NULL
  if ("pval_label" %in% names(or_data)) {
    method <- or_data$pval_label[1]
  }

  extra_args <- list(groups_lists = groups_lists, conf_level = conf_level, method = method, label_all = label_all)

  afun_lst <- a_response_subgroups(na_str = na_str)
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Split the requested statistics into the per-arm proportion columns and the odds ratio columns.
  stat_names <- names(colvars$labels)
  is_prop_stat <- stat_names %in% c("n", "prop", "n_rsp")
  is_or_stat <- stat_names %in% c("n_tot", "or", "ci", "pval")
  colvars_prop <- list(vars = colvars$vars[is_prop_stat], labels = colvars$labels[is_prop_stat])
  colvars_or <- list(vars = colvars$vars[is_or_stat], labels = colvars$labels[is_or_stat])

  # Keeps only the rows of one `row_type` level as a new, non-nested, unlabelled row split.
  split_by_row_type <- function(layout, type) {
    split_rows_by(
      lyt = layout,
      var = "row_type",
      split_fun = keep_split_levels(type),
      nested = FALSE,
      child_labels = "hidden"
    )
  }

  # Adds the "All Patients" (content) row for the given statistics.
  add_content_rows <- function(layout, stats) {
    analyze_colvars(
      lyt = split_by_row_type(layout, "content"),
      afun = afun_lst[stats],
      na_str = na_str,
      extra_args = extra_args
    )
  }

  # Adds the subgroup (analysis) rows, grouped by `var_label`, for the given statistics.
  add_analysis_rows <- function(layout, stats) {
    layout <- split_by_row_type(layout, "analysis")
    layout <- split_rows_by(lyt = layout, var = "var_label", nested = TRUE)
    analyze_colvars(
      lyt = layout,
      afun = afun_lst[stats],
      na_str = na_str,
      inclNAs = TRUE,
      extra_args = extra_args
    )
  }

  # Columns from table_prop are optional.
  table_prop <- NULL
  if (length(colvars_prop$vars) > 0) {
    prop_stats <- names(colvars_prop$labels)
    lyt_prop <- lyt %>%
      split_cols_by(var = "arm") %>%
      split_cols_by_multivar(vars = colvars_prop$vars, varlabels = colvars_prop$labels) %>%
      add_content_rows(prop_stats)

    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- add_analysis_rows(lyt_prop, prop_stats)
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  or_stats <- names(colvars_or$labels)
  lyt_or <- lyt %>%
    split_cols_by(var = "arm") %>%
    split_cols_by_multivar(vars = colvars_or$vars, varlabels = colvars_or$labels) %>%
    add_content_rows(or_stats) %>%
    append_topleft("Baseline Risk Factors")

  if ("analysis" %in% df$or$row_type) {
    lyt_or <- add_analysis_rows(lyt_or, or_stats)
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Work out where the odds ratio, CI and total-n columns end up in the final table.
  n_tot_pos <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    col_x <- match("or", colvars_or$vars)
    col_ci <- match("lcl", colvars_or$vars)
    col_size <- n_tot_pos
  } else {
    # Total n goes first, then the proportion columns, then the remaining odds ratio columns.
    result <- cbind_rtables(table_or[, n_tot_pos], table_prop, table_or[, -n_tot_pos])
    remaining_vars <- colvars_or$vars[-n_tot_pos]
    shift <- 1L + ncol(table_prop)
    col_x <- shift + match("or", remaining_vars)
    col_ci <- shift + match("lcl", remaining_vars)
    col_size <- 1L
  }

  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = col_x,
    col_ci = col_ci,
    col_symbol_size = col_size
  )
}

#' Labels for Column Variables in Binary Response by Subgroup Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
#'
#' @inheritParams argument_convention
#' @inheritParams tabulate_rsp_subgroups
#'
#' @return A `list` of variables to tabulate and their labels.
#'
#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # `vars` must contain the three required statistics and nothing outside the supported set.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  labels_by_stat <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )
  tab_vars <- vars

  if ("ci" %in% tab_vars) {
    checkmate::assert_false(is.null(conf_level))

    labels_by_stat <- c(labels_by_stat, ci = paste0(100 * conf_level, "% CI"))

    # The `lcl` variable is just a placeholder available in the analysis data,
    # it is not actually used on its own in the tabulation.
    # Variables used in the tabulation are lcl and ucl, see `a_response_subgroups` for details.
    tab_vars <- replace(tab_vars, tab_vars == "ci", "lcl")
  }

  if ("pval" %in% tab_vars) {
    # The p-value column is labelled with the name of the test method.
    labels_by_stat <- c(labels_by_stat, pval = method)
  }

  # Labels are returned in the order in which the statistics were requested.
  list(
    vars = tab_vars,
    labels = labels_by_stat[vars]
  )
}

#' Control Function for Descriptive Statistics
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for summaries of descriptive statistics. Typically used internally to specify
#' details for [s_summary()]. This function family is mainly used by [analyze_vars()].
#'
#' @inheritParams argument_convention
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles to calculate.
#' @param quantile_type (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'   Default is set to 2 as this matches the default quantile algorithm in SAS `proc univariate` set by `QNTLDEF=5`.
#'   This differs from R's default. See more about `type` in [stats::quantile()].
#' @param test_mean (`numeric`)\cr to test against the mean under the null hypothesis when calculating p-value.
#'
#' @note Deprecation cycle started for `control_summarize_vars` as it is going to be renamed to
#'   `control_analyze_vars`. The intention is to better reflect the core underlying `rtables`
#'   functions; in this case [analyze_vars()] wraps [rtables::analyze()].
#'
#' @return A list of components with the same names as the arguments.
#'
#' @export control_analyze_vars control_summarize_vars
#' @aliases control_summarize_vars
control_analyze_vars <- function(conf_level = 0.95,
                                 quantiles = c(0.25, 0.75),
                                 quantile_type = 2,
                                 test_mean = 0) {
  # Validate every setting up front; nothing is computed here.
  checkmate::assert_vector(quantiles, len = 2)
  checkmate::assert_int(quantile_type, lower = 1, upper = 9)
  checkmate::assert_numeric(test_mean)
  # Each of the two quantiles, and the confidence level, is checked with `assert_proportion_value()`.
  lapply(quantiles, assert_proportion_value)
  assert_proportion_value(conf_level)
  # Return the checked settings as a list whose names match the arguments.
  list(conf_level = conf_level, quantiles = quantiles, quantile_type = quantile_type, test_mean = test_mean)
}

# The previous name is bound to the same function, so both names behave identically.
control_summarize_vars <- control_analyze_vars


#' Analyze Variables
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The analyze function [analyze_vars()] generates a summary of one or more variables, using the S3 generic function
#' [s_summary()] to calculate a list of summary statistics. A list of all available statistics for numeric
#' variables can be viewed by running `get_stats("analyze_vars_numeric")` and for non-numeric variables by running
#' `get_stats("analyze_vars_counts")`. Use the `.stats` parameter to specify the statistics to include in your output
#' summary table.
#'
#' @details
#' **Automatic digit formatting:** The number of digits to display can be automatically determined from the analyzed
#' variable(s) (`vars`) for certain statistics by setting the statistic format to `"auto"` in `.formats`.
#' This utilizes the [format_auto()] formatting function. Note that only data for the current row & variable (for all
#' columns) will be considered (`.df_row[[.var]]`, see [`rtables::additional_fun_params`]) and not the whole dataset.
#'
#' @note
#' * Deprecation cycle started for `summarize_vars` which has been renamed to `analyze_vars`. This renaming is intended
#'   to better reflect its core underlying `rtables` functions - in this case [rtables::analyze()].
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("analyze_vars_numeric")` to see
#'   statistics available for numeric variables, and `get_stats("analyze_vars_counts")` for statistics available
#'   for non-numeric variables.
#'
#' @name analyze_variables
#' @order 1
NULL

#' @describeIn analyze_variables S3 generic function that produces a variable summary.
#'
#' @return
#' * `s_summary()` returns different statistics depending on the class of `x`.
#'
#' @export
s_summary <- function(x,
                      na.rm = TRUE, # nolint
                      denom,
                      .N_row, # nolint
                      .N_col, # nolint
                      .var,
                      ...) {
  # `na.rm` is validated here, before dispatch, so it must be a single `TRUE`/`FALSE` for every method.
  checkmate::assert_flag(na.rm)
  # Dispatch on the class of `x`; this file defines methods for numeric, factor, character and logical.
  UseMethod("s_summary", x)
}

#' @describeIn analyze_variables Method for `numeric` class.
#'
#' @param control (`list`)\cr parameters for descriptive statistics details, specified by using
#'   the helper function [control_analyze_vars()]. Some possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for mean and median.
#'   * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles.
#'   * `quantile_type` (`numeric`)\cr between 1 and 9 selecting quantile algorithms to be used.
#'     See more about `type` in [stats::quantile()].
#'   * `test_mean` (`numeric`)\cr value to test against the mean under the null hypothesis when calculating p-value.
#'
#' @return
#'   * If `x` is of class `numeric`, returns a `list` with the following named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `sum`: The [sum()] of `x`.
#'     * `mean`: The [mean()] of `x`.
#'     * `sd`: The [stats::sd()] of `x`.
#'     * `se`: The standard error of `x` mean, i.e.: (`sd(x) / sqrt(length(x))`).
#'     * `mean_sd`: The [mean()] and [stats::sd()] of `x`.
#'     * `mean_se`: The [mean()] of `x` and its standard error (see above).
#'     * `mean_ci`: The CI for the mean of `x` (from [stat_mean_ci()]).
#'     * `mean_sei`: The SE interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()] / [sqrt()]).
#'     * `mean_sdi`: The SD interval for the mean of `x`, i.e.: ([mean()] -/+ [stats::sd()]).
#'     * `mean_pval`: The two-sided p-value of the mean of `x` (from [stat_mean_pval()]).
#'     * `median`: The [stats::median()] of `x`.
#'     * `mad`: The median absolute deviation of `x`, i.e.: ([stats::median()] of `xc`,
#'       where `xc` = `x` - [stats::median()]).
#'     * `median_ci`: The CI for the median of `x` (from [stat_median_ci()]).
#'     * `quantiles`: Two sample quantiles of `x` (from [stats::quantile()]).
#'     * `iqr`: The [stats::IQR()] of `x`.
#'     * `range`: The [range_noinf()] of `x`.
#'     * `min`: The [min()] of `x`.
#'     * `max`: The [max()] of `x`.
#'     * `median_range`: The [median()] and [range_noinf()] of `x`.
#'     * `cv`: The coefficient of variation of `x`, i.e.: ([stats::sd()] / [mean()] * 100).
#'     * `geom_mean`: The geometric mean of `x`, i.e.: (`exp(mean(log(x)))`).
#'     * `geom_mean_ci`: The CI for the geometric mean of `x` (from [stat_mean_ci()]).
#'     * `geom_cv`: The geometric coefficient of variation of `x`, i.e.: (`sqrt(exp(sd(log(x)) ^ 2) - 1) * 100`).
#'
#' @note
#' * If `x` is an empty vector, `NA` is returned. This is the expected feature so as to return `rcell` content in
#'   `rtables` when the intersection of a column and a row delimits an empty data selection.
#' * When the `mean` function is applied to an empty vector, `NA` will be returned instead of `NaN`, the latter
#'   being standard behavior in R.
#'
#' @method s_summary numeric
#'
#' @examples
#' # `s_summary.numeric`
#'
#' ## Basic usage: empty numeric returns NA-filled items.
#' s_summary(numeric())
#'
#' ## Management of NA values.
#' x <- c(NA_real_, 1)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' x <- c(NA_real_, 1, 2)
#' s_summary(x, stats = NULL)
#'
#' ## Benefits in `rtables` constructions:
#' require(rtables)
#' dta_test <- data.frame(
#'   Group = rep(LETTERS[1:3], each = 2),
#'   sub_group = rep(letters[1:2], each = 3),
#'   x = 1:6
#' )
#'
#' ## The summary obtained with `rtables`:
#' basic_table() %>%
#'   split_cols_by(var = "Group") %>%
#'   split_rows_by(var = "sub_group") %>%
#'   analyze(vars = "x", afun = s_summary) %>%
#'   build_table(df = dta_test)
#'
#' ## By comparison with `lapply`:
#' X <- split(dta_test, f = with(dta_test, interaction(Group, sub_group)))
#' lapply(X, function(x) s_summary(x$x))
#'
#' @export
s_summary.numeric <- function(x,
                              na.rm = TRUE, # nolint
                              denom,
                              .N_row, # nolint
                              .N_col, # nolint
                              .var,
                              control = control_analyze_vars(),
                              ...) {
  checkmate::assert_numeric(x)

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  # From here on `x` is summarized as is (`na.rm = FALSE` everywhere), so any remaining `NA` propagates.
  is_empty <- length(x) == 0
  has_na <- any(is.na(x))
  # The standard deviation feeds several statistics below, so it is computed once.
  sd_x <- stats::sd(x, na.rm = FALSE)

  y <- list()

  y$n <- c("n" = length(x))

  # Empty input yields `NA` rather than the 0 of `sum()` or the `NaN` of `mean()`.
  y$sum <- c("sum" = if (is_empty) NA_real_ else sum(x, na.rm = FALSE))

  y$mean <- c("mean" = if (is_empty) NA_real_ else mean(x, na.rm = FALSE))

  y$sd <- c("sd" = sd_x)

  y$se <- c("se" = sd_x / sqrt(length(stats::na.omit(x))))

  y$mean_sd <- c(y$mean, "sd" = sd_x)

  y$mean_se <- c(y$mean, y$se)

  mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$mean_ci <- formatters::with_label(mean_ci, paste("Mean", f_conf_level(control$conf_level)))

  mean_sei <- y$mean[[1]] + c(-1, 1) * sd_x / sqrt(y$n)
  names(mean_sei) <- c("mean_sei_lwr", "mean_sei_upr")
  y$mean_sei <- formatters::with_label(mean_sei, "Mean -/+ 1xSE")

  mean_sdi <- y$mean[[1]] + c(-1, 1) * sd_x
  names(mean_sdi) <- c("mean_sdi_lwr", "mean_sdi_upr")
  y$mean_sdi <- formatters::with_label(mean_sdi, "Mean -/+ 1xSD")

  mean_pval <- stat_mean_pval(x, test_mean = control$test_mean, na.rm = FALSE, n_min = 2)
  y$mean_pval <- formatters::with_label(mean_pval, paste("Mean", f_pval(control$test_mean)))

  y$median <- c("median" = stats::median(x, na.rm = FALSE))

  # NOTE(review): this is the median of the signed deviations from the median, not of their absolute
  # values, so it differs from the usual median absolute deviation. Left unchanged because changing it
  # would alter reported values - confirm the intended definition.
  y$mad <- c("mad" = stats::median(x - y$median, na.rm = FALSE))

  median_ci <- stat_median_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE)
  y$median_ci <- formatters::with_label(median_ci, paste("Median", f_conf_level(control$conf_level)))

  # `stats::quantile()` and `stats::IQR()` fail on `NA` input when `na.rm = FALSE`, hence the explicit guards.
  q <- control$quantiles
  if (has_na) {
    qnts <- rep(NA_real_, length(q))
  } else {
    qnts <- stats::quantile(x, probs = q, type = control$quantile_type, na.rm = FALSE)
  }
  names(qnts) <- paste("quantile", q, sep = "_")
  y$quantiles <- formatters::with_label(qnts, paste0(paste(paste0(q * 100, "%"), collapse = " and "), "-ile"))

  y$iqr <- c("iqr" = if (has_na) NA_real_ else stats::IQR(x, na.rm = FALSE, type = control$quantile_type))

  y$range <- stats::setNames(range_noinf(x, na.rm = FALSE), c("min", "max"))
  y$min <- y$range[1]
  y$max <- y$range[2]

  y$median_range <- formatters::with_label(c(y$median, y$range), "Median (Min - Max)")

  y$cv <- c("cv" = unname(y$sd) / unname(y$mean) * 100)

  # Convert non-positive values (zero included) to NA for the log calculation.
  x_no_negative_vals <- x
  x_no_negative_vals[x_no_negative_vals <= 0] <- NA
  y$geom_mean <- c("geom_mean" = exp(mean(log(x_no_negative_vals), na.rm = FALSE)))
  geom_mean_ci <- stat_mean_ci(x, conf_level = control$conf_level, na.rm = FALSE, gg_helper = FALSE, geom_mean = TRUE)
  y$geom_mean_ci <- formatters::with_label(geom_mean_ci, paste("Geometric Mean", f_conf_level(control$conf_level)))

  y$geom_cv <- c("geom_cv" = sqrt(exp(stats::sd(log(x_no_negative_vals), na.rm = FALSE) ^ 2) - 1) * 100) # styler: off

  y
}

#' @describeIn analyze_variables Method for `factor` class.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `factor` or converted from `character`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x`.
#'     * `count`: A list with the number of cases for each level of the factor `x`.
#'     * `count_fraction`: Similar to `count` but also includes the proportion of cases for each level of the
#'       factor `x` relative to the denominator, or `0` if the denominator is zero.
#'
#' @note
#' * If `x` is an empty `factor`, a list is still returned for `count` with one element
#'   per factor level. If there are no levels in `x`, the function fails.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#'
#' @method s_summary factor
#'
#' @examples
#' # `s_summary.factor`
#'
#' ## Basic usage:
#' s_summary(factor(c("a", "a", "b", "c", "a")))
#'
#' # Empty factor returns zero-filled items.
#' s_summary(factor(levels = c("a", "b", "c")))
#'
#' ## Management of NA values.
#' x <- factor(c(NA, "Female"))
#' x <- explicit_na(x)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- factor(c("a", "a", "b", "c", "a"))
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.factor <- function(x,
                             na.rm = TRUE, # nolint
                             denom = c("n", "N_row", "N_col"),
                             .N_row, # nolint
                             .N_col, # nolint
                             ...) {
  assert_valid_factor(x)
  denom <- match.arg(denom)

  # Either drop missing values (and the explicit "<Missing>" level) or turn them into an "NA" level.
  x <- if (na.rm) {
    fct_discard(x[!is.na(x)], "<Missing>")
  } else {
    explicit_na(x, label = "NA")
  }

  level_counts <- as.list(table(x, useNA = "ifany"))

  # Only the denominator that was asked for is evaluated, so unused `.N_row`/`.N_col` may be missing.
  dn <- switch(denom,
    n = length(x),
    N_row = .N_row,
    N_col = .N_col
  )

  list(
    n = length(x),
    count = level_counts,
    # Per level: the count followed by its share of the denominator (0 when the denominator is not positive).
    count_fraction = lapply(
      level_counts,
      function(cnt) {
        c(cnt, ifelse(dn > 0, cnt / dn, 0))
      }
    ),
    # Number of values whose text matches one of the below-limit-of-quantification patterns.
    n_blq = sum(grepl("BLQ|LTR|<[1-9]|<PCLLOQ", x))
  )
}

#' @describeIn analyze_variables Method for `character` class. This makes an automatic
#'   conversion to factor (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Defaults to `TRUE`, which prints out warnings and messages. It is mainly used
#'   to print out information about factor casting.
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee that the table
#'   can be generated correctly. In particular for sparse tables this very likely can fail.
#'   It is therefore better to always pre-process the dataset such that factors are manually
#'   created from character variables before passing the dataset to [rtables::build_table()].
#'
#' @method s_summary character
#'
#' @examples
#' # `s_summary.character`
#'
#' ## Basic usage:
#' s_summary(c("a", "a", "b", "c", "a"), .var = "x", verbose = FALSE)
#' s_summary(c("a", "a", "b", "c", "a", ""), .var = "x", na.rm = FALSE, verbose = FALSE)
#'
#' @export
s_summary.character <- function(x,
                                na.rm = TRUE, # nolint
                                denom = c("n", "N_row", "N_col"),
                                .N_row, # nolint
                                .N_col, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Cast to factor; when missing values are kept they are given the level "NA".
  x_fct <- if (na.rm) {
    as_factor_keep_attributes(x, verbose = verbose)
  } else {
    as_factor_keep_attributes(x, verbose = verbose, na_level = "NA")
  }

  # Hand over to the factor method through the generic.
  s_summary(
    x = x_fct,
    na.rm = na.rm,
    denom = denom,
    .N_row = .N_row,
    .N_col = .N_col,
    ...
  )
}

#' @describeIn analyze_variables Method for `logical` class.
#'
#' @param denom (`string`)\cr choice of denominator for proportion. Options are:
#'   * `n`: number of values in this row and column intersection.
#'   * `N_row`: total number of values in this row across columns.
#'   * `N_col`: total number of values in this column across rows.
#'
#' @return
#'   * If `x` is of class `logical`, returns a `list` with named `numeric` items:
#'     * `n`: The [length()] of `x` (possibly after removing `NA`s).
#'     * `count`: Count of `TRUE` in `x`.
#'     * `count_fraction`: Count and proportion of `TRUE` in `x` relative to the denominator, or `0` if the
#'       denominator is zero. Note that `NA`s in `x` are never counted and never lead to `NA` here.
#'
#' @method s_summary logical
#'
#' @examples
#' # `s_summary.logical`
#'
#' ## Basic usage:
#' s_summary(c(TRUE, FALSE, TRUE, TRUE))
#'
#' # Empty logical vector returns zero-filled items.
#' s_summary(as.logical(c()))
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' s_summary(x, na.rm = TRUE)
#' s_summary(x, na.rm = FALSE)
#'
#' ## Different denominators.
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' s_summary(x, denom = "N_row", .N_row = 10L)
#' s_summary(x, denom = "N_col", .N_col = 20L)
#'
#' @export
s_summary.logical <- function(x,
                              na.rm = TRUE, # nolint
                              denom = c("n", "N_row", "N_col"),
                              .N_row, # nolint
                              .N_col, # nolint
                              ...) {
  denom <- match.arg(denom)

  if (na.rm) {
    x <- x[!is.na(x)]
  }

  n_obs <- length(x)
  # `NA`s never count as `TRUE`, whether or not they were removed above.
  n_true <- sum(x, na.rm = TRUE)

  # Only the denominator that was asked for is evaluated, so unused `.N_row`/`.N_col` may be missing.
  dn <- switch(denom,
    n = n_obs,
    N_row = .N_row,
    N_col = .N_col
  )

  list(
    n = n_obs,
    count = n_true,
    # Count of `TRUE` followed by its share of the denominator (0 when the denominator is not positive).
    count_fraction = c(n_true, ifelse(dn > 0, n_true / dn, 0)),
    n_blq = 0L
  )
}

#' @describeIn analyze_variables Formatted analysis function which is used as `afun` in `analyze_vars()` and
#'   `compare_vars()` and as `cfun` in `summarize_colvars()`.
#'
#' @param compare (`logical`)\cr Whether comparison statistics should be analyzed instead of summary statistics
#'   (`compare = TRUE` adds `pval` statistic comparing against reference group).
#'
#' @return
#' * `a_summary()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @note
#' * To use for comparison (with additional p-value statistic), parameter `compare` must be set to `TRUE`.
#' * Ensure that either all `NA` values are converted to an explicit `NA` level or all `NA` values are left as is.
#'
#' @examples
#' a_summary(factor(c("a", "a", "b", "c", "a")), .N_row = 10, .N_col = 10)
#' a_summary(
#'   factor(c("a", "a", "b", "c", "a")),
#'   .ref_group = factor(c("a", "a", "b", "c")), compare = TRUE
#' )
#'
#' a_summary(c("A", "B", "A", "C"), .var = "x", .N_col = 10, .N_row = 10, verbose = FALSE)
#' a_summary(
#'   c("A", "B", "A", "C"),
#'   .ref_group = c("B", "A", "C"), .var = "x", compare = TRUE, verbose = FALSE
#' )
#'
#' a_summary(c(TRUE, FALSE, FALSE, TRUE, TRUE), .N_row = 10, .N_col = 10)
#' a_summary(
#'   c(TRUE, FALSE, FALSE, TRUE, TRUE),
#'   .ref_group = c(TRUE, FALSE), .in_ref_col = TRUE, compare = TRUE
#' )
#'
#' a_summary(rnorm(10), .N_col = 10, .N_row = 20, .var = "bla")
#' a_summary(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .var = "bla", compare = TRUE)
#'
#' @export
a_summary <- function(x,
                      .N_col, # nolint
                      .N_row, # nolint
                      .var = NULL,
                      .df_row = NULL,
                      .ref_group = NULL,
                      .in_ref_col = FALSE,
                      compare = FALSE,
                      .stats = NULL,
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = NULL,
                      na.rm = TRUE, # nolint
                      na_level = lifecycle::deprecated(),
                      na_str = default_na_str(),
                      ...) {
  extra_args <- list(...)
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "a_summary(na_level)", "a_summary(na_str)")
    na_str <- na_level
  }

  # Numeric input uses the numeric statistics, everything else the count statistics.
  x_is_numeric <- is.numeric(x)
  type <- if (x_is_numeric) "numeric" else "counts"

  # Any requested statistic starting with "pval" is mapped to the p-value name of the current type.
  if (!is.null(.stats)) {
    is_pval <- grepl("^pval", .stats)
    if (any(is_pval)) {
      .stats[is_pval] <- if (x_is_numeric) "pval" else "pval_counts" # tmp fix xxx
    }
  }

  # If one col has NA vals, must add NA row to other cols (using placeholder lvl `fill-na-level`)
  if (any(is.na(.df_row[[.var]])) && !any(is.na(x)) && !na.rm) {
    levels(x) <- c(levels(x), "fill-na-level")
  }

  # Compute the raw statistics: plain summary, or comparison against the reference group.
  if (compare) {
    x_stats <- s_compare(
      x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, .ref_group = .ref_group, .in_ref_col = .in_ref_col, ...
    )
  } else {
    x_stats <- s_summary(x = x, .N_col = .N_col, .N_row = .N_row, na.rm = na.rm, ...)
  }

  # Fill in with formatting defaults if needed
  .stats <- get_stats(paste("analyze_vars", type, sep = "_"), stats_in = .stats, add_pval = compare)
  .formats <- get_formats_from_stats(.stats, .formats)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  # Check for custom labels from control_analyze_vars
  default_labels <- get_labels_from_stats(.stats, .labels)
  if ("control" %in% names(extra_args)) {
    .labels <- labels_use_control(default_labels, extra_args[["control"]], .labels)
  } else {
    .labels <- default_labels
  }

  # `count_fraction_fixed_dp` reuses the `count_fraction` values.
  if ("count_fraction_fixed_dp" %in% .stats) {
    x_stats[["count_fraction_fixed_dp"]] <- x_stats[["count_fraction"]]
  }
  x_stats <- x_stats[.stats]

  if (is.factor(x) || is.character(x)) {
    # Ungroup statistics with values for each level of x
    ungrouped <- ungroup_stats(x_stats, .formats, .labels, .indent_mods)
    x_stats <- ungrouped[["x"]]
    .formats <- ungrouped[[".formats"]]
    # The placeholder level added above is shown as "NA".
    .labels <- gsub("fill-na-level", "NA", ungrouped[[".labels"]])
    .indent_mods <- ungrouped[[".indent_mods"]]
  }

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var)

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = na_str
  )
}

#' Constructor Function for [analyze_vars()] and [summarize_colvars()]
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [analyze_vars()].
#'
#' @note This function has been deprecated in favor of direct implementation of `a_summary()`.
#'
#' @seealso [analyze_vars()]
#'
#' @export
create_afun_summary <- function(.stats, .formats, .labels, .indent_mods) {
  lifecycle::deprecate_warn(
    when = "0.8.5.9010",
    what = "create_afun_summary()",
    details = "Please use a_summary() directly instead."
  )

  # The returned closure forwards to `a_summary()` with the statistics and formatting settings given above.
  afun <- function(x,
                   .ref_group,
                   .in_ref_col,
                   ...,
                   .var) {
    a_summary(
      x,
      .var = .var,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      .stats = .stats,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods,
      ...
    )
  }

  afun
}

#' @describeIn analyze_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_summary()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `analyze_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_summary()` to the table layout.
#'
#' @examples
#' ## Fabricated dataset.
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   PARAMCD = rep("lab", 6 * 3),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' # `analyze_vars()` in `rtables` pipelines
#' ## Default output within a `rtables` pipeline.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL")
#'
#' build_table(l, df = dta_test)
#'
#' ## Select and format statistics output.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(
#'     vars = "AVAL",
#'     .stats = c("n", "mean_sd", "quantiles"),
#'     .formats = c("mean_sd" = "xx.x, xx.x"),
#'     .labels = c(n = "n", mean_sd = "Mean, SD", quantiles = c("Q1 - Q3"))
#'   )
#'
#' build_table(l, df = dta_test)
#'
#' ## Use arguments interpreted by `s_summary`.
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL", na.rm = FALSE)
#'
#' build_table(l, df = dta_test)
#'
#' ## Handle `NA` levels first when summarizing factors.
#' dta_test$AVISIT <- NA_character_
#' dta_test <- df_explicit_na(dta_test)
#' l <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   analyze_vars(vars = "AVISIT", na.rm = FALSE)
#'
#' build_table(l, df = dta_test)
#'
#' # auto format
#' dt <- data.frame("VAR" = c(0.001, 0.2, 0.0011000, 3, 4))
#' basic_table() %>%
#'   analyze_vars(
#'     vars = "VAR",
#'     .stats = c("n", "mean", "mean_sd", "range"),
#'     .formats = c("mean_sd" = "auto", "range" = "auto")
#'   ) %>%
#'   build_table(dt)
#'
#' @export analyze_vars summarize_vars
#' @order 2
analyze_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         na_level = lifecycle::deprecated(),
                         na_str = default_na_str(),
                         nested = TRUE,
                         ...,
                         na.rm = TRUE, # nolint
                         show_labels = "default",
                         table_names = vars,
                         section_div = NA_character_,
                         .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "analyze_vars(na_level)", "analyze_vars(na_str)")
    na_str <- na_level
  }

  # Arguments handed to `a_summary()`; the optional formatting settings are only passed on when supplied.
  optional_args <- list(.formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  extra_args <- c(
    list(.stats = .stats, na.rm = na.rm, na_str = na_str, ...),
    Filter(Negate(is.null), optional_args)
  )

  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    afun = a_summary,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    inclNAs = TRUE,
    show_labels = show_labels,
    table_names = table_names,
    section_div = section_div
  )
}
#' @describeIn analyze_variables `r lifecycle::badge("deprecated")` Deprecated wrapper which forwards all of
#'   its arguments to `analyze_vars`. Use `analyze_vars` instead.
summarize_vars <- function(...) {
  # Emit the deprecation warning first, then delegate with the arguments untouched.
  lifecycle::deprecate_warn(
    when = "0.8.5.9010",
    what = "summarize_vars()",
    with = "analyze_vars()"
  )
  analyze_vars(...)
}

#' Split Function to Configure Risk Difference Column
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Wrapper function for [rtables::add_combo_levels()] which configures settings for the risk difference
#' column to be added to an `rtables` object. To add a risk difference column to a table, this function
#' should be used as `split_fun` in calls to [rtables::split_cols_by()], followed by setting argument
#' `riskdiff` to `TRUE` in all following analyze function calls.
#'
#' @param arm_x (`character`)\cr Name of reference arm to use in risk difference calculations.
#' @param arm_y (`character`)\cr Names of one or more arms to compare to reference arm in risk difference
#'   calculations. A new column will be added for each value of `arm_y`.
#' @param col_label (`character`)\cr Labels to use when rendering the risk difference column within the table.
#'   If more than one comparison arm is specified in `arm_y`, default labels will specify which two arms are
#'   being compared (reference arm vs. comparison arm).
#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
#'
#' @return A closure suitable for use as a split function (`split_fun`) within [rtables::split_cols_by()]
#'   when creating a table layout.
#'
#' @seealso [stat_propdiff_ci()] for details on risk difference calculation.
#'
#' @examples
#' adae <- tern_ex_adae
#' adae$AESEV <- factor(adae$AESEV)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_riskdiff(arm_x = "ARM A", arm_y = c("ARM B", "ARM C"))) %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     riskdiff = TRUE
#'   )
#'
#' tbl <- build_table(lyt, df = adae)
#' tbl
#'
#' @export
add_riskdiff <- function(arm_x,
                         arm_y,
                         col_label = paste0(
                           "Risk Difference (%) (95% CI)", if (length(arm_y) > 1) paste0("\n", arm_x, " vs. ", arm_y)
                         ),
                         pct = TRUE) {
  checkmate::assert_character(arm_x, len = 1)
  checkmate::assert_character(arm_y, min.len = 1)
  checkmate::assert_character(col_label, len = length(arm_y))
  # `pct` drives an `if ()` below, so it must be a single non-missing logical.
  checkmate::assert_flag(pct)

  # Start from an empty combination table and append one row per comparison arm. The level name
  # concatenates "riskdiff", the reference arm and the comparison arm, separated by underscores.
  combodf <- tibble::tribble(~valname, ~label, ~levelcombo, ~exargs)
  for (i in seq_along(arm_y)) {
    combodf <- rbind(
      combodf,
      tibble::tribble(
        ~valname, ~label, ~levelcombo, ~exargs,
        paste("riskdiff", arm_x, arm_y[i], sep = "_"), col_label[i], c(arm_x, arm_y[i]), list()
      )
    )
  }

  # A trailing "_pct" on the level name marks that percentages were requested.
  if (pct) combodf$valname <- paste0(combodf$valname, "_pct")
  add_combo_levels(combodf)
}

#' Analysis Function to Calculate Risk Difference Column Values
#'
#' In the risk difference column, this function uses the statistics function associated with `afun` to
#' calculate risk difference values from arm X (reference group) and arm Y. These arms are specified
#' when configuring the risk difference column which is done using the [add_riskdiff()] split function in
#' the previous call to [rtables::split_cols_by()]. For all other columns, applies `afun` as usual. This
#' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations.
#'
#' @inheritParams argument_convention
#' @param afun (named `list`)\cr A named list containing one name-value pair where the name corresponds to
#'   the name of the statistics function that should be used in calculations and the value is the corresponding
#'   analysis function.
#' @param s_args (named `list`)\cr Additional arguments to be passed to the statistics function and analysis
#'   function supplied in `afun`.
#'
#' @return A list of formatted [rtables::CellValue()].
#'
#' @seealso
#' * [stat_propdiff_ci()] for details on risk difference calculation.
#' * Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] with
#'   `riskdiff` argument set to `TRUE` in subsequent analyze functions calls, adds a risk difference column
#'   to a table layout.
#'
#' @keywords internal
afun_riskdiff <- function(df,
                          labelstr = "",
                          .var,
                          .N_col, # nolint
                          .N_row, # nolint
                          .df_row,
                          .spl_context,
                          .all_col_counts,
                          .stats,
                          .formats = NULL,
                          .labels = NULL,
                          .indent_mods = NULL,
                          na_str = default_na_str(),
                          afun,
                          s_args = list()) {
  # At least one column of the split context must refer to a risk difference level.
  has_riskdiff_col <- any(grepl("riskdiff", names(.spl_context)))
  if (!has_riskdiff_col) {
    stop(
      "Please set up levels to use in risk difference calculations using the `add_riskdiff` ",
      "split function within `split_cols_by`. See ?add_riskdiff for details."
    )
  }
  checkmate::assert_list(afun, len = 1, types = "function")
  checkmate::assert_named(afun)

  # Candidate arguments for the analysis function, restricted to the ones it declares.
  # A user-supplied `denom` in `s_args` takes precedence over the default "N_col".
  afun_args <- list(
    .var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
  )
  afun_arg_names <- names(as.list(args(afun[[1]])))
  afun_args <- afun_args[intersect(names(afun_args), afun_arg_names)]
  if ("denom" %in% names(s_args)) afun_args[["denom"]] <- NULL

  cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1)

  # Any column that is not a risk difference column: apply the analysis function as usual.
  if (!grepl("^riskdiff", cur_split)) {
    return(do.call(afun[[1]], args = c(list(df = df, .N_col = .N_col), afun_args, s_args)))
  }

  # The split value is expected to look like "riskdiff_<arm x>_<arm y>[_pct]", i.e. the level
  # name built by `add_riskdiff()`; the arms are read from its 2nd and 3rd "_"-separated parts.
  split_parts <- strsplit(cur_split, "_")[[1]]
  arm_x <- split_parts[2]
  arm_y <- split_parts[3]

  if (length(.spl_context$cur_col_split[[1]]) > 1) { # Different split name for nested column splits
    col_id_parts <- strsplit(.spl_context$cur_col_id[1], "_")[[1]]
    arm_spl_x <- gsub("riskdiff", "", paste0(col_id_parts[c(1, 2)], collapse = ""))
    arm_spl_y <- gsub("riskdiff", "", paste0(col_id_parts[c(1, 3)], collapse = ""))
  } else {
    arm_spl_x <- arm_x
    arm_spl_y <- arm_y
  }
  n_col_x <- .all_col_counts[[arm_spl_x]]
  n_col_y <- .all_col_counts[[arm_spl_y]]
  cur_var <- tail(.spl_context$cur_col_split[[1]], 1)

  # Apply the statistics function (identified by the name of `afun`) to the data of each arm,
  # passing along those analysis arguments that the statistics function declares as well.
  s_fun_name <- names(afun)
  s_args <- c(s_args, afun_args[intersect(names(afun_args), names(as.list(args(s_fun_name))))])
  s_x <- do.call(s_fun_name, args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = n_col_x), s_args))
  s_y <- do.call(s_fun_name, args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = n_col_y), s_args))

  # Statistic to use and the row names; an unnamed statistic is wrapped into a one-element list.
  stat <- if ("count_fraction" %in% names(s_x)) "count_fraction" else "unique"
  if ("flag_variables" %in% names(s_args)) {
    var_nms <- s_args[["flag_variables"]]
  } else if (!is.null(names(s_x[[stat]]))) {
    var_nms <- names(s_x[[stat]])
  } else {
    var_nms <- ""
    s_x[[stat]] <- list(s_x[[stat]])
    s_y[[stat]] <- list(s_y[[stat]])
  }

  # Risk difference per row (based on the first element of each statistic), repeated once per
  # requested statistic. A trailing "pct" in the split value requests percentages.
  pct <- tail(split_parts, 1) == "pct"
  x_first <- lapply(s_x[[stat]], `[`, 1)
  y_first <- lapply(s_y[[stat]], `[`, 1)
  rd_ci <- rep(
    stat_propdiff_ci(x_first, y_first, n_col_x, n_col_y, list_names = var_nms, pct = pct),
    max(1, length(.stats))
  )

  in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods)
}

#' Summarize numeric variables in columns
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Layout-creating function which can be used for creating column-wise summary tables.
#' This function sets the analysis methods as column labels and is a wrapper for
#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
#'
#' @inheritParams argument_convention
#' @inheritParams rtables::analyze_colvars
#' @param imp_rule (`character`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
#'   also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
#'   to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
#'   for more details on imputation.
#' @param avalcat_var (`character`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
#'   row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
#'   the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
#'   used to calculate the `n_blq` statistic (if included in `.stats`).
#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
#'   speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
#'   used for multiple tables with different data. Defaults to `FALSE`.
#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
#'   character vector applies on the column space. You can change the row labels by defining this
#'   parameter to a named character vector with names corresponding to the split values. It defaults
#'   to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
#'   label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
#'   to define row labels. This behavior is not supported as we never need to overload row labels.
#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
#'   This option allows you to add multiple instances of this function, also in a nested fashion,
#'   without adding more splits. This split must happen only one time on a single layout.
#'
#' @return
#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
#' in columns, and add it to the table layout.
#'
#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
#'   [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
#'   support to more complex analysis pipelines on the column space. For the same reasons,
#'   we encourage users to read the examples carefully and file issues for cases that differ from
#'   them.
#'
#'   Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
#'   row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
#'   (`do_summarize_row_groups = FALSE`, the default), and to the group label for
#'   `do_summarize_row_groups = TRUE`.
#'
#' @seealso [analyze_vars()], [rtables::analyze_colvars()].
#'
#' @examples
#' library(dplyr)
#'
#' # Data preparation
#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
#'
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>% # Removes duplicated labels
#'   analyze_vars_in_cols(vars = "AGE")
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # By selecting just some statistics and ad-hoc labels
#' lyt <- basic_table() %>%
#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
#'   split_rows_by(
#'     var = "SEX",
#'     label_pos = "topleft",
#'     child_labels = "hidden",
#'     split_fun = drop_split_levels
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     .stats = c("n", "cv", "geom_mean"),
#'     .labels = c(
#'       n = "aN",
#'       cv = "aCV",
#'       geom_mean = "aGeomMean"
#'     )
#'   )
#' result <- build_table(lyt = lyt, df = adpp)
#' result
#'
#' # Changing row labels
#' lyt <- basic_table() %>%
#'   analyze_vars_in_cols(
#'     vars = "AGE",
#'     row_labels = "some custom label"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Pharmacokinetic parameters
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft",
#'     child_label = "hidden"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL"
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' # Multiple calls (summarize label and analyze underneath)
#' lyt <- basic_table() %>%
#'   split_rows_by(
#'     var = "TLG_DISPLAY",
#'     split_label = "PK Parameter",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     do_summarize_row_groups = TRUE # does a summarize level
#'   ) %>%
#'   split_rows_by("SEX",
#'     child_label = "hidden",
#'     label_pos = "topleft"
#'   ) %>%
#'   analyze_vars_in_cols(
#'     vars = "AVAL",
#'     split_col_vars = FALSE # avoids re-splitting the columns
#'   )
#' result <- build_table(lyt, df = adpp)
#' result
#'
#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 imp_rule = NULL,
                                 avalcat_var = "AVALCAT1",
                                 cache = FALSE,
                                 .indent_mods = NULL,
                                 na_level = lifecycle::deprecated(),
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .formats = NULL,
                                 .aligns = NULL) {
  extra_args <- list(...)

  # Backward compatibility: a supplied (deprecated) `na_level` overrides `na_str`, with a warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "analyze_vars_in_cols(na_level)", "analyze_vars_in_cols(na_str)")
    na_str <- na_level
  }

  checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Indentation modifier used for every cell (scalar condition, so plain `if` instead of `ifelse`)
  cell_indent_mod <- if (is.null(.indent_mods)) 0L else .indent_mods

  # Filtering
  met_grps <- paste0("analyze_vars", c("_numeric", "_counts"))
  .stats <- get_stats(met_grps, stats_in = .stats)
  formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats)
  labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels)
  if ("control" %in% names(extra_args)) labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels)

  # Check for vars in the case that one or more are used
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = labels_v
    )

    if (any(vapply(clyt, identical, logical(1), y = get_last_col_split(dummy_lyt)))) {
      # Report the calling expression (e.g. the pipe). When this function is called from the
      # top level there is no parent frame, so fall back to this function's own call.
      parent_frame_idx <- sys.nframe() - 1
      offending_call <- if (parent_frame_idx >= 1) sys.calls()[[parent_frame_idx]] else sys.call()
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(offending_call), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = labels_v
    )
  }

  env <- new.env() # create caching environment

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, labelstr, .df_row, ...) {
          # Statistic: the cache key combines the current column split value(s) and the row split values
          var_row_val <- paste(
            gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
            paste(.spl_context$value, collapse = "_"),
            sep = "_"
          )
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          # The imputation rule is only applied to the statistics listed here
          if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
            res <- x_stats[[stat]]
          } else {
            # First number found in the innermost row split value
            timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override of the NA string for this cell only
            na_str <- res_imp[["na_str"]]
          }

          # Label check and replacement
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = cell_indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      na_str = na_str,
      extra_args = extra_args
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, .df_row, ...) {
          # Main statistics: the cache key combines the current column split value(s) and the row split values
          var_row_val <- paste(
            gsub("\\._\\[\\[[0-9]+\\]\\]_\\.", "", paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")),
            paste(.spl_context$value, collapse = "_"),
            sep = "_"
          )
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          # The imputation rule is only applied to the statistics listed here
          if (is.null(imp_rule) || !stat %in% c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")) {
            res <- x_stats[[stat]]
          } else {
            # First number found in the innermost row split value
            timept <- as.numeric(gsub(".*?([0-9\\.]+).*", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override of the NA string for this cell only
            na_str <- res_imp[["na_str"]]
          }

          # A list result is only accepted when it holds a single category
          if (is.list(res)) {
            if (length(res) > 1) {
              stop("The analyzed column produced more than one category of results.")
            } else {
              res <- unlist(res)
            }
          }

          # Label from context
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else {
            if (length(row_labels) > 1) {
              if (!(label_from_context %in% names(row_labels))) {
                stop(
                  "Replacing the labels in do_summarize_row_groups needs a named vector ",
                  "that contains the split values. In the current split variable ",
                  .spl_context$split[nrow(.spl_context)],
                  " the split value ", label_from_context, " is not in",
                  " row_labels names: ", paste(names(row_labels), collapse = ", ")
                )
              }
              lbl <- unlist(row_labels[label_from_context])
            } else {
              lbl <- row_labels
            }
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = cell_indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      na_str = na_str,
      nested = nested,
      extra_args = extra_args
    )
  }
}

# Helper: takes the last element of `clayout(lyt)` and returns the last element found inside it.
get_last_col_split <- function(lyt) {
  last_col_layout <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_layout, 1)[[1]]
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Binary (Response) Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
#' (response) outcome. The treatment arm variable must have exactly 2 levels,
#' where the first one is taken as reference and the estimated odds ratios are
#' for the comparison of the second level vs. the first one.
#'
#' The (conditional) logistic regression model which is fit is:
#'
#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables:
#'   needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()]
#'   and [control_logistic()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the
#'   subgroup intervals used for the biomarker variable, including where the
#'   center of the intervals are and their bounds. The second part of the
#'   columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_logistic()] for the available
#'   customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(survival)
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(
#'     PARAMCD == "BESRSPI",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     SEX = factor(SEX)
#'   )
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
#' step_matrix <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.5))
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different logistic regression options, including confidence level.
#' step_matrix2 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = 0.6, degree = 1))
#' )
#'
#' # Use a global constant model. This is helpful as a reference for the subgroup models.
#' step_matrix3 <- fit_rsp_step(
#'   variables = variables,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
#' )
#'
#' # It is also possible to use strata, i.e. use conditional logistic regression models.
#' variables2 <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = "AGE",
#'   response = "RSP",
#'   strata = c("STRATA1", "STRATA2")
#' )
#'
#' step_matrix4 <- fit_rsp_step(
#'   variables = variables2,
#'   data = adrs_f,
#'   control = c(control_logistic(), control_step(bandwidth = 0.6))
#' )
#'
#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Rows with a missing biomarker value are dropped before the windows are determined.
  biomarker_var <- variables$biomarker
  data <- data[!is.na(data[[biomarker_var]]), ]

  window_sel <- h_step_window(x = data[[biomarker_var]], control = control)
  interval_center <- window_sel$interval[, "Interval Center"]
  form <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: one estimation call receives all interval centers at once.
    estimates <- h_step_rsp_est(
      formula = form, data = data, variables = variables, x = interval_center, control = control
    )
  } else {
    # One estimation call per interval center, paired with the matching column of `window_sel$sel`.
    est_by_center <- mapply(
      FUN = h_step_rsp_est,
      x = interval_center,
      subset = as.list(as.data.frame(window_sel$sel)),
      MoreArgs = list(formula = form, data = data, variables = variables, control = control)
    )
    # Maybe we find a more elegant solution than this.
    rownames(est_by_center) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(est_by_center)
  }

  # Interval description columns first, estimates second; inputs are kept as attributes.
  structure(
    cbind(window_sel$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Line plot with the optional table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot with the optional table.
#'
#' @param df (`data.frame`)\cr data set containing all analysis variables.
#' @param alt_counts_df (`data.frame` or `NULL`)\cr data set that will be used (only)
#'   to count objects in groups for stratification.
#' @param variables (named `character` vector) of variable names in `df` data set. Details are:
#'   * `x` (`character`)\cr name of x-axis variable.
#'   * `y` (`character`)\cr name of y-axis variable.
#'   * `group_var` (`character`)\cr name of grouping variable (or strata), i.e. treatment arm.
#'     Can be `NA` to indicate lack of groups.
#'   * `subject_var` (`character`)\cr name of subject variable. Only applies if `group_var` is
#'      not NULL.
#'   * `paramcd` (`character`)\cr name of the variable for parameter's code. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if `paramcd` is not to be added to the y-axis label or subtitle.
#'   * `y_unit` (`character`)\cr name of variable with units of `y`. Used for y-axis label and plot's subtitle.
#'     Can be `NA` if y unit is not to be added to the y-axis label or subtitle.
#' @param mid (`character` or `NULL`)\cr names of the statistics that will be plotted as midpoints.
#'   All the statistics indicated in `mid` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length one.
#' @param interval (`character` or `NULL`)\cr names of the statistics that will be plotted as intervals.
#'   All the statistics indicated in `interval` variable must be present in the object returned by `sfun`,
#'   and be of a `double` or `numeric` type vector of length two. Set `interval = NULL` if intervals should not be
#'   added to the plot.
#' @param whiskers (`character`)\cr names of the interval whiskers that will be plotted. Names must match names
#'   of the list element `interval` that will be returned by `sfun` (e.g. `mean_ci_lwr` element of
#'   `sfun(x)[["mean_ci"]]`). It is possible to specify one whisker only, or to suppress all whiskers by setting
#'   `interval = NULL`.
#' @param table (`character` or `NULL`)\cr names of the statistics that will be displayed in the table below the plot.
#'   All the statistics indicated in `table` variable must be present in the object returned by `sfun`.
#' @param sfun (`closure`)\cr the function to compute the values of required statistics. It must return a named `list`
#'   with atomic vectors. The names of the `list` elements refer to the names of the statistics and are used by `mid`,
#'   `interval`, `table`. It must be able to accept as input a vector with data for which statistics are computed.
#' @param ... optional arguments to `sfun`.
#' @param mid_type (`character`)\cr controls the type of the `mid` plot, it can be point (`p`), line (`l`),
#'   or point and line (`pl`).
#' @param mid_point_size (`integer` or `double`)\cr controls the font size of the point for `mid` plot.
#' @param position (`character` or `call`)\cr geom element position adjustment, either as a string, or the result of
#'   a call to a position adjustment function.
#' @param legend_title (`character` string)\cr legend title.
#' @param legend_position (`character`)\cr the position of the plot legend (`none`, `left`, `right`, `bottom`, `top`,
#'   or two-element numeric vector).
#' @param ggtheme (`theme`)\cr a graphical theme as provided by `ggplot2` to control styling of the plot.
#' @param x_lab (`character`)\cr x-axis label. If equal to `NULL`, then no label will be added.
#' @param y_lab (`character`)\cr y-axis label. If equal to `NULL`, then no label will be added.
#' @param y_lab_add_paramcd (`logical`)\cr should `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` be added to the
#'   y-axis label `y_lab`?
#' @param y_lab_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the y-axis
#'   label `y_lab`?
#' @param title (`character`)\cr plot title.
#' @param subtitle (`character`)\cr plot subtitle.
#' @param subtitle_add_paramcd (`logical`)\cr should `paramcd`, i.e. `unique(df[[variables["paramcd"]]])` be added to
#'   the plot's subtitle `subtitle`?
#' @param subtitle_add_unit (`logical`)\cr should y unit, i.e. `unique(df[[variables["y_unit"]]])` be added to the
#'   plot's subtitle `subtitle`?
#' @param caption (`character`)\cr optional caption below the plot.
#' @param table_format (named `character` or `NULL`)\cr format patterns for descriptive statistics used in the
#'   (optional) table appended to the plot. It is passed directly to the `h_format_row` function through the `format`
#'   parameter. Names of `table_format` must match the names of statistics returned by `sfun` function.
#' @param table_labels (named `character` or `NULL`)\cr labels for descriptive statistics used in the (optional) table
#'   appended to the plot. Names of `table_labels` must match the names of statistics returned by `sfun` function.
#' @param table_font_size (`integer` or `double`)\cr controls the font size of values in the table.
#' @param newpage (`logical`)\cr should plot be drawn on new page?
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` line plot (and statistics table if applicable).
#'
#' @examples
#' library(nestcolor)
#'
#' adsl <- tern_ex_adsl
#' adlb <- tern_ex_adlb %>% dplyr::filter(ANL01FL == "Y", PARAMCD == "ALT", AVISIT != "SCREENING")
#' adlb$AVISIT <- droplevels(adlb$AVISIT)
#' adlb <- dplyr::mutate(adlb, AVISIT = forcats::fct_reorder(AVISIT, AVISITN, min))
#'
#' # Mean with CI
#' g_lineplot(adlb, adsl, subtitle = "Laboratory Test:")
#'
#' # Mean with CI, no stratification with group_var
#' g_lineplot(adlb, variables = control_lineplot_vars(group_var = NA))
#'
#' # Mean, upper whisker of CI, no group_var(strata) counts N
#' g_lineplot(
#'   adlb,
#'   whiskers = "mean_ci_upr",
#'   title = "Plot of Mean and Upper 95% Confidence Limit by Visit"
#' )
#'
#' # Median with CI
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   mid = "median",
#'   interval = "median_ci",
#'   whiskers = c("median_ci_lwr", "median_ci_upr"),
#'   title = "Plot of Median and 95% Confidence Limits by Visit"
#' )
#'
#' # Mean, +/- SD
#' g_lineplot(adlb, adsl,
#'   interval = "mean_sdi",
#'   whiskers = c("mean_sdi_lwr", "mean_sdi_upr"),
#'   title = "Plot of Mean +/- SD by Visit"
#' )
#'
#' # Mean with CI plot with stats table
#' g_lineplot(adlb, adsl, table = c("n", "mean", "mean_ci"))
#'
#' # Mean with CI, table and customized confidence level
#' g_lineplot(
#'   adlb,
#'   adsl,
#'   table = c("n", "mean", "mean_ci"),
#'   control = control_analyze_vars(conf_level = 0.80),
#'   title = "Plot of Mean and 80% Confidence Limits by Visit"
#' )
#'
#' # Mean with CI, table, filtered data
#' adlb_f <- dplyr::filter(adlb, ARMCD != "ARM A" | AVISIT == "BASELINE")
#' g_lineplot(adlb_f, table = c("n", "mean"))
#'
#' @export
g_lineplot <- function(df,
                       alt_counts_df = NULL,
                       variables = control_lineplot_vars(),
                       mid = "mean",
                       interval = "mean_ci",
                       whiskers = c("mean_ci_lwr", "mean_ci_upr"),
                       table = NULL,
                       sfun = tern::s_summary,
                       ...,
                       mid_type = "pl",
                       mid_point_size = 2,
                       position = ggplot2::position_dodge(width = 0.4),
                       legend_title = NULL,
                       legend_position = "bottom",
                       ggtheme = nestcolor::theme_nest(),
                       x_lab = obj_label(df[[variables[["x"]]]]),
                       y_lab = NULL,
                       y_lab_add_paramcd = TRUE,
                       y_lab_add_unit = TRUE,
                       title = "Plot of Mean and 95% Confidence Limits by Visit",
                       subtitle = "",
                       subtitle_add_paramcd = TRUE,
                       subtitle_add_unit = TRUE,
                       caption = NULL,
                       table_format = summary_formats(),
                       table_labels = summary_labels(),
                       table_font_size = 3,
                       newpage = TRUE,
                       col = NULL) {
  ############################## |
  # ---- Validate arguments. ----
  ############################## |
  checkmate::assert_character(variables, any.missing = TRUE)
  checkmate::assert_character(mid, null.ok = TRUE)
  checkmate::assert_character(interval, null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  checkmate::assert_string(title, null.ok = TRUE)
  checkmate::assert_string(subtitle, null.ok = TRUE)

  if (is.character(interval)) {
    checkmate::assert_vector(whiskers, min.len = 0, max.len = 2)
  }

  # a single whisker is drawn from `mid`, so `mid` is required in that case
  if (length(whiskers) == 1) {
    checkmate::assert_character(mid)
  }

  if (is.character(mid)) {
    checkmate::assert_scalar(mid_type)
    checkmate::assert_subset(mid_type, c("pl", "p", "l"))
  }

  x <- variables[["x"]]
  y <- variables[["y"]]
  paramcd <- variables["paramcd"] # NA if paramcd == NA or it is not in variables
  y_unit <- variables["y_unit"] # NA if y_unit == NA or it is not in variables
  if (is.na(variables["group_var"])) {
    group_var <- NULL # NULL if group_var == NA or it is not in variables
  } else {
    group_var <- variables[["group_var"]]
    subject_var <- variables[["subject_var"]]
  }

  # The flags may be NULL (`null.ok = TRUE`); `isTRUE()` treats NULL as "do not add"
  # instead of failing inside `&&` / `if`.
  checkmate::assert_flag(y_lab_add_paramcd, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_paramcd, null.ok = TRUE)
  if ((!is.null(y_lab) && isTRUE(y_lab_add_paramcd)) || (!is.null(subtitle) && isTRUE(subtitle_add_paramcd))) {
    checkmate::assert_false(is.na(paramcd))
    checkmate::assert_scalar(unique(df[[paramcd]]))
  }

  checkmate::assert_flag(y_lab_add_unit, null.ok = TRUE)
  checkmate::assert_flag(subtitle_add_unit, null.ok = TRUE)
  if ((!is.null(y_lab) && isTRUE(y_lab_add_unit)) || (!is.null(subtitle) && isTRUE(subtitle_add_unit))) {
    checkmate::assert_false(is.na(y_unit))
    checkmate::assert_scalar(unique(df[[y_unit]]))
  }

  if (!is.null(group_var) && !is.null(alt_counts_df)) {
    checkmate::assert_set_equal(unique(alt_counts_df[[group_var]]), unique(df[[group_var]]))
  }

  ####################################### |
  # ---- Compute required statistics ----
  ####################################### |
  if (!is.null(group_var)) {
    df_grp <- tidyr::expand(df, .data[[group_var]], .data[[x]]) # expand based on levels of factors
  } else {
    df_grp <- tidyr::expand(df, NULL, .data[[x]])
  }
  df_grp <- df_grp %>%
    dplyr::full_join(y = df[, c(group_var, x, y)], by = c(group_var, x), multiple = "all") %>%
    dplyr::group_by_at(c(group_var, x))

  # one row per (group, x) cell holding the `mid` and `interval` statistics returned by `sfun`
  df_stats <- df_grp %>%
    dplyr::summarise(
      data.frame(t(do.call(c, unname(sfun(.data[[y]], ...)[c(mid, interval)])))),
      .groups = "drop"
    )

  df_stats <- df_stats[!is.na(df_stats[[mid]]), ]

  # add number of objects N in group_var (strata)
  if (!is.null(group_var) && !is.null(alt_counts_df)) {
    strata_N <- paste0(group_var, "_N") # nolint

    # Count unique subjects per group. Columns are addressed by name (no `eval(parse())`), so
    # non-syntactic column names work; incomplete rows are dropped as the formula interface did.
    counts_src <- as.data.frame(alt_counts_df)[, c(group_var, subject_var), drop = FALSE]
    counts_src <- counts_src[stats::complete.cases(counts_src), , drop = FALSE]
    df_N <- stats::aggregate( # nolint
      x = counts_src[subject_var],
      by = counts_src[group_var],
      FUN = function(ids) length(unique(ids))
    )
    colnames(df_N) <- c(group_var, "N") # nolint
    df_N[[strata_N]] <- paste0(df_N[[group_var]], " (N = ", df_N$N, ")") # nolint

    # strata_N should not be in colnames(df_stats)
    checkmate::assert_disjunct(strata_N, colnames(df_stats))

    df_stats <- merge(x = df_stats, y = df_N[, c(group_var, strata_N)], by = group_var)
  } else if (!is.null(group_var)) {
    strata_N <- group_var # nolint
  } else {
    strata_N <- NULL # nolint
  }

  ############################################### |
  # ---- Prepare certain plot's properties. ----
  ############################################### |
  # legend title
  # `identical()` keeps the condition scalar when `legend_position` is a two-element numeric vector
  if (is.null(legend_title) && !is.null(group_var) && !identical(legend_position, "none")) {
    legend_title <- attr(df[[group_var]], "label")
  }

  # y label
  if (!is.null(y_lab)) {
    if (isTRUE(y_lab_add_paramcd)) {
      y_lab <- paste(y_lab, unique(df[[paramcd]]))
    }

    if (isTRUE(y_lab_add_unit)) {
      y_lab <- paste0(y_lab, " (", unique(df[[y_unit]]), ")")
    }

    y_lab <- trimws(y_lab)
  }

  # subtitle
  if (!is.null(subtitle)) {
    if (isTRUE(subtitle_add_paramcd)) {
      subtitle <- paste(subtitle, unique(df[[paramcd]]))
    }

    if (isTRUE(subtitle_add_unit)) {
      subtitle <- paste0(subtitle, " (", unique(df[[y_unit]]), ")")
    }

    subtitle <- trimws(subtitle)
  }

  ############################### |
  # ---- Build plot object. ----
  ############################### |
  p <- ggplot2::ggplot(
    data = df_stats,
    mapping = ggplot2::aes(
      x = .data[[x]], y = .data[[mid]],
      color = if (is.null(strata_N)) NULL else .data[[strata_N]],
      shape = if (is.null(strata_N)) NULL else .data[[strata_N]],
      lty = if (is.null(strata_N)) NULL else .data[[strata_N]],
      group = if (is.null(strata_N)) NULL else .data[[strata_N]]
    )
  )

  if (!is.null(mid)) {
    # points
    if (grepl("p", mid_type, fixed = TRUE)) {
      p <- p + ggplot2::geom_point(position = position, size = mid_point_size, na.rm = TRUE)
    }

    # lines
    # further conditions in if are to ensure that not all of the groups consist of only one observation
    if (grepl("l", mid_type, fixed = TRUE) && !is.null(group_var) &&
      !all(dplyr::summarise(df_grp, count_n = dplyr::n())[["count_n"]] == 1L)) { # nolint
      p <- p + ggplot2::geom_line(position = position, na.rm = TRUE)
    }
  }

  # interval
  if (!is.null(interval)) {
    p <- p +
      ggplot2::geom_errorbar(
        ggplot2::aes(ymin = .data[[whiskers[1]]], ymax = .data[[whiskers[max(1, length(whiskers))]]]),
        width = 0.45,
        position = position
      )

    if (length(whiskers) == 1) { # lwr or upr only; mid is then required
      # workaround as geom_errorbar does not provide single-direction whiskers
      p <- p +
        ggplot2::geom_linerange(
          data = df_stats[!is.na(df_stats[[whiskers]]), ], # as na.rm =TRUE does not suppress warnings
          ggplot2::aes(ymin = .data[[mid]], ymax = .data[[whiskers]]),
          position = position,
          na.rm = TRUE,
          show.legend = FALSE
        )
    }
  }

  p <- p +
    ggplot2::scale_y_continuous(labels = scales::comma) +
    ggplot2::labs(
      title = title,
      subtitle = subtitle,
      caption = caption,
      color = legend_title,
      lty = legend_title,
      shape = legend_title,
      x = x_lab,
      y = y_lab
    )

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  # NOTE: `legend_position` is only applied when no `ggtheme` is supplied; a supplied theme
  # is used as is.
  if (!is.null(ggtheme)) {
    p <- p + ggtheme
  } else {
    # always a single string, also when `legend_position` is a numeric coordinate pair
    legend_direction <- if (any(legend_position %in% c("top", "bottom"))) {
      "horizontal"
    } else {
      "vertical"
    }
    p <- p +
      ggplot2::theme_bw() +
      ggplot2::theme(
        legend.key.width = grid::unit(1, "cm"),
        legend.position = legend_position,
        legend.direction = legend_direction
      )
  }

  ############################################################# |
  # ---- Optionally, add table to the bottom of the plot. ----
  ############################################################# |
  if (!is.null(table)) {
    # one formatted row of the requested statistics per (group, x) cell
    df_stats_table <- df_grp %>%
      dplyr::summarise(
        h_format_row(
          x = sfun(.data[[y]], ...)[table],
          format = table_format,
          labels = table_labels
        ),
        .groups = "drop"
      )

    # reversed so that the first requested statistic ends up at the top of the y axis
    stats_lev <- rev(setdiff(colnames(df_stats_table), c(group_var, x)))

    df_stats_table <- df_stats_table %>%
      tidyr::pivot_longer(
        cols = -dplyr::all_of(c(group_var, x)),
        names_to = "stat",
        values_to = "value",
        names_ptypes = list(stat = factor(levels = stats_lev))
      )

    tbl <- ggplot2::ggplot(
      df_stats_table,
      ggplot2::aes(x = .data[[x]], y = .data[["stat"]], label = .data[["value"]])
    ) +
      ggplot2::geom_text(size = table_font_size) +
      ggplot2::theme_bw() +
      ggplot2::theme(
        panel.border = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_blank(),
        axis.text.y = ggplot2::element_text(margin = ggplot2::margin(t = 0, r = 0, b = 0, l = 5)),
        strip.text = ggplot2::element_text(hjust = 0),
        strip.text.x = ggplot2::element_text(margin = ggplot2::margin(1.5, 0, 1.5, 0, "pt")),
        strip.background = ggplot2::element_rect(fill = "grey95", color = NA),
        legend.position = "none"
      )

    if (!is.null(group_var)) {
      tbl <- tbl + ggplot2::facet_wrap(facets = group_var, ncol = 1)
    }

    # align plot and table
    cowplot::plot_grid(p, tbl, ncol = 1, align = "v", axis = "tblr")
  } else {
    p
  }
}

#' Helper function to get the right formatting in the optional table in `g_lineplot`.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param x (named `list`)\cr list of numerical values to be formatted and optionally labeled.
#'   Elements of `x` must be `numeric` vectors.
#' @param format (named `character` or `NULL`)\cr format patterns for `x`. Names of the `format` must
#'   match the names of `x`. This parameter is passed directly to the `rtables::format_rcell`
#'   function through the `format` parameter.
#' @param labels (named `character` or `NULL`)\cr optional labels for `x`. Names of the `labels` must
#'   match the names of `x`. When a label is not specified for an element of `x`,
#'   then this function tries to use `label` or `names` (in this order) attribute of that element
#'   (depending on which one exists and it is not `NULL` or `NA` or `NaN`). If none of these attributes
#'   are attached to a given element of `x`, then the label is automatically generated.
#'
#' @return A single row `data.frame` object.
#'
#' @examples
#' mean_ci <- c(48, 51)
#' x <- list(mean = 50, mean_ci = mean_ci)
#' format <- c(mean = "xx.x", mean_ci = "(xx.xx, xx.xx)")
#' labels <- c(mean = "My Mean")
#' h_format_row(x, format, labels)
#'
#' attr(mean_ci, "label") <- "Mean 95% CI"
#' x <- list(mean = 50, mean_ci = mean_ci)
#' h_format_row(x, format, labels)
#'
#' @export
h_format_row <- function(x, format, labels = NULL) {
  # TRUE when a candidate label can be used, i.e. it is neither NULL nor NA
  is_usable <- function(candidate) {
    !is.null(candidate) && !is.na(candidate)
  }

  # Formats one statistic into a one-row, one-column data.frame. The column name is taken from
  # (in this order) the explicit `label`, the `label` attribute of the value, or the names of
  # the value when the formatted cell has the same length as the value.
  build_cell <- function(value, pattern, label = NULL) {
    cell <- format_rcell(x = value, format = unlist(pattern))
    if (is.na(cell)) {
      cell <- "NA"
    }
    value_label <- attr(value, "label")
    if (is_usable(label)) {
      names(cell) <- label
    } else if (is_usable(value_label)) {
      names(cell) <- value_label
    } else if (length(value) == length(cell)) {
      names(cell) <- names(value)
    }
    as.data.frame(t(cell))
  }

  # one cell per named element of `x`, bound column-wise into a single row
  cells <- lapply(
    names(x),
    function(stat) build_cell(x[[stat]], pattern = format[stat], label = labels[stat])
  )

  do.call(cbind, cells)
}

#' Control Function for `g_lineplot` Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Default values for `variables` parameter in `g_lineplot` function.
#' A variable's default value can be overwritten for any variable.
#'
#' @param x (`character`)\cr x variable name.
#' @param y (`character`)\cr y variable name.
#' @param group_var (`character` or `NA`)\cr group variable name.
#' @param strata (`character` or `NA`)\cr deprecated - group variable name.
#' @param subject_var (`character` or `NA`)\cr subject variable name.
#' @param cohort_id (`character` or `NA`)\cr deprecated - subject variable name.
#' @param paramcd (`character` or `NA`)\cr `paramcd` variable name.
#' @param y_unit (`character` or `NA`)\cr `y_unit` variable name.
#'
#' @return A named character vector of variable names.
#'
#' @examples
#' control_lineplot_vars()
#' control_lineplot_vars(group_var = NA)
#'
#' @export
control_lineplot_vars <- function(x = "AVISIT", y = "AVAL", group_var = "ARM", paramcd = "PARAMCD", y_unit = "AVALU",
                                  subject_var = "USUBJID", strata = lifecycle::deprecated(),
                                  cohort_id = lifecycle::deprecated()) {
  # Deprecated `strata` is forwarded to its replacement `group_var`.
  if (lifecycle::is_present(strata)) {
    lifecycle::deprecate_warn("0.9.2", "control_lineplot_vars(strata)", "control_lineplot_vars(group_var)")
    group_var <- strata
  }

  # Deprecated `cohort_id` is forwarded to its replacement `subject_var`, so that a value
  # supplied through the old argument actually ends up in the returned vector.
  if (lifecycle::is_present(cohort_id)) {
    lifecycle::deprecate_warn("0.9.2", "control_lineplot_vars(cohort_id)", "control_lineplot_vars(subject_var)")
    subject_var <- cohort_id
  }

  # `x` and `y` are mandatory; the remaining variables may be switched off with NA
  checkmate::assert_string(x)
  checkmate::assert_string(y)
  checkmate::assert_string(group_var, na.ok = TRUE)
  checkmate::assert_string(subject_var, na.ok = TRUE)
  checkmate::assert_string(paramcd, na.ok = TRUE)
  checkmate::assert_string(y_unit, na.ok = TRUE)

  # named character vector of variable names
  c(x = x, y = y, group_var = group_var, paramcd = paramcd, y_unit = y_unit, subject_var = subject_var)
}

#' Multivariate Logistic Regression Table
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
#' category or specified values and corresponding Wald confidence intervals as default but allow user
#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
#' that covariate has no effect on response in model containing all specified covariates.
#' Allow option to include one two-way interaction and present similar output for
#' each interaction degree of freedom.
#'
#' @inheritParams argument_convention
#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
#'
#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
#'   Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
#'
#' @note For the formula, the variable names need to be standard `data.frame` column names without
#'   special characters.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' # flagging empty strings with "_"
#' df <- df_explicit_na(df, na_level = "_")
#' df2 <- df_explicit_na(df2, na_level = "_")
#'
#' result1 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df)
#' result1
#'
#' result2 <- basic_table() %>%
#'   summarize_logistic(
#'     conf_level = 0.95,
#'     drop_and_remove_str = "_"
#'   ) %>%
#'   build_table(df = df2)
#' result2
#'
#' @export
#' @order 1
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  checkmate::assert_string(drop_and_remove_str)

  # content-row writers, one per type of summary row flagged in the tidied model data
  add_variable_rows <- logistic_summary_by_flag("is_variable_summary")
  add_term_rows <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratio_rows <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)

  # every row split uses the same split function built from `drop_and_remove_str`
  level_filter <- drop_and_remove_levels(drop_and_remove_str)
  add_split <- function(layout, var, labels_var) {
    split_rows_by(layout, var = var, labels_var = labels_var, split_fun = level_filter)
  }

  # columns first, then the nested row structure: variable > term > interaction > reference
  lyt <- logistic_regression_cols(lyt, conf_level = conf_level)
  lyt <- add_variable_rows(add_split(lyt, "variable", "variable_label"))
  lyt <- add_term_rows(add_split(lyt, "term", "term_label"))
  lyt <- add_split(lyt, "interaction", "interaction_label")
  lyt <- add_odds_ratio_rows(add_split(lyt, "reference", "reference_label"))
  lyt
}

#' Fit for Logistic Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fit a (conditional) logistic regression model.
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the (conditional) logistic regression model on the left hand
#'   side of the formula.
#'
#' @return A fitted logistic regression model.
#'
#' @section Model Specification:
#'
#' The `variables` list needs to include the following elements:
#'   * `arm`: Treatment arm variable name.
#'   * `response`: The response variable name. Usually this is a 0/1 variable.
#'   * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
#'   * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
#'     included in `covariates`. Then the interaction with the treatment arm is included in the model.
#'
#' @examples
#' library(dplyr)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  checkmate::assert_true(grepl("response", response_definition))

  # left hand side: the placeholder "response" is replaced by the actual response variable name
  lhs <- sub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # right hand side is built up as text: arm, then covariates, then arm-by-covariate interaction
  model_text <- paste0(lhs, " ~ ", variables$arm)

  covariates <- variables$covariates
  if (!is.null(covariates)) {
    model_text <- paste0(model_text, " + ", paste(covariates, collapse = " + "))
  }

  interaction_var <- variables$interaction
  if (!is.null(interaction_var)) {
    checkmate::assert_string(interaction_var)
    checkmate::assert_subset(interaction_var, covariates)
    model_text <- paste0(model_text, " + ", variables$arm, ":", interaction_var)
  }

  strata_vars <- variables$strata
  stratified <- !is.null(strata_vars)
  if (stratified) {
    # several strata variables are combined into one stratification factor
    strata_arg <- strata_vars
    if (length(strata_vars) > 1) {
      strata_arg <- paste0("I(interaction(", paste0(strata_vars, collapse = ", "), "))")
    }
    model_text <- paste0(model_text, "+ strata(", strata_arg, ")")
  }

  formula <- stats::as.formula(model_text)

  # stratified models are fitted through the conditional logistic regression helper
  if (stratified) {
    return(
      clogit_with_tryCatch(
        formula = formula,
        data = data,
        x = TRUE
      )
    )
  }

  stats::glm(
    formula = formula,
    data = data,
    family = stats::binomial("logit")
  )
}

#' Custom Tidy Method for Binomial GLM Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
#' with `binomial` family.
#'
#' @inheritParams argument_convention
#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
#' @param x logistic regression model fitted by [stats::glm()] with "binomial" family.
#'
#' @return A `data.frame` containing the tidied model.
#'
#' @method tidy glm
#'
#' @seealso [h_logistic_regression] for relevant helper functions.
#'
#' @examples
#' library(dplyr)
#' library(broom)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
#'   mutate(
#'     Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
#'     RACE = factor(RACE),
#'     SEX = factor(SEX)
#'   )
#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
#' mod1 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE")
#'   )
#' )
#' mod2 <- fit_logistic(
#'   data = adrs_f,
#'   variables = list(
#'     response = "Response",
#'     arm = "ARMCD",
#'     covariates = c("AGE", "RACE"),
#'     interaction = "AGE"
#'   )
#' )
#'
#' df <- tidy(mod1, conf_level = 0.99)
#' df2 <- tidy(mod2, conf_level = 0.99)
#'
#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  data_classes <- attr(x$terms, "dataClasses")

  # a term label that is not one of the model variables marks an interaction term
  has_interaction <- !all(term_labels %in% names(data_classes))

  if (has_interaction) {
    tidied <- h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  } else {
    tidied <- h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  }

  # turn the identifier columns into factors whose levels follow the order of appearance
  id_columns <- c("variable", "term", "interaction", "reference")
  for (column in id_columns) {
    values <- tidied[[column]]
    tidied[[column]] <- factor(values, levels = unique(values))
  }

  tidied
}

#' Logistic Regression Multivariate Column Layout Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Layout-creating function which creates a multivariate column layout summarizing logistic
#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
#'
#' @inheritParams argument_convention
#'
#' @return A layout object suitable for passing to further layouting functions. Adding this
#'   function to an `rtable` layout will split the table into columns corresponding to
#'   statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
#'
#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # column labels keyed by statistic; the CI label carries the requested confidence level
  stat_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = paste("Wald", f_conf_level(conf_level)),
    pvalue = "p-value"
  )

  # one column per statistic, in the order given above
  split_cols_by_multivar(
    lyt = lyt,
    vars = c("df", "estimate", "std_error", "odds_ratio", "ci", "pvalue"),
    varlabels = stat_labels
  )
}

#' Logistic Regression Summary Table Constructor Function
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
#'   content function.
#'
#' @return A content function.
#'
#' @export
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  # The returned function adds content rows to a layout, using one content function per
  # statistic column, each restricted to the rows flagged by `flag_var`.
  function(lyt) {
    # content function for a single statistic with its display format
    flagged_cfun <- function(stat, fmt) {
      cfun_by_flag(stat, flag_var, format = fmt, .indent_mods = .indent_mods)
    }

    content_functions <- list(
      df = flagged_cfun("df", "xx."),
      estimate = flagged_cfun("estimate", "xx.xxx"),
      std_error = flagged_cfun("std_error", "xx.xxx"),
      odds_ratio = flagged_cfun("odds_ratio", ">999.99"),
      ci = flagged_cfun("ci", format_extreme_values_ci(2L)),
      pvalue = flagged_cfun("pvalue", "x.xxxx | (<0.0001)")
    )

    summarize_row_groups(
      lyt = lyt,
      cfun = content_functions,
      na_str = na_str
    )
  }
}

#' Combination Functions Class
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
#' can be combined and negated with the logical operators.
#'
#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
#' @param x (`CombinationFunction`)\cr the function which should be negated.
#'
#' @return A `CombinationFunction` object which, when called, returns the logical result of the combined or negated
#'   underlying function(s).
#'
#' @exportClass CombinationFunction
#' @export CombinationFunction
#'
#' @examples
#' higher <- function(a) {
#'   force(a)
#'   CombinationFunction(
#'     function(x) {
#'       x > a
#'     }
#'   )
#' }
#'
#' lower <- function(b) {
#'   force(b)
#'   CombinationFunction(
#'     function(x) {
#'       x < b
#'     }
#'   )
#' }
#'
#' c1 <- higher(5)
#' c2 <- lower(10)
#' c3 <- higher(5) & lower(10)
#' c3(7)
#'
#' @aliases CombinationFunction-class
#' @name combination_function
CombinationFunction <- methods::setClass( # nolint
  Class = "CombinationFunction",
  contains = "function"
)

#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "AND" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "&",
  signature = methods::signature(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # both operands receive the same arguments; `&&` skips `e2` when `e1` is already FALSE
    conjunction <- function(...) {
      e1(...) && e2(...)
    }
    CombinationFunction(conjunction)
  }
)

#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the two argument functions. The result
#'   is then the "OR" of the two individual results.
#'
#' @export
methods::setMethod(
  f = "|",
  signature = methods::signature(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # both operands receive the same arguments; `||` skips `e2` when `e1` is already TRUE
    disjunction <- function(...) {
      e1(...) || e2(...)
    }
    CombinationFunction(disjunction)
  }
)

#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
#'   The resulting object is of the same class, and evaluates the original function. The result
#'   is then the opposite of this result.
#'
#' @export
methods::setMethod(
  f = "!",
  signature = methods::signature(x = "CombinationFunction"),
  definition = function(x) {
    # wraps the original function and inverts whatever it returns
    negation <- function(...) {
      !x(...)
    }
    CombinationFunction(negation)
  }
)

#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
#' `trim_levels_to_map` split function. The map is constructed differently depending on the `method`.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
#'   abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
#'   `list(Low = "LOW", High = "HIGH")`.
#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
#'
#' @return A map `data.frame`.
#'
#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
#'   `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
#'   `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
#'   for low direction and at least one observation with high range is not missing for high direction.
#'
#' @examples
#' adlb <- df_explicit_na(tern_ex_adlb)
#'
#' h_map_for_count_abnormal(
#'   df = adlb,
#'   variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "default",
#'   na_str = "<Missing>"
#' )
#'
#' df <- data.frame(
#'   USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
#'   AVISIT = c(
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2),
#'     rep("WEEK 1", 2),
#'     rep("WEEK 2", 2)
#'   ),
#'   PARAM = rep(c("ALT", "CPR"), 6),
#'   ANRIND = c(
#'     "NORMAL", "NORMAL", "LOW",
#'     "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
#'   ),
#'   ANRLO = rep(5, 12),
#'   ANRHI = rep(20, 12)
#' )
#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
#' h_map_for_count_abnormal(
#'   df = df,
#'   variables = list(
#'     anl = "ANRIND",
#'     split_rows = c("PARAM"),
#'     range_low = "ANRLO",
#'     range_high = "ANRHI"
#'   ),
#'   abnormal = list(low = c("LOW"), high = c("HIGH")),
#'   method = "range",
#'   na_str = "<Missing>"
#' )
#'
#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = lifecycle::deprecated(),
                                     na_str = "<Missing>") {
  # `na_level` is deprecated: when supplied it overrides `na_str` after a deprecation warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "h_map_for_count_abnormal(na_level)", "h_map_for_count_abnormal(na_str)")
    na_str <- na_level
  }

  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_str
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  # Whatever level of the analysis variable is not listed in `abnormal` is taken as the normal level.
  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    df_abnormal <- subset(df, df[[variables$anl]] %in% unlist(abnormal))
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    map_normal <- unique(subset(map, select = variables$split_rows))
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Cross every unique `split_rows` combination of `df_direction` with every abnormal level of the
    # requested direction ("LOW" or "HIGH", matched case-insensitively against `names(abnormal)`).
    build_direction_map <- function(df_direction, direction) {
      split_map <- unique(df_direction[variables$split_rows])
      rownames(split_map) <- NULL # Just to avoid strange row index in case upstream functions changed
      direction_levels <- unname(unlist(abnormal[toupper(names(abnormal)) == direction]))
      n_splits <- nrow(split_map)
      # Repeat each split row once per level, then cycle the levels within each split row.
      split_map <- split_map[rep(seq_len(n_splits), each = length(direction_levels)), , drop = FALSE]
      split_map[[variables$anl]] <- rep(direction_levels, times = n_splits)
      split_map
    }

    # Define low direction of map: only records with a strictly positive low range qualify.
    range_low <- df[[variables$range_low]]
    map_low <- build_direction_map(df[!is.na(range_low) & range_low > 0, , drop = FALSE], "LOW")

    # Define high direction of map: the high range must be neither `NA` nor the explicit missing
    # string `na_str`. Both conditions have to hold; combining them with "or" would let records
    # carrying the `na_str` placeholder through because they are not `NA`.
    range_high <- df[[variables$range_high]]
    map_high <- build_direction_map(
      df[!is.na(range_high) & !(range_high %in% na_str), , drop = FALSE],
      "HIGH"
    )

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables (successive stable sorts, last variable first,
  # which relies on the split_rows variables being the leading columns of the map)
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), ]
  }
  map
}

#' Create a Forest Plot based on a Table
#'
#' Create a forest plot from any [rtables::rtable()] object that has a
#' column with a single value and a column with 2 values.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams grid::gTree
#' @inheritParams argument_convention
#' @param tbl (`rtable`)
#' @param col_x (`integer`)\cr column index with estimator. By default tries to get this from
#'   `tbl` attribute `col_x`, otherwise needs to be manually specified.
#' @param col_ci (`integer`)\cr column index with confidence intervals. By default tries
#'   to get this from `tbl` attribute `col_ci`, otherwise needs to be manually specified.
#' @param vline (`number`)\cr x coordinate for vertical line, if `NULL` then the line is omitted.
#' @param forest_header (`character`, length 2)\cr text displayed to the left and right of `vline`, respectively.
#'   If `vline = NULL` then `forest_header` needs to be `NULL` too.
#'   By default tries to get this from `tbl` attribute `forest_header`.
#' @param xlim (`numeric`)\cr limits for x axis.
#' @param logx (`flag`)\cr show the x-values on logarithm scale.
#' @param x_at (`numeric`)\cr x-tick locations, if `NULL` they get automatically chosen.
#' @param width_row_names (`unit`)\cr width for row names.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_columns (`unit`)\cr widths for the table columns.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param width_forest (`unit`)\cr width for the forest column.
#'   If `NULL` the widths get automatically calculated. See [grid::unit()].
#' @param col_symbol_size (`integer`)\cr column index from `tbl` containing data to be used
#'   to determine relative size for estimator plot symbol. Typically, the symbol size is proportional
#'   to the sample size used to calculate the estimator. If `NULL`, the same symbol size is used for all subgroups.
#'   By default tries to get this from `tbl` attribute `col_symbol_size`, otherwise needs to be manually specified.
#' @param col (`character`)\cr color(s).
#'
#' @return `gTree` object containing the forest plot and table.
#'
#' @examples
#' \donttest{
#' library(dplyr)
#' library(forcats)
#' library(nestcolor)
#'
#' adrs <- tern_ex_adrs
#' n_records <- 20
#' adrs_labels <- formatters::var_labels(adrs, fill = TRUE)
#' adrs <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
#'   slice(seq_len(n_records)) %>%
#'   droplevels() %>%
#'   mutate(
#'     # Reorder levels of factor to make the placebo group the reference arm.
#'     ARM = fct_relevel(ARM, "B: Placebo"),
#'     rsp = AVALC == "CR"
#'   )
#' formatters::var_labels(adrs) <- c(adrs_labels, "Response")
#' df <- extract_rsp_subgroups(
#'   variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "STRATA2")),
#'   data = adrs
#' )
#' # Full commonly used response table.
#'
#' tbl <- basic_table() %>%
#'   tabulate_rsp_subgroups(df)
#' p <- g_forest(tbl, gp = grid::gpar(fontsize = 10))
#'
#' draw_grob(p)
#'
#' # Odds ratio only table.
#'
#' tbl_or <- basic_table() %>%
#'   tabulate_rsp_subgroups(df, vars = c("n_tot", "or", "ci"))
#' tbl_or
#' p <- g_forest(
#'   tbl_or,
#'   forest_header = c("Comparison\nBetter", "Treatment\nBetter")
#' )
#'
#' draw_grob(p)
#'
#' # Survival forest plot example.
#' adtte <- tern_ex_adtte
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = TRUE)
#' adtte_f <- adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X"),
#'     SEX %in% c("M", "F")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
#'     SEX = droplevels(SEX),
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- list(
#'   "ARM" = adtte_labels["ARM"],
#'   "SEX" = adtte_labels["SEX"],
#'   "AVALU" = adtte_labels["AVALU"],
#'   "is_event" = "Event Flag"
#' )
#' formatters::var_labels(adtte_f)[names(labels)] <- as.character(labels)
#' df <- extract_survival_subgroups(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     arm = "ARM", subgroups = c("SEX", "BMRKR2")
#'   ),
#'   data = adtte_f
#' )
#' table_hr <- basic_table() %>%
#'   tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
#' g_forest(table_hr)
#' # Works with any `rtable`.
#' tbl <- rtable(
#'   header = c("E", "CI", "N"),
#'   rrow("", 1, c(.8, 1.2), 200),
#'   rrow("", 1.2, c(1.1, 1.4), 50)
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   col_symbol_size = 3
#' )
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", rcell("A", colspan = 2)),
#'     rrow("", "c1", "c2")
#'   ),
#'   rrow("row 1", 1, c(.8, 1.2)),
#'   rrow("row 2", 1.2, c(1.1, 1.4))
#' )
#' g_forest(
#'   tbl = tbl,
#'   col_x = 1,
#'   col_ci = 2,
#'   xlim = c(0.5, 2),
#'   x_at = c(0.5, 1, 2),
#'   vline = 1,
#'   forest_header = c("Hello", "World")
#' )
#' }
#'
#' @export
g_forest <- function(tbl,
                     col_x = attr(tbl, "col_x"),
                     col_ci = attr(tbl, "col_ci"),
                     vline = 1,
                     forest_header = attr(tbl, "forest_header"),
                     xlim = c(0.1, 10),
                     logx = TRUE,
                     x_at = c(0.1, 1, 10),
                     width_row_names = NULL,
                     width_columns = NULL,
                     width_forest = grid::unit(1, "null"),
                     col_symbol_size = attr(tbl, "col_symbol_size"),
                     col = getOption("ggplot2.discrete.colour")[1],
                     gp = NULL,
                     draw = TRUE,
                     newpage = TRUE) {
  checkmate::assert_class(tbl, "VTableTree")

  n_rows <- nrow(tbl)
  n_cols <- ncol(tbl)
  # Fall back to blue when the option queried by the default is not set.
  if (is.null(col)) {
    col <- "blue"
  }

  checkmate::assert_number(col_x, lower = 0, upper = n_cols, null.ok = FALSE)
  checkmate::assert_number(col_ci, lower = 0, upper = n_cols, null.ok = FALSE)
  checkmate::assert_number(col_symbol_size, lower = 0, upper = n_cols, null.ok = TRUE)
  checkmate::assert_true(col_x > 0)
  checkmate::assert_true(col_ci > 0)
  checkmate::assert_character(col)
  if (!is.null(col_symbol_size)) {
    checkmate::assert_true(col_symbol_size > 0)
  }

  # Raw content of one table cell.
  # If a label row is selected NULL is returned with a warning (suppressed).
  cell_content <- function(i, j) {
    suppressWarnings(as.vector(tbl[i, j, drop = TRUE]))
  }

  # Only non-empty numeric cell content can be plotted.
  is_plottable <- function(value) {
    !is.null(value) && length(value) > 0 && is.numeric(value)
  }

  # Estimates: one number per row, `NA` where the cell holds nothing plottable.
  x_e <- vapply(seq_len(n_rows), function(i) {
    value <- cell_content(i, col_x)
    if (is_plottable(value)) value else NA_real_
  }, numeric(1))

  # Confidence intervals: a pair per row, `NA`s where the cell holds nothing plottable.
  x_ci <- lapply(seq_len(n_rows), function(i) {
    value <- cell_content(i, col_ci)
    if (!is_plottable(value)) {
      return(c(NA_real_, NA_real_))
    }
    if (length(value) != 2) {
      stop("ci column needs two elements")
    }
    value
  })

  lower <- vapply(x_ci, function(ci) ci[1], numeric(1))
  upper <- vapply(x_ci, function(ci) ci[2], numeric(1))

  symbol_size <- NULL
  if (!is.null(col_symbol_size)) {
    raw_size <- vapply(seq_len(n_rows), function(i) {
      value <- cell_content(i, col_symbol_size)
      if (is_plottable(value)) value else NA_real_
    }, numeric(1))

    # Scale symbol size by its square root, relative to the largest symbol.
    root_size <- sqrt(raw_size)
    largest <- max(root_size, na.rm = TRUE)
    # The biggest points get a radius of 2 * (1 / 3.5) lines so that they do not overlap.
    # See forest_dot_line.
    symbol_size <- 2 * root_size / largest
  }

  grob_forest <- forest_grob(
    tbl = tbl,
    x = x_e,
    lower = lower,
    upper = upper,
    vline = vline,
    forest_header = forest_header,
    xlim = xlim,
    logx = logx,
    x_at = x_at,
    width_row_names = width_row_names,
    width_columns = width_columns,
    width_forest = width_forest,
    symbol_size = symbol_size,
    col = col,
    gp = gp,
    vp = grid::plotViewport(margins = rep(1, 4))
  )

  # Drawing is optional; the grob is always returned (invisibly).
  if (draw) {
    if (newpage) {
      grid::grid.newpage()
    }
    grid::grid.draw(grob_forest)
  }

  invisible(grob_forest)
}

#' Forest Plot Grob
#'
#' @inheritParams g_forest
#' @param tbl ([rtables::rtable()])
#' @param x (`numeric`)\cr coordinate of point.
#' @param lower,upper (`numeric`)\cr lower/upper bound of the confidence interval.
#' @param symbol_size (`numeric`)\cr vector with relative size for plot symbol.
#' If `NULL`, the same symbol size is used.
#'
#' @details
#' The heights get automatically determined.
#'
#' @noRd
#'
#' @examples
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2), "N"),
#'     rrow("", "A", "B", "C", "D")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1, 16),
#'   rrow("row 2", 1.4, 0.8, 1.6, 25),
#'   rrow("row 3", 1.2, 0.8, 1.6, 36)
#' )
#'
#' x <- c(1, 1.4, 1.2)
#' lower <- c(0.8, 0.8, 0.8)
#' upper <- c(1.1, 1.6, 1.6)
#' # numeric vector with multiplication factor to scale each circle radius
#' # default radius is 1/3.5 lines
#' symbol_scale <- c(1, 1.25, 1.5)
#'
#' # Internal function - forest_grob
#' \donttest{
#' p <- forest_grob(tbl, x, lower, upper,
#'   vline = 1, forest_header = c("A", "B"),
#'   x_at = c(.1, 1, 10), xlim = c(0.1, 10), logx = TRUE, symbol_size = symbol_scale,
#'   vp = grid::plotViewport(margins = c(1, 1, 1, 1))
#' )
#'
#' draw_grob(p)
#' }
forest_grob <- function(tbl,
                        x,
                        lower,
                        upper,
                        vline,
                        forest_header,
                        xlim = NULL,
                        logx = FALSE,
                        x_at = NULL,
                        width_row_names = NULL,
                        width_columns = NULL,
                        width_forest = grid::unit(1, "null"),
                        symbol_size = NULL,
                        col = "blue",
                        name = NULL,
                        gp = NULL,
                        vp = NULL) {
  # Builds (without drawing) a `gTree` holding the table cells of `tbl` plus a forest column in which
  # every body row gets its estimate `x` and the interval from `lower` to `upper`.
  nr <- nrow(tbl)
  # A forest header is only accepted together with a reference line.
  if (is.null(vline)) {
    checkmate::assert_true(is.null(forest_header))
  } else {
    checkmate::assert_number(vline)
    checkmate::assert_character(forest_header, len = 2, null.ok = TRUE)
  }

  # One estimate, lower bound, upper bound (and optionally symbol size) per table row.
  checkmate::assert_numeric(x, len = nr)
  checkmate::assert_numeric(lower, len = nr)
  checkmate::assert_numeric(upper, len = nr)
  checkmate::assert_numeric(symbol_size, len = nr, null.ok = TRUE)
  checkmate::assert_character(col)

  # Without explicit sizes every row uses the same relative symbol size.
  if (is.null(symbol_size)) {
    symbol_size <- rep(1, nr)
  }

  # Default x limits: range of all plotted values, widened by 5% of that range on each side.
  if (is.null(xlim)) {
    r <- range(c(x, lower, upper), na.rm = TRUE)
    xlim <- r + c(-0.05, 0.05) * diff(r)
  }

  # On the log scale all coordinates are log-transformed, while the tick labels (`x_labels`)
  # stay on the original scale.
  if (logx) {
    if (is.null(x_at)) {
      x_at <- pretty(log(stats::na.omit(c(x, lower, upper))))
      x_labels <- exp(x_at)
    } else {
      x_labels <- x_at
      x_at <- log(x_at)
    }
    xlim <- log(xlim)
    x <- log(x)
    lower <- log(lower)
    upper <- log(upper)
    if (!is.null(vline)) {
      vline <- log(vline)
    }
  } else {
    # `TRUE` is passed on as the `label` argument of `grid::xaxisGrob()` further down.
    x_labels <- TRUE
  }

  # Viewport providing the "native" coordinate system of the forest column, built from `xlim`.
  data_forest_vp <- grid::dataViewport(xlim, c(0, 1))

  # Get table content as matrix form.
  mf <- matrix_form(tbl)

  # Use `rtables` indent_string eventually.
  # Row names are indented by four spaces per indent level; header rows are not indented.
  mf$strings[, 1] <- paste0(
    strrep("    ", c(rep(0, attr(mf, "nrow_header")), mf$row_info$indent)),
    mf$strings[, 1]
  )

  n_header <- attr(mf, "nrow_header")

  if (any(mf$display[, 1] == FALSE)) stop("row names need to be always displayed")

  # Pre-process the data to be used in lapply and cell_in_rows.
  # Returns one argument list for `cell_in_rows()` per row of the requested part. Body rows sit
  # below the `n_header` header rows in the matrix form, so their `row_index` is shifted back to
  # start at 1 within the body.
  to_args_for_cell_in_rows_fun <- function(part = c("body", "header"),
                                           underline_colspan = FALSE) {
    part <- match.arg(part)
    if (part == "body") {
      mat_row_indices <- seq_len(nrow(tbl)) + n_header
      row_ind_offset <- -n_header
    } else {
      mat_row_indices <- seq_len(n_header)
      row_ind_offset <- 0
    }

    lapply(mat_row_indices, function(i) {
      # Only the cells flagged in `mf$display` are passed on (first column = row name, handled separately).
      disp <- mf$display[i, -1]
      list(
        row_name = mf$strings[i, 1],
        cells = mf$strings[i, -1][disp],
        cell_spans = mf$spans[i, -1][disp],
        row_index = i + row_ind_offset,
        underline_colspan = underline_colspan
      )
    })
  }

  args_header <- to_args_for_cell_in_rows_fun("header", underline_colspan = TRUE)
  args_body <- to_args_for_cell_in_rows_fun("body", underline_colspan = FALSE)

  # Assemble the final tree. The viewports referenced through `vpPath()` are created by
  # `forest_viewport()` and attached via `childrenvp`.
  grid::gTree(
    name = name,
    children = grid::gList(
      # table header cells
      grid::gTree(
        children = do.call(grid::gList, lapply(args_header, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_header")
      ),
      # table body cells
      grid::gTree(
        children = do.call(grid::gList, lapply(args_body, do.call, what = cell_in_rows)),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # horizontal line across the spacer between header and body
      grid::linesGrob(
        grid::unit(c(0, 1), "npc"),
        y = grid::unit(c(.5, .5), "npc"),
        vp = grid::vpPath("vp_table_layout", "vp_spacer")
      ),
      # forest part
      # header texts placed one line to the left and right of the reference line
      if (is.null(vline)) {
        NULL
      } else {
        grid::gTree(
          children = grid::gList(
            grid::gTree(
              children = grid::gList(
                # this may overflow, to fix, look here
                # https://stackoverflow.com/questions/33623169/add-multi-line-footnote-to-tablegrob-while-using-gridextra-in-r #nolintr
                grid::textGrob(
                  forest_header[1],
                  x = grid::unit(vline, "native") - grid::unit(1, "lines"),
                  just = c("right", "center")
                ),
                grid::textGrob(
                  forest_header[2],
                  x = grid::unit(vline, "native") + grid::unit(1, "lines"),
                  just = c("left", "center")
                )
              ),
              # the forest column is the last layout column: row names + table columns + forest
              vp = grid::vpStack(grid::viewport(layout.pos.col = ncol(tbl) + 2), data_forest_vp)
            )
          ),
          vp = grid::vpPath("vp_table_layout", "vp_header")
        )
      },
      # forest column background, optional reference line and x axis, spanning the whole body
      grid::gTree(
        children = grid::gList(
          grid::gTree(
            children = grid::gList(
              grid::rectGrob(gp = grid::gpar(col = "gray90", fill = "gray90")),
              if (is.null(vline)) {
                NULL
              } else {
                grid::linesGrob(
                  x = grid::unit(rep(vline, 2), "native"),
                  y = grid::unit(c(0, 1), "npc"),
                  gp = grid::gpar(lwd = 2),
                  vp = data_forest_vp
                )
              },
              grid::xaxisGrob(at = x_at, label = x_labels, vp = data_forest_vp)
            ),
            vp = grid::viewport(layout.pos.col = ncol(tbl) + 2)
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      ),
      # one dot-and-line grob per body row; `Map()` recycles `col` across the rows
      grid::gTree(
        children = do.call(
          grid::gList,
          Map(
            function(xi, li, ui, row_index, size_i, col) {
              forest_dot_line(
                xi,
                li,
                ui,
                row_index,
                xlim,
                symbol_size = size_i,
                col = col,
                datavp = data_forest_vp
              )
            },
            x,
            lower,
            upper,
            seq_along(x),
            symbol_size,
            col,
            USE.NAMES = FALSE
          )
        ),
        vp = grid::vpPath("vp_table_layout", "vp_body")
      )
    ),
    childrenvp = forest_viewport(tbl, width_row_names, width_columns, width_forest),
    vp = vp,
    gp = gp
  )
}


#' Graphic Object: Table Row of the Forest Plot
#'
#' Creates the text grobs for one table row: the row name plus one grob per displayed cell.
#' Cells spanning several columns get their own joined viewport and, optionally, an underline.
#'
#' @param row_name (`string`)\cr row name; no grob is created for an empty string.
#' @param cells (`character`)\cr cell contents to display; empty strings produce no grob.
#' @param cell_spans (`numeric`)\cr number of table columns spanned by each element of `cells`.
#' @param row_index (`number`)\cr index of the row, used to address the `rowname-<i>` and
#'   `cell-<i>-<j>` viewports.
#' @param underline_colspan (`flag`)\cr whether non-blank cells spanning more than one column are underlined.
#'
#' @return A `gList` with the grobs of the row.
#'
#' @noRd
cell_in_rows <- function(row_name,
                         cells,
                         cell_spans,
                         row_index,
                         underline_colspan = FALSE) {
  checkmate::assert_string(row_name)
  checkmate::assert_character(cells, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(cell_spans, len = length(cells), any.missing = FALSE)
  checkmate::assert_number(row_index)
  checkmate::assert_flag(underline_colspan)

  # `row_name` is a validated, non-missing string here, so only the empty string needs handling.
  vp_name_rn <- paste0("rowname-", row_index)
  g_rowname <- if (nzchar(row_name)) {
    grid::textGrob(
      name = vp_name_rn,
      label = row_name,
      x = grid::unit(0, "npc"),
      just = c("left", "center"),
      vp = grid::vpPath(vp_name_rn)
    )
  } else {
    NULL
  }

  # Table column at which each cell starts: the first cell starts in column 1 and every following
  # cell starts right after the columns spanned by its predecessors.
  col_starts <- cumsum(c(1, cell_spans[-length(cell_spans)]))

  gl_cols <- lapply(seq_along(cells), function(k) {
    cell_ascii <- cells[[k]]
    cs <- cell_spans[[k]]
    j <- col_starts[[k]] # column index of cell

    if (identical(cell_ascii, "")) {
      return(NULL)
    }

    cell_name <- paste0("g-cell-", row_index, "-", j)

    if (cs == 1) {
      return(
        grid::textGrob(
          label = cell_ascii,
          name = cell_name,
          vp = grid::vpPath(paste0("cell-", row_index, "-", j))
        )
      )
    }

    # +1 because of rowname
    vp_joined_cols <- grid::viewport(layout.pos.row = row_index, layout.pos.col = seq(j + 1, j + cs))

    lab <- grid::textGrob(
      label = cell_ascii,
      name = cell_name,
      vp = vp_joined_cols
    )

    # Blank spanning cells are never underlined.
    if (!underline_colspan || grepl("^[[:space:]]*$", cell_ascii)) {
      lab
    } else {
      grid::gList(
        lab,
        grid::linesGrob(
          x = grid::unit.c(grid::unit(.2, "lines"), grid::unit(1, "npc") - grid::unit(.2, "lines")),
          y = grid::unit(c(0, 0), "npc"),
          vp = vp_joined_cols
        )
      )
    }
  })

  grid::gList(
    g_rowname,
    do.call(grid::gList, gl_cols)
  )
}

#' Graphic Object: Forest Dot Line
#'
#' Calculate the `grob` corresponding to the dot line within the forest plot.
#'
#' @noRd
forest_dot_line <- function(x,
                            lower,
                            upper,
                            row_index,
                            xlim,
                            symbol_size = 1,
                            col = "blue",
                            datavp) {
  ci <- c(lower, upper)

  # Nothing to draw when estimate and both interval bounds are missing.
  if (all(is.na(c(x, ci)))) {
    return(NULL)
  }

  y_mid <- grid::unit(c(0.5, 0.5), "npc")

  # The interval is drawn only when it is complete and overlaps the visible x range.
  g_line <- NULL
  if (!anyNA(ci) && ci[2] > xlim[1] && ci[1] < xlim[2]) {
    cut_left <- ci[1] < xlim[1]
    cut_right <- ci[2] > xlim[2]
    # Sides on which the interval is cut off at the axis limits get an arrow head.
    ends <- if (cut_left && cut_right) {
      "both"
    } else if (cut_left) {
      "first"
    } else if (cut_right) {
      "last"
    } else {
      "none"
    }
    line_x <- switch(ends,
      none = c(ci[1], ci[2]),
      both = xlim,
      first = c(xlim[1], ci[2]),
      last = c(ci[1], xlim[2])
    )
    line_arrow <- if (ends == "none") {
      NULL
    } else {
      grid::arrow(angle = 30, length = grid::unit(0.5, "lines"), ends = ends)
    }
    g_line <- grid::linesGrob(x = grid::unit(line_x, "native"), y = y_mid, arrow = line_arrow)
  }

  # The estimate is drawn only when it lies within the visible x range.
  g_circle <- NULL
  if (!is.na(x) && x >= xlim[1] && x <= xlim[2]) {
    g_circle <- grid::circleGrob(
      x = grid::unit(x, "native"),
      y = y_mid,
      r = grid::unit(1 / 3.5 * symbol_size, "lines"),
      name = "point"
    )
  }

  marks <- grid::gList(g_line, g_circle)
  colored_marks <- grid::gTree(
    children = grid::gList(marks),
    vp = datavp,
    gp = grid::gpar(col = col, fill = col)
  )

  grid::gTree(
    children = grid::gList(colored_marks),
    vp = grid::vpPath(paste0("forest-", row_index))
  )
}

#' Create a Viewport Tree for the Forest Plot
#' @param tbl (`rtable`)
#' @param width_row_names (`grid::unit`)\cr Width of row names
#' @param width_columns (`grid::unit`)\cr Width of column spans
#' @param width_forest (`grid::unit`)\cr Width of the forest plot
#' @param gap_column (`grid::unit`)\cr Gap width between the columns
#' @param gap_header (`grid::unit`)\cr Gap width between the header
#' @param mat_form matrix print form of the table
#' @return A viewport tree.
#'
#' @examples
#' library(grid)
#'
#' tbl <- rtable(
#'   header = rheader(
#'     rrow("", "E", rcell("CI", colspan = 2)),
#'     rrow("", "A", "B", "C")
#'   ),
#'   rrow("row 1", 1, 0.8, 1.1),
#'   rrow("row 2", 1.4, 0.8, 1.6),
#'   rrow("row 3", 1.2, 0.8, 1.2)
#' )
#'
#' \donttest{
#' v <- forest_viewport(tbl)
#'
#' grid::grid.newpage()
#' showViewport(v)
#' }
#'
#' @export
forest_viewport <- function(tbl,
                            width_row_names = NULL,
                            width_columns = NULL,
                            width_forest = grid::unit(1, "null"),
                            gap_column = grid::unit(1, "lines"),
                            gap_header = grid::unit(1, "lines"),
                            mat_form = NULL) {
  checkmate::assert_class(tbl, "VTableTree")
  checkmate::assert_true(grid::is.unit(width_forest))
  if (!is.null(width_row_names)) {
    checkmate::assert_true(grid::is.unit(width_row_names))
  }
  if (!is.null(width_columns)) {
    checkmate::assert_true(grid::is.unit(width_columns))
  }

  if (is.null(mat_form)) mat_form <- matrix_form(tbl)

  # Cells that are not displayed must not contribute to the row heights.
  mat_form$strings[!mat_form$display] <- ""

  nr <- nrow(tbl)
  nc <- ncol(tbl)
  nr_h <- attr(mat_form, "nrow_header")

  # Widths not supplied by the caller are derived from the proposed column widths (in characters).
  if (is.null(width_row_names) || is.null(width_columns)) {
    tbl_widths <- formatters::propose_column_widths(mat_form)
    strs_with_width <- strrep("x", tbl_widths) # that works for mono spaced fonts
    if (is.null(width_row_names)) width_row_names <- grid::stringWidth(strs_with_width[1])
    if (is.null(width_columns)) width_columns <- grid::stringWidth(strs_with_width[-1])
  }

  # Widths for row name, cols, forest.
  widths <- grid::unit.c(
    width_row_names + gap_column,
    width_columns + gap_column,
    width_forest
  )

  # Number of text lines per row: one more than the largest number of newlines in any of its cells.
  # `gregexpr()` returns one position per match (or a single -1 when there is none), so the matches
  # are counted explicitly; its `match.length` attribute has one element per match and cannot be
  # used as a count for cells with more than one newline.
  n_lines_per_row <- apply(
    X = mat_form$strings,
    MARGIN = 1,
    FUN = function(row) {
      n_newlines <- vapply(
        gregexpr("\n", row, fixed = TRUE),
        function(m) sum(m > 0),
        numeric(1)
      )
      max(c(n_newlines + 1, 1))
    }
  )

  i_header <- seq_len(nr_h)

  # Each text line is given a height of 1.2 lines.
  height_body_rows <- grid::unit(n_lines_per_row[-i_header] * 1.2, "lines")
  height_header_rows <- grid::unit(n_lines_per_row[i_header] * 1.2, "lines")

  height_body <- grid::unit(sum(n_lines_per_row[-i_header]) * 1.2, "lines")
  height_header <- grid::unit(sum(n_lines_per_row[i_header]) * 1.2, "lines")

  nc_g <- nc + 2 # number of columns incl. row names and forest

  # Three stacked parts: header, spacer (of height `gap_header`) and body.
  grid::vpTree(
    parent = grid::viewport(
      name = "vp_table_layout",
      layout = grid::grid.layout(
        nrow = 3, ncol = 1,
        heights = grid::unit.c(height_header, gap_header, height_body)
      )
    ),
    children = grid::vpList(
      vp_forest_table_part(nr_h, nc_g, 1, 1, widths, height_header_rows, "vp_header"),
      vp_forest_table_part(nr, nc_g, 3, 1, widths, height_body_rows, "vp_body"),
      grid::viewport(name = "vp_spacer", layout.pos.row = 2, layout.pos.col = 1)
    )
  )
}

#' Viewport Forest Plot: Table Part
#'
#' Prepares a viewport for the table included in the forest plot.
#'
#' @noRd
vp_forest_table_part <- function(nrow,
                                 ncol,
                                 l_row,
                                 l_col,
                                 widths,
                                 heights,
                                 name) {
  # Parent viewport: positioned at (`l_row`, `l_col`) of the enclosing layout and carrying its own
  # `nrow` x `ncol` layout for the table part.
  part_vp <- grid::viewport(
    name = name,
    layout.pos.row = l_row,
    layout.pos.col = l_col,
    layout = grid::grid.layout(nrow = nrow, ncol = ncol, widths = widths, heights = heights)
  )

  row_ids <- seq_len(nrow)

  # First layout column: one "rowname-<i>" viewport per row.
  rowname_vps <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = 1, name = paste0("rowname-", i))
  })

  # Inner layout columns: one "cell-<i>-<j>" viewport per row and table column
  # (`ncol` also counts the row name and forest columns, hence `ncol - 2` table columns).
  cell_positions <- expand.grid(row_ids, seq_len(ncol - 2))
  cell_vps <- apply(
    cell_positions,
    1,
    function(pos) {
      i <- pos[1]
      j <- pos[2]
      grid::viewport(layout.pos.row = i, layout.pos.col = j + 1, name = paste0("cell-", i, "-", j))
    }
  )

  # Last layout column: one "forest-<i>" viewport per row.
  forest_vps <- lapply(row_ids, function(i) {
    grid::viewport(layout.pos.row = i, layout.pos.col = ncol, name = paste0("forest-", i))
  })

  grid::vpTree(
    part_vp,
    children = grid::vpList(
      do.call(grid::vpList, rowname_vps),
      do.call(grid::vpList, cell_vps),
      do.call(grid::vpList, forest_vps)
    )
  )
}

#' Forest Rendering
#'
#' Renders the forest grob.
#'
#' @noRd
grid.forest <- function(...) { # nolint
  # Build the forest grob from the forwarded arguments, then draw it on the current device.
  forest <- forest_grob(...)
  grid::grid.draw(forest)
}

#' Number of Patients
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Count the number of unique and non-unique patients in a column (variable).
#'
#' @inheritParams argument_convention
#' @param count_by (`vector`)\cr optional vector of any type to be combined with `x` when counting `nonunique`
#'   records.
#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
#'   Defaults to `TRUE`.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_num_patients")`
#'   to see available statistics for this function.
#'
#' @name summarize_num_patients
#' @order 1
NULL

#' @describeIn summarize_num_patients Statistics function which counts the number of
#'   unique patients, the corresponding percentage taken with respect to the
#'   total number of patients, and the number of non-unique patients.
#'
#' @param x (`character` or `factor`)\cr vector of patient IDs.
#'
#' @return
#' * `s_num_patients()` returns a named `list` of 3 statistics:
#'   * `unique`: Vector of counts and percentages.
#'   * `nonunique`: Vector of counts.
#'   * `unique_count`: Counts.
#'
#' @examples
#' # Use the statistics function to count number of unique and nonunique patients.
#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
#' s_num_patients(
#'   x = as.character(c(1, 1, 1, 2, 4, NA)),
#'   labelstr = "",
#'   .N_col = 6L,
#'   count_by = c(1, 1, 2, 1, 1, 1)
#' )
#'
#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # `n_available()` is defined elsewhere in the package; presumably it counts the
  # non-missing entries of its argument (TODO confirm).
  n_unique <- n_available(unique(x))
  n_records <- n_available(x)

  # When `count_by` is supplied, records are counted per distinct (x, count_by) combination.
  if (!is.null(count_by)) {
    checkmate::assert_vector(count_by, len = length(x))
    n_records <- n_available(unique(interaction(x, count_by)))
  }

  # Scalar decisions use `if`/`else` rather than the vectorised `ifelse()`.
  # The fraction is defined as 0 when both the count and the column total are 0.
  fraction <- if (n_unique == 0 && .N_col == 0) 0 else n_unique / .N_col

  # Label of `unique_count`: `labelstr` followed by "(n)" (space-separated when
  # `labelstr` is non-empty), or `labelstr` itself when the suffix is switched off.
  count_label <- if (unique_count_suffix) {
    paste0(labelstr, if (nzchar(labelstr)) " ", "(n)")
  } else {
    labelstr
  }

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, count_label)
  )
}

#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
#'   in a column (variable), the corresponding percentage taken with respect to the total number of
#'   patients, and the number of non-unique patients in the column.
#'
#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
#'
#' @return
#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
#'
#' @examples
#' # Count number of unique and non-unique patients.
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = as.character(c(10, 15, 10, 17, 8))
#' )
#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
#'
#' df_by_event <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA)),
#'   EVENT = c(10, 15, 10, 17, 8)
#' )
#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
#'
#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID variable must be in `df`, and so must `count_by` when it is given.
  if (!is.null(count_by)) {
    assert_df_with_variables(df, list(id = .var, count_by = count_by))
  } else {
    assert_df_with_variables(df, list(id = .var))
  }

  # Keep only the rows in which the `required` variable is non-missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    has_required <- !is.na(df[[required]])
    df <- df[has_required, , drop = FALSE]
  }

  count_by_values <- if (!is.null(count_by)) df[[count_by]] else NULL

  # Delegate the counting to the vector-based statistics function.
  s_num_patients(
    x = df[[.var]],
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = count_by_values,
    unique_count_suffix = unique_count_suffix
  )
}

# Formatted function built from `s_num_patients_content()` with `make_afun()`.
# It selects the `unique`, `nonunique` and `unique_count` statistics and attaches a
# default format to each of them. `summarize_num_patients()` and `analyze_num_patients()`
# specialise it further with user-supplied `.stats`, `.formats` and `.labels`.
c_num_patients <- make_afun(
  s_num_patients_content,
  .stats = c("unique", "nonunique", "unique_count"),
  .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx")
)

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @export
#' @order 3
summarize_num_patients <- function(lyt,
                                   var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE,
                                   na_str = default_na_str(),
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   riskdiff = FALSE,
                                   ...) {
  checkmate::assert_flag(riskdiff)

  # The deprecated `indent_mod` argument is still honoured: its value replaces `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")
  # When more labels than statistics are supplied, keep only the labels of requested statistics.
  if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats]

  # Arguments forwarded to the statistics function.
  s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...)

  cfun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Without risk difference the statistics arguments are passed on directly; with it they
  # are bundled together with the content function for `afun_riskdiff`.
  extra_args <- if (isFALSE(riskdiff)) {
    s_args
  } else {
    list(
      afun = list("s_num_patients_content" = cfun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  # Choose the content function with plain `if`/`else`: `ifelse()` is a vectorised helper
  # for atomic values and is not meant to return function objects.
  content_fun <- if (isFALSE(riskdiff)) cfun else afun_riskdiff

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = .indent_mods
  )
}

#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_num_patients_content()` to the table layout.
#'
#' @details In general, functions that start with `analyze*` are expected to
#'   work like [rtables::analyze()], while functions that start with `summarize*`
#'   are based upon [rtables::summarize_row_groups()]. The latter provides a
#'   value for each dividing split in the row and column space but, because it
#'   is bound to the fundamental splits, it is repeated by design on every page
#'   when pagination is involved.
#'
#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
#'   ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
#'   AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
#' )
#'
#' tbl <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients("USUBJID", .stats = c("unique")) %>%
#'   build_table(df)
#'
#' tbl
#'
#' @export
#' @order 2
analyze_num_patients <- function(lyt,
                                 vars,
                                 required = NULL,
                                 count_by = NULL,
                                 unique_count_suffix = TRUE,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 riskdiff = FALSE,
                                 ...) {
  checkmate::assert_flag(riskdiff)

  # The deprecated `indent_mod` argument is still honoured: its value replaces `.indent_mods`.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")
  # When more labels than statistics are supplied, keep only the labels of requested statistics.
  if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats]

  # Arguments forwarded to the statistics function.
  s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...)

  afun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Without risk difference the statistics arguments are passed on directly; with it they
  # are bundled together with the analysis function for `afun_riskdiff`.
  extra_args <- if (isFALSE(riskdiff)) {
    s_args
  } else {
    list(
      afun = list("s_num_patients_content" = afun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  # Choose the analysis function with plain `if`/`else`: `ifelse()` is a vectorised helper
  # for atomic values and is not meant to return function objects.
  analysis_fun <- if (isFALSE(riskdiff)) afun else afun_riskdiff

  analyze(
    afun = analysis_fun,
    lyt = lyt,
    vars = vars,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}

#' Create a STEP Graph
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
#' along the continuous biomarker value subgroups.
#'
#' @param df (`tibble`)\cr result of [tidy.step()].
#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
#'   biomarker values.
#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
#'   ribbon area, or `NULL` to not plot a CI ribbon.
#' @param col (`character`)\cr colors.
#'
#' @return A `ggplot` STEP graph.
#'
#' @seealso Custom tidy method [tidy.step()].
#'
#' @examples
#' library(nestcolor)
#' library(survival)
#' lung$sex <- factor(lung$sex)
#'
#' # Survival example.
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' step_data <- broom::tidy(step_matrix)
#'
#' # Default plot.
#' g_step(step_data)
#'
#' # Add the reference 1 horizontal line.
#' library(ggplot2)
#' g_step(step_data) +
#'   ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
#'
#' # Use actual values instead of percentiles, different color for estimate and no CI,
#' # use log scale for y axis.
#' g_step(
#'   step_data,
#'   use_percentile = FALSE,
#'   est = list(col = "blue", lty = 1),
#'   ci_ribbon = NULL
#' ) + scale_y_log10()
#'
#' # Adding another curve based on additional column.
#' step_data$extra <- exp(step_data$`Percentile Center`)
#' g_step(step_data) +
#'   ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
#'
#' # Response example.
#' vars <- list(
#'   response = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#'
#' step_matrix <- fit_rsp_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(
#'     control_logistic(response_definition = "I(response == 2)"),
#'     control_step()
#'   )
#' )
#' step_data <- broom::tidy(step_matrix)
#' g_step(step_data)
#'
#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # x axis: percentile centers or actual biomarker interval centers.
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  # The `estimate` attribute names the column holding the point estimate;
  # the `biomarker` attribute is used below as the x axis label.
  attrs <- attributes(df)
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Colour of the estimate line, taken from `est$col`. Falls back to "blue",
  # the value that was previously hard-coded, when no colour is given.
  est_col <- if (is.null(est$col)) "blue" else est$col[[1]]

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = c("CI 95%" = ci_ribbon$fill)
      )
  }
  # Adding a second colour scale replaces the one set from `col` above; ggplot2 reports
  # this with a message, which is suppressed here.
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = c("Estimate" = est_col)
    ))

  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}

#' Custom Tidy Method for STEP Results
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tidy the STEP results into a `tibble` format ready for plotting.
#'
#' @param x (`step` matrix)\cr results from [fit_survival_step()] or [fit_rsp_step()].
#' @param ... not used here.
#'
#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
#'   respectively. Additional attributes carry metadata also used for plotting.
#'
#' @seealso [g_step()] which consumes the result from this function.
#'
#' @method tidy step
#'
#' @examples
#' library(survival)
#' lung$sex <- factor(lung$sex)
#' vars <- list(
#'   time = "time",
#'   event = "status",
#'   arm = "sex",
#'   biomarker = "age"
#' )
#' step_matrix <- fit_survival_step(
#'   variables = vars,
#'   data = lung,
#'   control = c(control_coxph(), control_step(num_points = 10, degree = 2))
#' )
#' broom::tidy(step_matrix)
#'
#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  res <- as.data.frame(x)
  col_names <- names(res)

  # Survival results carry a `loghr` column; otherwise a `logor` column is expected.
  if ("loghr" %in% col_names) {
    log_est_col <- "loghr"
    est_label <- "Hazard Ratio"
  } else {
    log_est_col <- "logor"
    est_label <- "Odds Ratio"
  }

  # Rename the estimate column and move estimate and CI bounds from the log scale
  # to the ratio scale.
  value_cols <- c(est_label, "ci_lower", "ci_upper")
  names(res)[match(log_est_col, col_names)] <- est_label
  res[, value_cols] <- exp(res[, value_cols])

  has_missing <- any(is.na(res[, value_cols]))
  has_huge <- any(abs(res[, value_cols]) > 1e10, na.rm = TRUE)

  if (has_missing) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (has_huge) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (has_missing || has_huge) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Metadata is attached as attributes for later use when plotting.
  biomarker_name <- attr(x, "variables")$biomarker
  ci_label <- f_conf_level(attr(x, "control")$conf_level)
  structure(
    tibble::as_tibble(res),
    estimate = est_label,
    biomarker = biomarker_name,
    ci = ci_label
  )
}

#' Encode Categorical Missing Values in a Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function to encode missing entries across groups of categorical
#' variables in a data frame.
#'
#' @details Missing entries are those with `NA` or empty strings and will
#'   be replaced with a specified value. If factor variables include missing
#'   values, the missing value will be inserted as the last level.
#'   Similarly, in case character or logical variables should be converted to factors
#'   with the `char_as_factor` or `logical_as_factor` options, the missing values will
#'   be set as the last level.
#'
#' @param data (`data.frame`)\cr data set.
#' @param omit_columns (`character`)\cr names of variables from `data` that should
#'   not be modified by this function.
#' @param char_as_factor (`flag`)\cr whether to convert character variables
#'   in `data` to factors.
#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
#'   in `data` to factors.
#' @param na_level (`string`)\cr used to replace all `NA` or empty
#'   values inside non-`omit_columns` columns.
#'
#' @return A `data.frame` with the chosen modifications applied.
#'
#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
#'
#' @examples
#' my_data <- data.frame(
#'   u = c(TRUE, FALSE, NA, TRUE),
#'   v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
#'   w = c("A", "B", NA, "C"),
#'   x = c("D", "E", "F", NA),
#'   y = c("G", "H", "I", ""),
#'   z = c(1, 2, 3, 4),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Example 1
#' # Encode missing values in all character or factor columns.
#' df_explicit_na(my_data)
#' # Also convert logical columns to factor columns.
#' df_explicit_na(my_data, logical_as_factor = TRUE)
#' # Encode missing values in a subset of columns.
#' df_explicit_na(my_data, omit_columns = c("x", "y"))
#'
#' # Example 2
#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
#' # included when generating `rtables`.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
#' adsl <- tern_ex_adsl
#' adsl$SEX[adsl$SEX == "M"] <- NA
#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
#'
#' # Example 3
#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
#' # a numeric variable will not be included in the summary statistics, nor will they be included
#' # in the denominator value for calculating the percent values.
#' adsl <- tern_ex_adsl
#' adsl$AGE[adsl$AGE < 30] <- NA
#' adsl <- df_explicit_na(adsl)
#'
#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process: all columns of `data` except those in `omit_columns`.
  cols <- names(data)
  if (!is.null(omit_columns)) {
    cols <- setdiff(cols, omit_columns)
  }
  if (length(cols) == 0) {
    return(data)
  }

  # Makes sure the columns exist in data and names are not duplicated.
  assert_df_with_variables(data, split(cols, cols))

  for (col_name in cols) {
    values <- data[[col_name]]
    label <- obj_label(values)

    # Conversion decisions are based on the original column type.
    from_logical <- is.logical(values) && logical_as_factor
    to_factor <- (is.character(values) && char_as_factor) || from_logical

    # Logical input is turned into character first so that its NA values
    # can be replaced like those of a character column.
    if (from_logical) {
      values <- as.character(values)
    }

    # Only factor and character columns are modified.
    if (!(is.factor(values) || is.character(values))) {
      next
    }

    # Handle empty strings and NA values.
    values <- explicit_na(sas_na(values), label = na_level)

    # Build the factor with sorted levels and `na_level`, if present, as the last level.
    if (to_factor) {
      present <- unique(values)
      lvls <- setdiff(sort(present), na_level)
      if (na_level %in% present) {
        lvls <- c(lvls, na_level)
      }
      values <- factor(values, levels = lvls)
    }

    # Write the column back with the label it had on entry.
    data[, col_name] <- formatters::with_label(values, label = label)
  }

  data
}

#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions that are used internally for the STEP calculations.
#'
#' @inheritParams argument_convention
#'
#' @name h_step
#' @include control_step.R
NULL

#' @describeIn h_step creates the windows for STEP, based on the control settings
#'   provided.
#'
#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
#' @param control (named `list`)\cr output from `control_step()`.
#'
#' @return
#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
#'   and the interval information matrix `interval`.
#'
#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_pts <- control$num_points
  bw <- control$bandwidth
  pt_ids <- seq_len(n_pts)
  interval_cols <- paste("Interval", c("Center", "Lower", "Upper"))

  # One column per window: TRUE where the observation falls inside the window.
  in_window <- matrix(FALSE, length(x), n_pts)

  if (control$use_percentile) {
    # Windows are defined on the percentile scale and mapped back to biomarker values.
    info <- matrix(0, n_pts, 6)
    colnames(info) <- c(paste("Percentile", c("Center", "Lower", "Upper")), interval_cols)
    centers <- seq(0, 1, length.out = n_pts + 2)[-1]
    for (k in pt_ids) {
      pct_bounds <- c(
        max(centers[k] - bw, 0),
        min(centers[k] + bw, 1)
      )
      info[k, 2:3] <- pct_bounds
      info[k, 5:6] <- stats::quantile(x, pct_bounds)
      in_window[, k] <- x >= info[k, 5] & x <= info[k, 6]
    }
    # Center is the middle point of the percentile window (the last grid point is dropped).
    info[, 1] <- centers[-n_pts - 1]
    info[, 4] <- stats::quantile(x, info[, 1])
  } else {
    # Windows are defined directly on the biomarker scale, clipped to the observed range.
    info <- matrix(0, n_pts, 3)
    colnames(info) <- interval_cols
    x_range <- c(min(x), max(x))
    centers <- seq(x_range[1], x_range[2], length.out = n_pts + 2)[-1]
    for (k in pt_ids) {
      info[k, 2:3] <- c(
        max(centers[k] - bw, x_range[1]),
        min(centers[k] + bw, x_range[2])
      )
      in_window[, k] <- x >= info[k, 2] & x <= info[k, 3]
    }
    # Center is the same as the point for predicting (the last grid point is dropped).
    info[, 1] <- centers[-n_pts - 1]
  }

  list(sel = in_window, interval = info)
}

#' @describeIn h_step calculates the treatment effect estimate
#'   on the linear predictor scale and the corresponding standard error from a STEP `model` fitted
#'   on `data` given `variables` specification, for a single biomarker value `x`.
#'   This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
#'   ratio estimates.
#'
#' @param model the regression model object.
#'
#' @return
#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
#'
#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  # Two prediction rows that differ only in the arm level, both at biomarker value `x`.
  arm_levels <- levels(data[[variables$arm]])
  pred_data <- data[c(1, 1), ]
  pred_data[, variables$biomarker] <- x
  pred_data[, variables$arm] <- arm_levels

  # Design matrix of the two rows, built from the model terms without the response.
  rhs_terms <- stats::delete.response(stats::terms(model))
  pred_frame <- stats::model.frame(rhs_terms, data = pred_data, xlev = model$xlevels)
  design <- stats::model.matrix(rhs_terms, data = pred_frame, contrasts.arg = model$contrasts)

  # Note: It is important to use the coef subset from matrix, otherwise intercept and
  # strata are included for coxph() models.
  beta <- stats::coef(model)
  design <- design[, names(beta)]

  # Contrast between the second and the first arm level and its variance.
  contrast <- diff(design)
  est <- contrast %*% beta
  se <- sqrt(contrast %*% stats::vcov(model) %*% t(contrast))

  c(est = est, se = se)
}

#' @describeIn h_step builds the model formula used in survival STEP calculations.
#'
#' @return
#' * `h_step_survival_formula()` returns a model formula.
#'
#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Right-hand side: arm, optionally interacted with a raw polynomial in the biomarker.
  rhs_text <- variables$arm
  if (control$degree > 0) {
    rhs_text <- paste0(
      rhs_text, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)"
    )
  }

  # Additive covariates, if any.
  if (!is.null(variables$covariates)) {
    covariate_text <- paste(variables$covariates, collapse = "+")
    rhs_text <- paste(rhs_text, "+", covariate_text)
  }

  # Stratification term, if any.
  if (!is.null(variables$strata)) {
    strata_text <- paste0(variables$strata, collapse = ", ")
    rhs_text <- paste0(rhs_text, " + strata(", strata_text, ")")
  }

  stats::as.formula(paste0("Surv(", variables$time, ", ", variables$event, ") ~ ", rhs_text))
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   Cox regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
#'   `events`, log hazard ratio estimates `loghr`, standard error `se`,
#'   and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
#'   included for each biomarker value in `x`.
#'
#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Fit the Cox model; warnings raised while fitting are muffled and only remembered,
  # so that a single summary warning can be issued afterwards.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  effects <- vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  )
  effects <- t(effects)
  est <- effects[, "est"]
  se <- effects[, "se"]

  # Wald confidence interval on the log hazard ratio scale.
  z_crit <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = est,
    se = se,
    ci_lower = est - z_crit * se,
    ci_upper = est + z_crit * se
  )
}

#' @describeIn h_step builds the model formula used in response STEP calculations.
#'
#' @return
#' * `h_step_rsp_formula()` returns a model formula.
#'
#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Left-hand side: the response definition with the first literal "response"
  # replaced by the actual response variable name.
  lhs_text <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )

  # Right-hand side: arm, optionally interacted with a raw polynomial in the biomarker.
  rhs_text <- variables$arm
  if (control$degree > 0) {
    rhs_text <- paste0(
      rhs_text, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)"
    )
  }

  # Additive covariates, if any.
  if (!is.null(variables$covariates)) {
    covariate_text <- paste(variables$covariates, collapse = "+")
    rhs_text <- paste(rhs_text, "+", covariate_text)
  }

  # Stratification term: several strata variables are combined through their interaction.
  if (!is.null(variables$strata)) {
    strata_text <- variables$strata
    if (length(strata_text) > 1) {
      strata_text <- paste0("I(interaction(", paste0(strata_text, collapse = ", "), "))")
    }
    rhs_text <- paste0(rhs_text, "+ strata(", strata_text, ")")
  }

  stats::as.formula(paste0(lhs_text, " ~ ", rhs_text))
}

#' @describeIn h_step estimates the model with `formula` built based on
#'   `variables` in `data` for a given `subset` and `control` parameters for the
#'   logistic regression.
#'
#' @param formula (`formula`)\cr the regression model formula.
#' @param subset (`logical`)\cr subset vector.
#'
#' @return
#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
#'   ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
#'   `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
#'
#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Fit a logistic regression, or a conditional logistic regression when strata are given.
  # Warnings raised while fitting are muffled and only remembered, so that a single
  # summary warning can be issued afterwards.
  had_fit_warning <- FALSE
  fit <- withCallingHandlers(
    if (!is.null(variables$strata)) {
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    } else {
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    },
    warning = function(w) {
      had_fit_warning <<- TRUE
      invokeRestart("muffleWarning")
    }
  )
  if (had_fit_warning) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  effects <- vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = numeric(2),
    data = data,
    model = fit,
    variables = variables
  )
  effects <- t(effects)
  est <- effects[, "est"]
  se <- effects[, "se"]

  # Wald confidence interval on the log odds ratio scale.
  z_crit <- stats::qnorm((1 + control$conf_level) / 2)
  cbind(
    n = length(fit$y),
    logor = est,
    se = se,
    ci_lower = est - z_crit * se,
    ci_upper = est + z_crit * se
  )
}

#' Convert List of Groups to Data Frame
#'
#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
#'
#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
#'   levels that belong to it in the character vectors that are elements of the list.
#'
#' @return [tibble::tibble()] in the required format.
#'
#' @examples
#' grade_groups <- list(
#'   "Any Grade (%)" = c("1", "2", "3", "4", "5"),
#'   "Grade 3-4 (%)" = c("3", "4"),
#'   "Grade 5 (%)" = "5"
#' )
#' groups_list_to_df(grade_groups)
#'
#' @export
groups_list_to_df <- function(groups_list) {
  # The input must be a named list whose elements are all character vectors.
  checkmate::assert_list(groups_list, names = "named")
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: a value name derived from the label via `make_names()`,
  # the label itself, the member levels (names dropped), and an empty list of
  # extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = rep(list(list()), n_groups)
  )
}

#' Reference and Treatment Group Combination
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
#' columns in the `rtables` framework and teal modules.
#'
#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
#' @param ref (`character`)\cr the reference level(s).
#' @param collapse (`string`)\cr a character string used to join the level names within each group when
#'   building the names of the returned list.
#'
#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
#'
#' @examples
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("B: Placebo")
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM)
#'
#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)
  all_levels <- levels(fct)

  # Without an explicit reference the first level is used; an explicit
  # reference must consist of existing levels only.
  if (!is.null(ref)) {
    checkmate::assert_subset(ref, all_levels)
  } else {
    ref <- all_levels[1]
  }

  # Split the levels (in their original order) into reference and treatment.
  in_ref <- all_levels %in% ref
  groups <- list(
    ref = all_levels[in_ref],
    trt = all_levels[!in_ref]
  )

  # Each group is named after its member levels joined by `collapse`.
  group_names <- vapply(groups, paste, character(1), collapse = collapse, USE.NAMES = FALSE)
  stats::setNames(groups, nm = group_names)
}

#' Split Columns by Groups of Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams groups_list_to_df
#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
#'   control formats (`format`), add a joint column for all groups (`incl_all`).
#'
#' @return A layout object suitable for passing to further layouting functions. Adding
#'   this function to an `rtable` layout will add a column split including the given
#'   groups to the table layout.
#'
#' @seealso [rtables::split_cols_by()]
#'
#' @examples
#' # 1 - Basic use
#'
#' # Without group combination `split_cols_by_groups` is
#' # equivalent to [rtables::split_cols_by()].
#' basic_table() %>%
#'   split_cols_by_groups("ARM") %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Add a reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 2 - Adding group specification
#'
#' # Manual preparation of the groups.
#' groups <- list(
#'   "Arms A+B" = c("A: Drug X", "B: Placebo"),
#'   "Arms A+C" = c("A: Drug X", "C: Combination")
#' )
#'
#' # Use of split_cols_by_groups without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Including differentiated output in the reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff. of Averages" = rcell(NULL))
#'       } else {
#'         in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' # 3 - Binary list dividing factor levels into reference and treatment
#'
#' # `combine_groups` defines reference and treatment.
#' groups <- combine_groups(
#'   fct = DM$ARM,
#'   ref = c("A: Drug X", "B: Placebo")
#' )
#' groups
#'
#' # Use group definition without reference column.
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups_list = groups) %>%
#'   add_colcounts() %>%
#'   analyze("AGE") %>%
#'   build_table(DM)
#'
#' # Use group definition with reference column (first item of groups).
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
#'   add_colcounts() %>%
#'   analyze(
#'     "AGE",
#'     afun = function(x, .ref_group, .in_ref_col) {
#'       if (.in_ref_col) {
#'         in_rows("Diff Mean" = rcell(NULL))
#'       } else {
#'         in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
#'       }
#'     }
#'   ) %>%
#'   build_table(DM)
#'
#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # Without a group specification this is a plain column split.
  if (is.null(groups_list)) {
    return(
      split_cols_by(
        lyt = lyt,
        var = var,
        ref_group = ref_group,
        ...
      )
    )
  }

  groups_df <- groups_list_to_df(groups_list)

  # The caller names the reference group by its label; the split is given the
  # corresponding value name from `groups_df` instead.
  ref_valname <- if (is.null(ref_group)) {
    NULL
  } else {
    groups_df$valname[groups_df$label == ref_group]
  }

  # Add the combined levels and keep only those in the resulting split.
  split_cols_by(
    lyt = lyt,
    var = var,
    split_fun = add_combo_levels(groups_df, keep_levels = groups_df$valname),
    ref_group = ref_valname,
    ...
  )
}

#' Combine Counts
#'
#' Simplifies the estimation of column counts, especially when group combination is required.
#'
#' @inheritParams combine_groups
#' @inheritParams groups_list_to_df
#'
#' @return A `vector` of column counts.
#'
#' @seealso [combine_groups()]
#'
#' @examples
#' ref <- c("A: Drug X", "B: Placebo")
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#'
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' ref <- "A: Drug X"
#' groups <- combine_groups(fct = DM$ARM, ref = ref)
#' col_counts <- combine_counts(
#'   fct = DM$ARM,
#'   groups_list = groups
#' )
#'
#' basic_table() %>%
#'   split_cols_by_groups("ARM", groups) %>%
#'   add_colcounts() %>%
#'   analyze_vars("AGE") %>%
#'   build_table(DM, col_counts = col_counts)
#'
#' @export
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)
  # Tabulate once; both code paths below only read from this table.
  level_counts <- table(fct)

  # No grouping requested: one count per factor level, named by level.
  if (is.null(groups_list)) {
    return(stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]]))
  }

  # One count per group: the sum of the counts of the levels it contains.
  vapply(
    X = groups_list,
    FUN = function(lvls) sum(level_counts[lvls]),
    FUN.VALUE = numeric(1)
  )
}

#' Count Patients with Marked Laboratory Abnormalities
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
#' patients with at least one valid measurement during the analysis.
#'   * For `Single, not last` and `Last or replicated`: Numerator is number of patients
#'     with `Single, not last` and `Last or replicated` levels, respectively.
#'   * For `Any Abnormality`: Numerator is the number of patients with either single or
#'     last or replicated marked abnormalities.
#'
#' @inheritParams argument_convention
#' @param category (`list`)\cr with different marked category names for single
#'   and last or replicated.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_marked")`
#'   to see available statistics for this function.
#'
#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
#'   abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
#'   patient will be counted only under the `Last or replicated` category.
#'
#' @name abnormal_by_marked
#' @order 1
NULL

#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
#'
#' @return
#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
#'   `Last or replicated`, and `Any Abnormality` results.
#'
#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  # At most one distinct direction value may be present in `df`.
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  id_var <- variables[["id"]]

  # Denominator: distinct subjects of the parameter-level parent data that fall
  # into the current column. Subjects with records in several directions are
  # counted only once.
  param_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_row$full_parent_df[[1]][[id_var]]
  ids_in_column <- parent_ids[param_row$cur_col_subset[[1]]]
  denom <- length(unique(ids_in_column))

  # Nobody in the denominator: all counts and fractions are zero.
  if (denom == 0) {
    zero <- c(0, 0)
    return(list(count_fraction = list(
      "Single, not last" = zero,
      "Last or replicated" = zero,
      "Any Abnormality" = zero
    )))
  }

  # Distinct subjects in `df` whose `.var` value is one of `categories`.
  ids_with <- function(categories) {
    unique(df[df[[.var]] %in% categories, id_var, drop = TRUE])
  }
  # Pair a count with its fraction of the denominator.
  with_fraction <- function(n) {
    c(n, n / denom)
  }

  ids_last_replicated <- ids_with(category[["last_replicated"]])
  # Subjects with both kinds of abnormality count as last/replicated only.
  ids_single <- setdiff(ids_with(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = with_fraction(n_single),
    "Last or replicated" = with_fraction(n_last_replicated),
    "Any Abnormality" = with_fraction(n_single + n_last_replicated)
  ))
}

#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_marked()`.
#'
#' @return
#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_marked <- make_afun(
  # Wrap the statistics function so that its `count_fraction` statistic is
  # formatted with `format_count_fraction`.
  s_count_abnormal_by_marked,
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_marked()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
#'   ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
#'   ANRIND = factor(c(
#'     "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
#'     "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
#'     "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
#'   )),
#'   ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
#'   PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
#'   AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
#'   stringsAsFactors = FALSE
#' )
#'
#' df <- df %>%
#'   mutate(abn_dir = factor(
#'     case_when(
#'       ANRIND == "LOW LOW" ~ "Low",
#'       ANRIND == "HIGH HIGH" ~ "High",
#'       TRUE ~ ""
#'     ),
#'     levels = c("Low", "High")
#'   ))
#'
#' # Select only post-baseline records.
#' df <- df %>% filter(ONTRTFL == "Y")
#' df_crp <- df %>%
#'   filter(PARAMCD == "CRP") %>%
#'   droplevels()
#' full_parent_df <- list(df_crp, "not_needed")
#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
#' spl_context <- data.frame(
#'   split = c("PARAMCD", "GRADE_DIR"),
#'   full_parent_df = I(full_parent_df),
#'   cur_col_subset = I(cur_col_subset)
#' )
#'
#' map <- unique(
#'   df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
#' ) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAMCD, abn_dir)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_to_map(map)
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAMCD") %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = "unique_count"
#'   ) %>%
#'   split_rows_by(
#'     "abn_dir",
#'     split_fun = trim_levels_in_group("abn_dir")
#'   ) %>%
#'   count_abnormal_by_marked(
#'     var = "AVALCAT1",
#'     variables = list(
#'       id = "USUBJID",
#'       param = "PARAMCD",
#'       direction = "abn_dir"
#'     )
#'   ) %>%
#'   build_table(df = df)
#'
#' @export
#' @order 2
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                     variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"),
                                     na_str = default_na_str(),
                                     nested = TRUE,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Everything the analysis function needs beyond the data itself.
  extra_args <- list(category = category, variables = variables, ...)

  # Customise the formatted analysis function with the user's selections; the
  # `count_fraction` statistic is ungrouped.
  afun <- make_afun(
    a_count_abnormal_by_marked,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Append the analysis to the layout and hand the extended layout back.
  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    na_str = na_str,
    nested = nested,
    show_labels = "hidden",
    extra_args = extra_args
  )
}

#' Stack Multiple Grobs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Stack grobs as a new grob with 1 column and multiple rows layout.
#'
#' @param ... grobs.
#' @param grobs list of grobs.
#' @param padding unit of length 1, space between each grob.
#' @param vp a [viewport()] object (or `NULL`).
#' @param name a character identifier for the grob.
#' @param gp A [gpar()] object.
#'
#' @return A `grob`.
#'
#' @examples
#' library(grid)
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid.newpage()
#' grid.draw(stack_grobs(g1, g2, g3))
#'
#' showViewport()
#'
#' grid.newpage()
#' pushViewport(viewport(layout = grid.layout(1, 2)))
#' vp1 <- viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(stack_grobs(g1, g2, g3, vp = vp1, name = "test"))
#'
#' showViewport()
#' grid.ls(grobs = TRUE, viewports = TRUE, print = FALSE)
#'
#' @export
stack_grobs <- function(...,
                        grobs = list(...),
                        padding = grid::unit(2, "line"),
                        vp = NULL,
                        gp = NULL,
                        name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  n_grobs <- length(grobs)
  # A single grob needs no layout and is returned unchanged.
  if (n_grobs == 1) {
    return(grobs[[1]])
  }

  # Grobs occupy the odd layout rows; the even rows in between hold the padding.
  n_layout <- 2 * n_grobs - 1
  row_height <- function(i) {
    if (i %% 2 == 0) padding else grid::unit(1, "null")
  }
  hts <- do.call(grid::unit.c, lapply(seq(1, n_layout), row_height))

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_layout, ncol = 1, heights = hts)
  )

  # Wrap every grob in a gTree positioned on its own (odd) layout row.
  place_in_row <- function(g, i) {
    grid::gTree(
      children = grid::gList(g),
      vp = grid::viewport(layout.pos.row = i, layout.pos.col = 1)
    )
  }
  grob_rows <- seq_along(grobs) * 2 - 1
  nested_grobs <- Map(place_in_row, grobs, grob_rows)

  # Inner tree carries the layout viewport; the outer tree carries the
  # user-supplied `vp`, `gp` and `name`.
  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Arrange Multiple Grobs
#'
#' Arrange grobs as a new grob with \verb{n*m (rows*cols)} layout.
#'
#' @inheritParams stack_grobs
#' @param ncol number of columns in layout.
#' @param nrow number of rows in layout.
#' @param padding_ht unit of length 1, vertical space between each grob.
#' @param padding_wt unit of length 1, horizontal space between each grob.
#'
#' @return A `grob`.
#' @examples
#' library(grid)
#'
#' \donttest{
#' num <- lapply(1:9, textGrob)
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(grobs = num, ncol = 2))
#'
#' showViewport()
#'
#' g1 <- circleGrob(gp = gpar(col = "blue"))
#' g2 <- circleGrob(gp = gpar(col = "red"))
#' g3 <- textGrob("TEST TEXT")
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, nrow = 2))
#'
#' showViewport()
#'
#' grid::grid.newpage()
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 3))
#'
#' grid::grid.newpage()
#' grid::pushViewport(grid::viewport(layout = grid::grid.layout(1, 2)))
#' vp1 <- grid::viewport(layout.pos.row = 1, layout.pos.col = 2)
#' grid.draw(arrange_grobs(g1, g2, g3, ncol = 2, vp = vp1))
#'
#' showViewport()
#' }
#' @export
arrange_grobs <- function(...,
                          grobs = list(...),
                          ncol = NULL, nrow = NULL,
                          padding_ht = grid::unit(2, "line"),
                          padding_wt = grid::unit(2, "line"),
                          vp = NULL,
                          gp = NULL,
                          name = NULL) {
  checkmate::assert_true(
    all(vapply(grobs, grid::is.grob, logical(1)))
  )

  n_grobs <- length(grobs)
  # A single grob needs no layout and is returned unchanged.
  if (n_grobs == 1) {
    return(grobs[[1]])
  }

  # Fill in whichever layout dimension was not supplied; with neither given,
  # a single column is used.
  if (is.null(ncol) && is.null(nrow)) {
    ncol <- 1
  }
  if (is.null(nrow)) {
    nrow <- ceiling(n_grobs / ncol)
  } else if (is.null(ncol)) {
    ncol <- ceiling(n_grobs / nrow)
  }

  if (ncol * nrow < n_grobs) {
    stop("specififed ncol and nrow are not enough for arranging the grobs ")
  }

  # A one-column arrangement is exactly a vertical stack.
  if (ncol == 1) {
    return(stack_grobs(grobs = grobs, padding = padding_ht, vp = vp, gp = gp, name = name))
  }

  # Odd tracks hold grobs ("null" units share the space), even tracks are gaps.
  track_sizes <- function(n_tracks, gap) {
    sizes <- lapply(seq(1, n_tracks), function(i) {
      if (i %% 2 == 0) gap else grid::unit(1, "null")
    })
    do.call(grid::unit.c, sizes)
  }
  n_col <- 2 * ncol - 1
  n_row <- 2 * nrow - 1
  hts <- track_sizes(n_row, padding_ht)
  wts <- track_sizes(n_col, padding_wt)

  main_vp <- grid::viewport(
    layout = grid::grid.layout(nrow = n_row, ncol = n_col, widths = wts, heights = hts)
  )

  # Layout cells in filling order: the column index varies fastest, so the
  # grobs fill the grid row by row. Surplus cells stay empty.
  cells <- expand.grid(col = seq(ncol) * 2 - 1, row = seq(nrow) * 2 - 1)
  n_placed <- min(n_grobs, nrow(cells))
  nested_grobs <- lapply(seq_len(n_placed), function(k) {
    grid::gTree(
      children = grid::gList(grobs[[k]]),
      vp = grid::viewport(layout.pos.row = cells$row[k], layout.pos.col = cells$col[k])
    )
  })

  # Inner tree carries the layout viewport; the outer tree carries the
  # user-supplied `vp`, `gp` and `name`.
  grobs_mainvp <- grid::gTree(
    children = do.call(grid::gList, nested_grobs),
    vp = main_vp
  )

  grid::gTree(
    children = grid::gList(grobs_mainvp),
    vp = vp,
    gp = gp,
    name = name
  )
}

#' Draw `grob`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Draw grob on device page.
#'
#' @param grob grid object
#' @param newpage draw on a new page
#' @param vp a [viewport()] object (or `NULL`).
#'
#' @return A `grob`.
#'
#' @examples
#' library(dplyr)
#' library(grid)
#'
#' \donttest{
#' rect <- rectGrob(width = grid::unit(0.5, "npc"), height = grid::unit(0.5, "npc"))
#' rect %>% draw_grob(vp = grid::viewport(angle = 45))
#'
#' num <- lapply(1:10, textGrob)
#' num %>%
#'   arrange_grobs(grobs = .) %>%
#'   draw_grob()
#' showViewport()
#' }
#'
#' @export
draw_grob <- function(grob, newpage = TRUE, vp = NULL) {
  # Optionally start from a fresh page.
  if (newpage) {
    grid::grid.newpage()
  }

  # Draw inside the supplied viewport, if any; the viewport is pushed and not
  # popped again by this function.
  has_viewport <- !is.null(vp)
  if (has_viewport) {
    grid::pushViewport(vp)
  }

  grid::grid.draw(grob)
}

tern_grob <- function(x) {
  # Put "ternGrob" in front of the existing classes; `unique()` keeps the class
  # vector free of duplicates when `x` is already a `ternGrob`.
  tagged_classes <- unique(c("ternGrob", class(x)))
  class(x) <- tagged_classes
  x
}

print.ternGrob <- function(x, ...) {
  # Printing a `ternGrob` draws it on a fresh page of the current device.
  grid::grid.newpage()
  grid::grid.draw(x)
  # Like every print method, hand the object back invisibly so that the call
  # can be used in pipelines and assignments.
  invisible(x)
}

#' Cox Regression Helper: Interactions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Test and estimate the effect of a treatment in interaction with a covariate.
#' The effect is estimated as the HR of the tested treatment for a given level
#' of the covariate, in comparison to the treatment control.
#'
#' @inheritParams argument_convention
#' @param x (`numeric` or `factor`)\cr the values of the covariate to be tested.
#' @param effect (`string`)\cr the name of the effect to be tested and estimated.
#' @param covar (`string`)\cr the name of the covariate in the model.
#' @param mod (`coxph`)\cr the Cox regression model.
#' @param label (`string`)\cr the label to be returned as `term_label`.
#' @param control (`list`)\cr a list of controls as returned by [control_coxreg()].
#' @param ... see methods.
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4,
#'       labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name cox_regression_inter
NULL

#' @describeIn cox_regression_inter S3 generic helper function to determine interaction effect.
#'
#' @return
#' * `h_coxreg_inter_effect()` returns a `data.frame` of covariate interaction effects consisting of the following
#'   variables: `effect`, `term`, `term_label`, `level`, `n`, `hr`, `lcl`, `ucl`, `pval`, and `pval_inter`.
#'
#' @export
h_coxreg_inter_effect <- function(x,
                                  effect,
                                  covar,
                                  mod,
                                  label,
                                  control,
                                  ...) {
  # S3 generic: dispatches on the class of the first argument, `x`.
  UseMethod("h_coxreg_inter_effect")
}

#' @describeIn cox_regression_inter Method for `numeric` class. Estimates the interaction with a `numeric` covariate.
#'
#' @method h_coxreg_inter_effect numeric
#'
#' @param at (`list`)\cr a list with items named after the covariate, every
#'   item is a vector of levels at which the interaction should be estimated.
#'
#' @export
h_coxreg_inter_effect.numeric <- function(x,
                                          effect,
                                          covar,
                                          mod,
                                          label,
                                          control,
                                          at,
                                          ...) {
  all_coefs <- stats::coef(mod)

  # Positions of the terms mentioning `effect`, counted among the non-strata
  # terms; exactly two are required (main effect and interaction).
  term_labels <- attr(stats::terms(mod), "term.labels")
  non_strata_terms <- term_labels[!grepl("strata\\(", term_labels)]
  term_indices <- grep(pattern = effect, x = non_strata_terms)
  checkmate::assert_vector(term_indices, len = 2)

  # Coefficients, their variances and their covariance for those two terms.
  vc <- stats::vcov(mod)
  betas <- all_coefs[term_indices]
  betas_var <- diag(vc)[term_indices]
  betas_cov <- vc[term_indices[1], term_indices[2]]

  # Covariate values at which to estimate: those requested in `at`, else the
  # median of the covariate.
  at_values <- at[[covar]]
  xval <- if (is.null(at_values)) stats::median(x) else at_values

  # The coefficient whose name does not mention the covariate is the main
  # effect; the other one is the interaction.
  is_main <- !grepl(covar, names(betas))
  coef_hat <- betas[is_main] + xval * betas[!is_main]
  coef_se <- sqrt(
    betas_var[is_main] +
      xval^2 * betas_var[!is_main] +
      2 * xval * betas_cov
  )

  # Normal-approximation confidence limits on the log scale, exponentiated to
  # the hazard ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  data.frame(
    effect = "Covariate:",
    term = rep(covar, length(xval)),
    term_label = paste0("  ", xval),
    level = as.character(xval),
    n = NA,
    hr = exp(coef_hat),
    lcl = exp(coef_hat - q_norm * coef_se),
    ucl = exp(coef_hat + q_norm * coef_se),
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `factor` class. Estimate the interaction with a `factor` covariate.
#'
#' @method h_coxreg_inter_effect factor
#'
#' @param data (`data.frame`)\cr the data frame on which the model was fit.
#'
#' @export
h_coxreg_inter_effect.factor <- function(x,
                                         effect,
                                         covar,
                                         mod,
                                         label,
                                         control,
                                         data,
                                         ...) {
  lvl_given <- levels(x)

  # Use the confidence level requested through `control`, as the numeric
  # method does. Fall back to 0.95 only when `control` is not supplied or does
  # not carry a `conf_level`.
  conf_level <- if (missing(control) || is.null(control$conf_level)) {
    0.95
  } else {
    control$conf_level
  }

  # Only the first matrix of the returned list is used.
  y <- h_coxreg_inter_estimations(
    variable = effect, given = covar,
    lvl_var = levels(data[[effect]]),
    lvl_given = lvl_given,
    mod = mod,
    conf_level = conf_level
  )[[1]]

  # One row per level of the covariate; counts and p-values are not estimated
  # here and are left as NA.
  data.frame(
    effect = "Covariate:",
    term = rep(covar, nrow(y)),
    term_label = paste0("  ", lvl_given),
    level = lvl_given,
    n = NA,
    hr = y[, "hr"],
    lcl = y[, "lcl"],
    ucl = y[, "ucl"],
    pval = NA,
    pval_inter = NA,
    stringsAsFactors = FALSE
  )
}

#' @describeIn cox_regression_inter Method for `character` class. Estimate the interaction with a `character` covariate.
#'   This makes an automatic conversion to `factor` and then forwards to the method for factors.
#'
#' @method h_coxreg_inter_effect character
#'
#' @note
#' * Automatic conversion of character to factor does not guarantee results can be generated correctly. It is
#'   therefore better to always pre-process the dataset such that factors are manually created from character
#'   variables before passing the dataset to [rtables::build_table()].
#'
#' @export
h_coxreg_inter_effect.character <- function(x,
                                            effect,
                                            covar,
                                            mod,
                                            label,
                                            control,
                                            data,
                                            ...) {
  # Convert the character covariate to a factor and re-dispatch, passing every
  # other argument through untouched.
  h_coxreg_inter_effect(
    x = as.factor(x),
    effect = effect,
    covar = covar,
    mod = mod,
    label = label,
    control = control,
    data = data,
    ...
  )
}

#' @describeIn cox_regression_inter A higher level function to get
#'   the results of the interaction test and the estimated values.
#'
#' @return
#' * `h_coxreg_extract_interaction()` returns the result of an interaction test and the estimated values. If
#'   no interaction, [h_coxreg_univar_extract()] is applied instead.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' h_coxreg_extract_interaction(
#'   mod = mod, effect = "armcd", covar = "covar1", data = dta_bladder,
#'   control = control_coxreg()
#' )
#'
#' @export
h_coxreg_extract_interaction <- function(effect,
                                         covar,
                                         mod,
                                         data,
                                         at,
                                         control) {
  # Without any second-order term in the model there is no interaction to
  # report: return the univariate extraction with an empty interaction p-value.
  has_interaction <- any(attr(stats::terms(mod), "order") == 2)
  if (!has_interaction) {
    univar <- h_coxreg_univar_extract(
      effect = effect, covar = covar, mod = mod, data = data, control = control
    )
    univar$pval_inter <- NA
    return(univar)
  }

  # Map the requested p-value method to the test statistic name.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)
  anova_tab <- broom::tidy(mod_aov)

  # p-values of the main treatment effect and of the interaction term(s),
  # the latter identified by a ":" in the term name.
  pval <- anova_tab[anova_tab$term == effect, ][["p.value"]]
  pval_inter <- anova_tab[grep(":", anova_tab$term), ][["p.value"]]

  covar_label <- unname(labels_or_names(data[covar]))

  # Header row carrying the tests; the estimates follow per covariate level
  # or value.
  covar_test <- data.frame(
    effect = "Covariate:",
    term = covar,
    term_label = covar_label,
    level = "",
    n = mod$n, hr = NA, lcl = NA, ucl = NA, pval = pval,
    pval_inter = pval_inter,
    stringsAsFactors = FALSE
  )

  # Estimate the interaction; the method is chosen by the covariate's class.
  estimates <- h_coxreg_inter_effect(
    data[[covar]],
    covar = covar,
    effect = effect,
    mod = mod,
    label = covar_label,
    at = at,
    control = control,
    data = data
  )

  rbind(covar_test, estimates)
}

#' @describeIn cox_regression_inter Hazard ratio estimation in interactions.
#'
#' @param variable,given (`string`)\cr the name of variables in interaction. We seek the estimation
#'   of the levels of `variable` given the levels of `given`.
#' @param lvl_var,lvl_given (`character`)\cr corresponding levels as given by [levels()].
#' @param mod (`coxph`)\cr a fitted Cox regression model (see [survival::coxph()]).
#'
#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
#'   and Sex (F, M; reference Female) and the model being abbreviated: y ~ Arm + Sex + Arm:Sex.
#'   The cox regression estimates the coefficients along with a variance-covariance matrix for:
#'
#'   - b1 (arm b), b2 (arm c)
#'   - b3 (sex m)
#'   - b4 (arm b: sex m), b5 (arm c: sex m)
#'
#'   The estimation of the Hazard Ratio for arm C/sex M is given in reference
#'   to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5).
#'   The interaction coefficient is deduced by b2 + b5 while the standard error
#'   is obtained as $sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$.
#'
#' @return
#' * `h_coxreg_inter_estimations()` returns a list of matrices (one per level of variable) with rows corresponding
#'   to the combinations of `variable` and `given`, with columns:
#'   * `coef_hat`: Estimation of the coefficient.
#'   * `coef_se`: Standard error of the estimation.
#'   * `hr`: Hazard ratio.
#'   * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd * covar1, data = dta_bladder)
#' result <- h_coxreg_inter_estimations(
#'   variable = "armcd", given = "covar1",
#'   lvl_var = levels(dta_bladder$armcd),
#'   lvl_given = levels(dta_bladder$covar1),
#'   mod = mod, conf_level = .95
#' )
#' result
#'
#' @export
h_coxreg_inter_estimations <- function(variable,
                                       given,
                                       lvl_var,
                                       lvl_given,
                                       mod,
                                       conf_level = 0.95) {
  # Build coefficient-style names (`<variable name><level>`): all non-reference
  # levels of `variable` and every level of `given`.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)
  # One row per (variable level, given level) combination, ordered by variable first.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Add the interaction term name in both possible orders, so that a match is found
  # whichever order the fitted model used to name the coefficient.
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )
  # Keep the grouping factor and the row labels before `design_mat` is overwritten below.
  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # Use the first row of the model matrix as a named template with one entry per
  # model column, then reset every non-zero entry to zero.
  mmat <- stats::model.matrix(mod)[1, ]
  mmat[!mmat == 0] <- 0

  # For each combination, flag (set to 1) the template entries whose names equal the
  # `variable` term or one of the two interaction names. The `given` entry is dropped
  # before matching, so the main effect of `given` is never flagged.
  # `apply()` returns one column per combination.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      mmat
    }
  )
  colnames(design_mat) <- interaction_names

  coef <- stats::coef(mod)
  vcov <- stats::vcov(mod)
  betas <- as.matrix(coef)
  # Sum of the flagged coefficients for each combination.
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"
  # Standard error of that sum: square root of the sum of all entries of the
  # variance-covariance sub-matrix restricted to the flagged coefficients.
  coef_se <- apply(
    design_mat, 2,
    function(x) {
      vcov_el <- as.logical(x)
      y <- vcov[vcov_el, vcov_el]
      y <- sum(y)
      y <- sqrt(y)
      return(y)
    }
  )
  # Two-sided normal quantile for the requested confidence level.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)
  # Exponentiate the estimate and its normal-approximation limits to get the hazard
  # ratio and its confidence limits.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])
    x
  })
  # `apply()` over rows returns the results column-wise; transpose back to one row per combination.
  y <- t(y)
  # Split the rows by level of `variable` and convert each piece to a matrix.
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)
  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  y
}

#' Controls for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Sets a list of parameters for Cox regression fit. Used internally.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr the method used for estimation of p.values; `wald` (default) or `likelihood`.
#' @param interaction (`flag`)\cr if `TRUE`, the model includes the interaction between the studied
#'   treatment and candidate covariate. Note that for univariate models without treatment arm, and
#'   multivariate models, no interaction can be used so that this needs to be `FALSE`.
#' @param ties (`string`)\cr among `exact` (equivalent to `DISCRETE` in SAS), `efron` and `breslow`,
#'   see [survival::coxph()]. Note: there is no equivalent of SAS `EXACT` method in R.
#'
#' @return A `list` of items with names corresponding to the arguments.
#'
#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @examples
#' control_coxreg()
#'
#' @export
control_coxreg <- function(pval_method = c("wald", "likelihood"),
                           ties = c("exact", "efron", "breslow"),
                           conf_level = 0.95,
                           interaction = FALSE) {
  # Resolve the enumerated choices first (the first element of each default vector
  # is used when the argument is not supplied).
  choices <- list(
    pval_method = match.arg(pval_method),
    ties = match.arg(ties)
  )

  # Validate the remaining settings before they are stored.
  checkmate::assert_flag(interaction)
  assert_proportion_value(conf_level)

  # Return all settings as one named list, in argument-name order of the original contract.
  c(choices, list(conf_level = conf_level, interaction = interaction))
}

#' Custom Tidy Methods for Cox Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x (`list`)\cr Result of the Cox regression model fitted by [fit_coxreg_univar()] (for univariate models)
#'   or [fit_coxreg_multivar()] (for multivariate models).
#'
#' @return [tidy()] returns:
#' * For `summary.coxph` objects,  a `data.frame` with columns: `Pr(>|z|)`, `exp(coef)`, `exp(-coef)`, `lower .95`,
#'   `upper .95`, `level`, and `n`.
#' * For `coxreg.univar` objects, a `data.frame` with columns: `effect`, `term`, `term_label`, `level`, `n`, `hr`,
#'   `lcl`, `ucl`, `pval`, and `ci`.
#' * For `coxreg.multivar` objects, a `data.frame` with columns: `term`, `pval`, `term_label`, `hr`, `lcl`, `ucl`,
#'   `level`, and `ci`.
#'
#' @seealso [cox_regression]
#'
#' @name tidy_coxreg
NULL

#' @describeIn tidy_coxreg Custom tidy method for [survival::coxph()] summary results.
#'
#' Tidy the [survival::coxph()] results into a `data.frame` to extract model results.
#'
#' @method tidy summary.coxph
#'
#' @examples
#' library(survival)
#' library(broom)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' formula <- "survival::Surv(time, status) ~ armcd + covar1"
#' msum <- summary(coxph(stats::as.formula(formula), data = dta_bladder))
#' tidy(msum)
#'
#' @export
tidy.summary.coxph <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "summary.coxph")

  # Remember the term names before the matrices lose their row names in the tibble conversion.
  coef_mat <- x$coefficients
  term_names <- rownames(coef_mat)

  coef_tbl <- tibble::as_tibble(coef_mat)
  ci_tbl <- tibble::as_tibble(x$conf.int)

  # Keep only the p-value column(s) of the coefficient table and put them in front of
  # the confidence interval table.
  is_pval_col <- grepl("Pr", names(coef_tbl))
  out <- cbind(coef_tbl[, is_pval_col], ci_tbl)

  out$level <- term_names
  out$n <- x[["n"]]
  out
}

#' @describeIn tidy_coxreg Custom tidy method for a univariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_univar()].
#'
#' @method tidy coxreg.univar
#'
#' @examples
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' tidy(mod1)
#' tidy(mod2)
#'
#' @export
tidy.coxreg.univar <- function(x, # nolint
                               ...) {
  checkmate::assert_class(x, "coxreg.univar")
  models <- x$mod
  model_vars <- c(x$vars$arm, x$vars$covariates)

  # Pick the extraction helper matching how the models were fitted:
  # no arm -> covariate-only extraction; arm with interaction -> interaction
  # extraction; otherwise -> plain univariate extraction.
  extract_one <- if (!("arm" %in% names(x$vars))) {
    function(mod, vars) {
      h_coxreg_multivar_extract(
        var = vars,
        data = x$data,
        mod = mod,
        control = x$control
      )
    }
  } else if (x$control$interaction) {
    function(mod, vars) {
      h_coxreg_extract_interaction(
        effect = x$vars$arm, covar = vars, mod = mod, data = x$data,
        at = x$at, control = x$control
      )
    }
  } else {
    function(mod, vars) {
      h_coxreg_univar_extract(
        effect = x$vars$arm, covar = vars, data = x$data, mod = mod,
        control = x$control
      )
    }
  }

  # One data frame per fitted model, stacked into a single result.
  per_model <- Map(f = extract_one, mod = models, vars = model_vars)
  out <- do.call(rbind, per_model)

  # Pair the confidence limits row by row, then turn the statistic columns into
  # list columns processed element-wise by `empty_vector_if_na()`.
  out$ci <- Map(function(lcl, ucl) c(lcl, ucl), out$lcl, out$ucl)
  for (stat_col in c("n", "ci", "hr")) {
    out[[stat_col]] <- lapply(out[[stat_col]], empty_vector_if_na)
  }

  if (x$control$interaction) {
    out$pval_inter <- lapply(out$pval_inter, empty_vector_if_na)
    # Remove interaction p-values due to change in specifications.
    not_treatment <- out$effect != "Treatment:"
    out$pval[not_treatment] <- NA
  }
  out$pval <- lapply(out$pval, empty_vector_if_na)

  attr(out, "conf_level") <- x$control$conf_level
  out
}

#' @describeIn tidy_coxreg Custom tidy method for a multivariate Cox regression.
#'
#' Tidy up the result of a Cox regression model fitted by [fit_coxreg_multivar()].
#'
#' @method tidy coxreg.multivar
#'
#' @examples
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#' broom::tidy(multivar_model)
#'
#' @export
tidy.coxreg.multivar <- function(x, # nolint
                                 ...) {
  checkmate::assert_class(x, "coxreg.multivar")
  model_vars <- c(x$vars$arm, x$vars$covariates)

  # Extract the estimates of one model term from the single fitted model.
  extract_term <- function(vars) {
    h_coxreg_multivar_extract(
      var = vars, data = x$data,
      mod = x$mod, control = x$control
    )
  }

  # Convert the model summaries to data: one data frame per term, stacked.
  out <- do.call(rbind, Map(f = extract_term, vars = model_vars))

  # Pair the confidence limits row by row, then turn the statistic columns into
  # list columns processed element-wise by `empty_vector_if_na()`.
  out$ci <- Map(function(lcl, ucl) c(lcl, ucl), out$lcl, out$ucl)
  for (stat_col in c("ci", "hr", "pval")) {
    out[[stat_col]] <- lapply(out[[stat_col]], empty_vector_if_na)
  }

  # The sample size column is not part of the multivariate output.
  keep_col <- names(out) != "n"
  out <- out[, keep_col]
  attr(out, "conf_level") <- x$control$conf_level

  out
}

#' Fits for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Fitting functions for univariate and multivariate Cox regression models.
#'
#' @param variables (`list`)\cr a named list giving the names of the variables found in `data`, with elements
#'   `time`, `event`, `arm`, `strata`, and `covariates`. If `arm` is missing from
#'   `variables`, then only Cox model(s) including the `covariates` will be fitted and the corresponding effect
#'   estimates will be tabulated later.
#' @param data (`data.frame`)\cr the dataset containing the variables to fit the models.
#' @param at (`list` of `numeric`)\cr when the candidate covariate is a `numeric`, use `at` to specify
#'   the value of the covariate at which the effect should be estimated.
#' @param control (`list`)\cr a list of parameters as returned by the helper function [control_coxreg()].
#'
#' @seealso [h_cox_regression] for relevant helper functions, [cox_regression].
#'
#' @examples
#' library(survival)
#'
#' set.seed(1, kind = "Mersenne-Twister")
#'
#' # Testing dataset [survival::bladder].
#' dta_bladder <- with(
#'   data = bladder[bladder$enum < 5, ],
#'   data.frame(
#'     time = stop,
#'     status = event,
#'     armcd = as.factor(rx),
#'     covar1 = as.factor(enum),
#'     covar2 = factor(
#'       sample(as.factor(enum)),
#'       levels = 1:4, labels = c("F", "F", "M", "M")
#'     )
#'   )
#' )
#' labels <- c("armcd" = "ARM", "covar1" = "A Covariate Label", "covar2" = "Sex (F/M)")
#' formatters::var_labels(dta_bladder)[names(labels)] <- labels
#' dta_bladder$age <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
#'
#' plot(
#'   survfit(Surv(time, status) ~ armcd + covar1, data = dta_bladder),
#'   lty = 2:4,
#'   xlab = "Months",
#'   col = c("blue1", "blue2", "blue3", "blue4", "red1", "red2", "red3", "red4")
#' )
#'
#' @name fit_coxreg
NULL

#' @describeIn fit_coxreg Fit a series of univariate Cox regression models given the inputs.
#'
#' @return
#' * `fit_coxreg_univar()` returns a `coxreg.univar` class object which is a named `list`
#'   with 5 elements:
#'   * `mod`: Cox regression models fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'   * `at`: Value of the covariate at which the effect should be estimated.
#'
#' @note When using `fit_coxreg_univar` there should be two study arms.
#'
#' @examples
#' # fit_coxreg_univar
#'
#' ## Cox regression: arm + 1 covariate.
#' mod1 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = "covar1"
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: arm + 1 covariate + interaction, 2 candidate covariates.
#' mod2 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91, interaction = TRUE)
#' )
#'
#' ## Cox regression: arm + 1 covariate, stratified analysis.
#' mod3 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", strata = "covar2",
#'     covariates = c("covar1")
#'   ),
#'   data = dta_bladder,
#'   control = control_coxreg(conf_level = 0.91)
#' )
#'
#' ## Cox regression: no arm, only covariates.
#' mod4 <- fit_coxreg_univar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_univar <- function(variables,
                              data,
                              at = list(),
                              control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The treatment arm is optional; without it only covariate models are fitted.
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are used.
  if (!is.null(variables$strata)) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }
  if (has_arm) {
    assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  }

  # Drop unused factor levels from every model variable.
  # The integer codes are re-mapped onto the reduced level set: overwriting only the
  # "levels" attribute would mislabel observations (or leave invalid codes) whenever
  # an unused level is not the last one. All other attributes of the factor are kept.
  vars <- unlist(variables[c(arm_name, "covariates", "strata")], use.names = FALSE)
  for (i in vars) {
    if (is.factor(data[[i]])) {
      old_levels <- levels(data[[i]])
      new_levels <- levels(droplevels(data[[i]]))
      if (!identical(old_levels, new_levels)) {
        # `match()` gives, for each old level, its position among the kept levels;
        # indexing by the old codes translates every observation (NA stays NA).
        recoded <- match(old_levels, new_levels)[as.integer(data[[i]])]
        fct_attrs <- attributes(data[[i]])
        fct_attrs$levels <- new_levels
        attributes(recoded) <- fct_attrs
        data[[i]] <- recoded
      }
    }
  }

  # One formula, and hence one fitted model, per candidate covariate.
  forms <- h_coxreg_univar_formulas(variables, interaction = control$interaction)
  mod <- lapply(
    forms, function(x) {
      survival::coxph(formula = stats::as.formula(x), data = data, ties = control$ties)
    }
  )
  structure(
    list(
      mod = mod,
      data = data,
      control = control,
      vars = variables,
      at = at
    ),
    class = "coxreg.univar"
  )
}

#' @describeIn fit_coxreg Fit a multivariate Cox regression model.
#'
#' @return
#' * `fit_coxreg_multivar()` returns a `coxreg.multivar` class object which is a named list
#'   with 4 elements:
#'   * `mod`: Cox regression model fitted by [survival::coxph()].
#'   * `data`: The original data frame input.
#'   * `control`: The original control input.
#'   * `vars`: The variables used in the model.
#'
#' @examples
#' # fit_coxreg_multivar
#'
#' ## Cox regression: multivariate Cox regression.
#' multivar_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' # Example without treatment arm.
#' multivar_covs_model <- fit_coxreg_multivar(
#'   variables = list(
#'     time = "time", event = "status",
#'     covariates = c("covar1", "covar2")
#'   ),
#'   data = dta_bladder
#' )
#'
#' @export
fit_coxreg_multivar <- function(variables,
                                data,
                                control = control_coxreg()) {
  checkmate::assert_list(variables, names = "named")
  # The treatment arm is optional; it is only validated when present.
  arm_name <- if ("arm" %in% names(variables)) "arm" else NULL

  if (!is.null(variables$covariates)) {
    checkmate::assert_character(variables$covariates)
  }

  # Interactions are not accepted for the multivariate model.
  checkmate::assert_false(control$interaction)
  assert_df_with_variables(data, variables)
  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # The likelihood p-value method is rejected when strata are used.
  uses_strata <- !is.null(variables$strata)
  if (uses_strata) {
    checkmate::assert_disjunct(control$pval_method, "likelihood")
  }

  # A single model containing all requested terms.
  form <- h_coxreg_multivar_formula(variables)
  mod <- survival::coxph(
    formula = stats::as.formula(form),
    data = data,
    ties = control$ties
  )

  fit <- list(
    mod = mod,
    data = data,
    control = control,
    vars = variables
  )
  class(fit) <- "coxreg.multivar"
  fit
}

#' Muffled `car::Anova`
#'
#' When applied to survival models, [car::Anova()] signals that the `strata` terms are dropped from the model
#' formula when present. This function deliberately muffles this message.
#'
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#' @param test_statistic (`string`)\cr the test statistic passed on to [car::Anova()] as `test.statistic`,
#'   i.e. `"Wald"` or `"LR"`.
#'
#' @return Returns the output of [car::Anova()], with convergence message muffled.
#'
#' @keywords internal
muffled_car_anova <- function(mod, test_statistic) {
  tryCatch(
    # Messages raised while computing the type III tests are muffled on the spot,
    # without interrupting the computation.
    withCallingHandlers(
      expr = {
        car::Anova(
          mod,
          test.statistic = test_statistic,
          type = "III"
        )
      },
      message = function(m) invokeRestart("muffleMessage")
    ),
    # Any error is re-raised with a hint about the likely cause. Only the message of
    # the original condition is appended: pasting the condition object itself would
    # repeat the whole text once per field of the condition.
    error = function(e) {
      stop(
        paste(
          "the model seems to have convergence problems, please try to change",
          "the configuration of covariates or strata variables, e.g.",
          "- original error:", conditionMessage(e)
        ),
        call. = FALSE
      )
    }
  )
}

#' Counting Specific Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' We can count the occurrence of specific values in a variable of interest.
#'
#' @inheritParams argument_convention
#' @param values (`character`)\cr specific values that should be counted.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_values")`
#'   to see available statistics for this function.
#'
#' @note
#' * For `factor` variables, `s_count_values` converts `x` to `character` before counting, so `values` that are
#'   not among the levels of `x` are simply never matched (no error is raised).
#' * For `count_values()`, variable labels are shown when there is more than one element in `vars`,
#'   otherwise they are hidden.
#'
#' @name count_values_funs
#' @order 1
NULL

#' @describeIn count_values_funs S3 generic function to count values.
#'
#' @inheritParams s_summary.logical
#'
#' @return
#' * `s_count_values()` returns output of [s_summary()] for specified values of a non-numeric variable.
#'
#' @export
s_count_values <- function(x,
                           values,
                           na.rm = TRUE, # nolint
                           .N_col, # nolint
                           .N_row, # nolint
                           denom = c("n", "N_row", "N_col")) {
  # S3 generic: dispatch on the class of `x`. The arguments listed here only define
  # the generic's signature; each method decides how it uses them.
  UseMethod("s_count_values", x)
}

#' @describeIn count_values_funs Method for `character` class.
#'
#' @method s_count_values character
#'
#' @examples
#' # `s_count_values.character`
#' s_count_values(x = c("a", "b", "a"), values = "a")
#' s_count_values(x = c("a", "b", "a", NA, NA), values = "b", na.rm = FALSE)
#'
#' @export
s_count_values.character <- function(x,
                                     values = "Y",
                                     na.rm = TRUE, # nolint
                                     ...) {
  checkmate::assert_character(values)

  # Optionally discard missing entries before matching.
  kept <- if (na.rm) x[!is.na(x)] else x

  # Summarize the logical indicator "element is one of `values`".
  s_summary(kept %in% values, ...)
}

#' @describeIn count_values_funs Method for `factor` class. This makes an automatic
#'   conversion to `character` and then forwards to the method for characters.
#'
#' @method s_count_values factor
#'
#' @examples
#' # `s_count_values.factor`
#' s_count_values(x = factor(c("a", "b", "a")), values = "a")
#'
#' @export
s_count_values.factor <- function(x,
                                  values = "Y",
                                  ...) {
  # Work on the labels rather than the integer codes, then re-dispatch so that the
  # character method does the counting.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Method for `logical` class.
#'
#' @method s_count_values logical
#'
#' @examples
#' # `s_count_values.logical`
#' s_count_values(x = c(TRUE, FALSE, TRUE))
#'
#' @export
s_count_values.logical <- function(x, values = TRUE, ...) {
  checkmate::assert_logical(values)

  # Compare on the text representation ("TRUE"/"FALSE") and re-dispatch so that the
  # character method does the counting.
  x_chr <- as.character(x)
  values_chr <- as.character(values)
  s_count_values(x_chr, values = values_chr, ...)
}

#' @describeIn count_values_funs Formatted analysis function which is used as `afun`
#'   in `count_values()`.
#'
#' @return
#' * `a_count_values()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_values`
#' a_count_values(x = factor(c("a", "b", "a")), values = "a", .N_col = 10, .N_row = 10)
#'
#' @export
# Formatted analysis function built from `s_count_values()`, with default formats for
# the `count_fraction` and `count` statistics.
a_count_values <- make_afun(
  s_count_values,
  .formats = c(count_fraction = "xx (xx.xx%)", count = "xx")
)

#' @describeIn count_values_funs Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_values()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_values()` to the table layout.
#'
#' @examples
#' # `count_values`
#' basic_table() %>%
#'   count_values("Species", values = "setosa") %>%
#'   build_table(iris)
#'
#' @export
#' @order 2
count_values <- function(lyt,
                         vars,
                         values,
                         na_str = default_na_str(),
                         nested = TRUE,
                         ...,
                         table_names = vars,
                         .stats = "count_fraction",
                         .formats = NULL,
                         .labels = c(count_fraction = paste(values, collapse = ", ")),
                         .indent_mods = NULL) {
  # Arguments forwarded to the analysis function for every analyzed variable.
  extra_args <- list(values = values, ...)

  # Specialize the formatted analysis function with the requested statistics,
  # formats, labels and indentation.
  afun <- make_afun(
    a_count_values,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Variable labels are only shown when more than one variable is analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt,
    vars,
    afun = afun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Summary for Poisson Negative Binomial.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Summarize results of a Poisson Negative Binomial Regression.
#' This can be used to analyze count and/or frequency data using a linear model.
#'
#' @inheritParams h_glm_count
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_glm_count")`
#'   to see available statistics for this function.
#'
#' @name summarize_glm_count
#' @order 1
NULL

#' Helper Functions for Poisson Models.
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Helper functions that can be used to return the results of various Poisson models.
#'
#' @inheritParams argument_convention
#'
#' @seealso [summarize_glm_count]
#'
#' @name h_glm_count
NULL

#' @describeIn h_glm_count Helper function to return results of a poisson model.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset.
#' @param weights (`character`)\cr a character vector specifying weights used
#'   in averaging predictions. Number of weights must equal the number of levels included in the covariates.
#'   Weights option passed to [emmeans::emmeans()].
#'
#' @return
#' * `h_glm_poisson()` returns the results of a Poisson model.
#'
#' @keywords internal
h_glm_poisson <- function(.var,
                          .df_row,
                          variables,
                          weights) {
  arm <- variables$arm
  covariates <- variables$covariates

  # Model formula: response ~ covariates + arm (the arm is always the last term).
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # The offset is optional: looking up `.df_row[[NULL]]` would fail, so the model is
  # fitted without an offset when `variables$offset` is not supplied.
  if (is.null(variables$offset)) {
    glm_fit <- stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  } else {
    offset <- .df_row[[variables$offset]]
    glm_fit <- stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::poisson(link = "log")
    )
  }

  # Estimated marginal means per arm on the response scale, with the offset set to 0.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return results of a quasipoisson model.
#'
#' @inheritParams summarize_glm_count
#'
#' @return
#' * `h_glm_quasipoisson()` returns the results of a Quasi-Poisson model.
#'
#' @keywords internal
h_glm_quasipoisson <- function(.var,
                               .df_row,
                               variables,
                               weights) {
  arm <- variables$arm
  covariates <- variables$covariates

  # Model formula: response ~ covariates + arm (the arm is always the last term).
  formula <- stats::as.formula(paste0(
    .var, " ~ ",
    " + ",
    paste(covariates, collapse = " + "),
    " + ",
    arm
  ))

  # The offset is optional: looking up `.df_row[[NULL]]` would fail, so the model is
  # fitted without an offset when `variables$offset` is not supplied.
  if (is.null(variables$offset)) {
    glm_fit <- stats::glm(
      formula = formula,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  } else {
    offset <- .df_row[[variables$offset]]
    glm_fit <- stats::glm(
      formula = formula,
      offset = offset,
      data = .df_row,
      family = stats::quasipoisson(link = "log")
    )
  }

  # Estimated marginal means per arm on the response scale, with the offset set to 0.
  emmeans_fit <- emmeans::emmeans(
    glm_fit,
    specs = arm,
    data = .df_row,
    type = "response",
    offset = 0,
    weights = weights
  )

  list(
    glm_fit = glm_fit,
    emmeans_fit = emmeans_fit
  )
}

#' @describeIn h_glm_count Helper function to return the results of the
#'   selected model (poisson, quasipoisson, negative binomial).
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called
#'   in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with
#'   expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple
#'     groups will be summarized. Specifically, the first level of `arm` variable is taken as the
#'     reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as
#'     `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
#'   * `offset` (`numeric`)\cr a numeric vector or scalar adding an offset.
#' @param distribution (`character`)\cr a character value specifying the distribution
#'   used in the regression (poisson, quasipoisson).
#'
#' @return
#' * `h_glm_count()` returns the results of the selected model.
#'
#' @keywords internal
h_glm_count <- function(.var,
                        .df_row,
                        variables,
                        distribution,
                        weights) {
  # Negative binomial models are rejected up front; the matching `switch()` entry
  # below is only a placeholder.
  negbin_requested <- distribution == "negbin"
  if (negbin_requested) {
    stop("negative binomial distribution is not currently available.")
  }

  # Delegate to the helper fitting the requested distribution.
  switch(distribution,
    poisson = h_glm_poisson(
      .var = .var, .df_row = .df_row, variables = variables, weights = weights
    ),
    quasipoisson = h_glm_quasipoisson(
      .var = .var, .df_row = .df_row, variables = variables, weights = weights
    ),
    negbin = list() # h_glm_negbin(.var, .df_row, variables, weights) # nolint
  )
}

#' @describeIn h_glm_count Helper function to return the estimated means.
#'
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param conf_level (`numeric`)\cr value used to derive the confidence interval for the rate.
#' @param obj (`glm.fit`)\cr fitted model object used to derive the mean rate estimates in each treatment arm.
#' @param arm (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'   summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'
#' @return
#' * `h_ppmeans()` returns the estimated means.
#'
#' @keywords internal
h_ppmeans <- function(obj, .df_row, arm, conf_level) {
  # Two-sided normal quantile for the requested confidence level.
  alpha <- 1 - conf_level
  z_crit <- stats::qnorm(1 - alpha / 2)

  arm_levels <- levels(.df_row[[arm]])

  # Estimate, lower and upper limit of the mean predicted rate when every row of
  # `.df_row` is assigned to arm level `lev`.
  estimate_for_level <- function(lev) {
    everyone_in_lev <- .df_row
    everyone_in_lev[[arm]] <- factor(lev, levels = arm_levels)

    mf <- stats::model.frame(obj$formula, data = everyone_in_lev)
    design <- stats::model.matrix(obj$formula, data = mf)

    predicted <- stats::predict(obj, newdata = mf, type = "response")
    rate_hat <- mean(predicted)

    # Column means of the design matrix weighted by the predictions, combined with
    # the coefficient covariance matrix, give the standard error of the mean rate.
    weighted_means <- colMeans(predicted * design)
    se <- sqrt(as.numeric(t(weighted_means) %*% stats::vcov(obj) %*% weighted_means))

    # The limits are built on the log scale and transformed back.
    log_half_width <- z_crit * se / rate_hat
    c(rate_hat, rate_hat * exp(-log_half_width), rate_hat * exp(log_half_width))
  }

  per_level <- stats::setNames(lapply(arm_levels, estimate_for_level), arm_levels)
  out <- do.call(rbind, per_level)

  # The estimate column is named after the model type.
  estimate_name <- if (inherits(obj, "negbin")) "response" else "rate"
  colnames(out) <- c(estimate_name, "asymp.LCL", "asymp.UCL")

  out <- as.data.frame(out)
  out[[arm]] <- rownames(out)
  out
}

#' @describeIn summarize_glm_count Statistics function that produces a named list of results
#'   of the investigated Poisson model.
#'
#' @return
#' * `s_glm_count()` returns a named `list` of 6 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `rate`: Estimated event rate per follow-up time.
#'   * `rate_ci`: Confidence interval for the estimated rate per follow-up time.
#'   * `rate_ratio`: Ratio of event rates in each treatment arm to the reference arm.
#'   * `rate_ratio_ci`: Confidence interval for the rate ratio.
#'   * `pval`: p-value.
#'
#' @keywords internal
s_glm_count <- function(df,
                        .var,
                        .df_row,
                        variables,
                        .ref_group,
                        .in_ref_col,
                        distribution,
                        conf_level,
                        rate_mean_method,
                        weights,
                        scale = 1) {
  arm <- variables$arm

  y <- df[[.var]]
  smry_level <- as.character(unique(df[[arm]]))

  # ensure there is only 1 value
  checkmate::assert_scalar(smry_level)

  # Fit the model on the full row data; `df` only identifies the arm being summarized.
  results <- h_glm_count(
    .var = .var,
    .df_row = .df_row,
    variables = variables,
    distribution = distribution,
    weights = weights
  )

  if (rate_mean_method == "emmeans") {
    emmeans_smry <- summary(results$emmeans_fit, level = conf_level)
  } else if (rate_mean_method == "ppmeans") {
    emmeans_smry <- h_ppmeans(results$glm_fit, .df_row, arm, conf_level)
  } else {
    # Fail with an explicit message instead of an "object not found" error further down.
    stop("`rate_mean_method` must be either \"emmeans\" or \"ppmeans\".")
  }

  emmeans_smry_level <- emmeans_smry[emmeans_smry[[arm]] == smry_level, ]

  # The point estimate is stored in `response` for negative binomial fits and in
  # `rate` otherwise. `scale` is applied to the estimate as well as to its limits so
  # that all three are reported on the same scale.
  rate_estimate <- if (distribution == "negbin") {
    emmeans_smry_level$response * scale
  } else {
    emmeans_smry_level$rate * scale
  }

  # Statistics available in every column, reference or not.
  arm_stats <- list(
    n = length(y[!is.na(y)]),
    rate = formatters::with_label(rate_estimate, "Adjusted Rate"),
    rate_ci = formatters::with_label(
      c(emmeans_smry_level$asymp.LCL * scale, emmeans_smry_level$asymp.UCL * scale),
      f_conf_level(conf_level)
    )
  )

  if (.in_ref_col) {
    # No comparison within the reference column: return empty placeholders.
    comparison_stats <- list(
      rate_ratio = formatters::with_label(character(), "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Locate the reference arm by exact comparison: arm levels may contain regular
    # expression metacharacters or be substrings of one another.
    ref_level <- as.character(unique(.ref_group[[arm]]))
    fit_levels <- as.character(as.data.frame(results$emmeans_fit)[[arm]])
    emmeans_contrasts <- emmeans::contrast(
      results$emmeans_fit,
      method = "trt.vs.ctrl",
      ref = which(fit_levels %in% ref_level)
    )

    # Use the requested confidence level so that the interval agrees with its label.
    contrasts_smry <- summary(
      emmeans_contrasts,
      infer = TRUE,
      adjust = "none",
      level = conf_level
    )

    # Match the arm name literally within the contrast label.
    smry_contrasts_level <- contrasts_smry[grepl(smry_level, contrasts_smry$contrast, fixed = TRUE), ]

    comparison_stats <- list(
      rate_ratio = formatters::with_label(smry_contrasts_level$ratio, "Adjusted Rate Ratio"),
      rate_ratio_ci = formatters::with_label(
        c(smry_contrasts_level$asymp.LCL, smry_contrasts_level$asymp.UCL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(smry_contrasts_level$p.value, "p-value")
    )
  }

  c(arm_stats, comparison_stats)
}

#' @describeIn summarize_glm_count Formatted analysis function which is used as `afun` in `summarize_glm_count()`.
#'
#' @return
#' * `a_glm_count()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_glm_count <- make_afun(
  s_glm_count,
  # Interval and p-value rows get an indent modifier of 1L; `n` and the point estimates get 0L.
  .indent_mods = c(
    n = 0L,
    rate = 0L,
    rate_ci = 1L,
    rate_ratio = 0L,
    rate_ratio_ci = 1L,
    pval = 1L
  ),
  # Default cell formats, one per statistic returned by `s_glm_count()`.
  .formats = c(
    n = "xx",
    rate = "xx.xxxx",
    rate_ci = "(xx.xxxx, xx.xxxx)",
    rate_ratio = "xx.xxxx",
    rate_ratio_ci = "(xx.xxxx, xx.xxxx)",
    pval = "x.xxxx | (<0.0001)"
  ),
  .null_ref_cells = FALSE
)

#' @describeIn summarize_glm_count Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_glm_count()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_glm_count()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adtte %>% filter(PARAMCD == "TNE")
#' anl$AVAL_f <- as.factor(anl$AVAL)
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM", ref_group = "B: Placebo") %>%
#'   add_colcounts() %>%
#'   analyze_vars(
#'     "AVAL_f",
#'     var_labels = "Number of exacerbations per patient",
#'     .stats = c("count_fraction"),
#'     .formats = c("count_fraction" = "xx (xx.xx%)"),
#'     .label = c("Number of exacerbations per patient")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = NULL),
#'     conf_level = 0.95,
#'     distribution = "poisson",
#'     rate_mean_method = "emmeans",
#'     var_labels = "Unadjusted exacerbation rate (per year)",
#'     table_names = "unadj",
#'     .stats = c("rate"),
#'     .labels = c(rate = "Rate")
#'   ) %>%
#'   summarize_glm_count(
#'     vars = "AVAL",
#'     variables = list(arm = "ARM", offset = "lgTMATRSK", covariates = c("REGION1")),
#'     conf_level = 0.95,
#'     distribution = "quasipoisson",
#'     rate_mean_method = "ppmeans",
#'     var_labels = "Adjusted (QP) exacerbation rate (per year)",
#'     table_names = "adj",
#'     .stats = c("rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
#'     .labels = c(
#'       rate = "Rate", rate_ci = "Rate CI", rate_ratio = "Rate Ratio",
#'       rate_ratio_ci = "Rate Ratio CI", pval = "p value"
#'     )
#'   )
#'
#' build_table(lyt = lyt, df = anl)
#'
#' @export
#' @order 2
summarize_glm_count <- function(lyt,
                                vars,
                                variables,
                                distribution,
                                conf_level,
                                rate_mean_method,
                                weights = stats::weights,
                                scale = 1,
                                var_labels,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "visible",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Model settings, plus anything supplied through `...`, are handed on via `extra_args`.
  model_args <- list(
    variables = variables,
    distribution = distribution,
    conf_level = conf_level,
    rate_mean_method = rate_mean_method,
    weights = weights,
    scale = scale,
    ...
  )

  # Re-wrap `a_glm_count` with the requested statistics, formats, labels and indent modifiers.
  glm_afun <- make_afun(
    a_glm_count,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt = lyt,
    vars = vars,
    afun = glm_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = model_args
  )
}

#' Pairwise `CoxPH` model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize p-value, HR and CIs from stratified or unstratified `CoxPH` model.
#'
#' @inheritParams argument_convention
#' @inheritParams s_surv_time
#' @param strat (`character` or `NULL`)\cr variable names indicating stratification factors.
#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
#'   [control_coxph()]. Some possible parameter options are:
#'   * `pval_method` (`string`)\cr p-value method for testing hazard ratio = 1. Default method is `"log-rank"` which
#'     comes from [survival::survdiff()], can also be set to `"wald"` or `"likelihood"` (from [survival::coxph()]).
#'   * `ties` (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'     can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'   * `conf_level` (`proportion`)\cr confidence level of the interval for HR.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("coxph_pairwise")`
#'   to see available statistics for this function.
#'
#' @name survival_coxph_pairwise
#' @order 1
NULL

#' @describeIn survival_coxph_pairwise Statistics function which analyzes HR, CIs of HR and p-value of a `coxph` model.
#'
#' @return
#' * `s_coxph_pairwise()` returns the statistics:
#'   * `pvalue`: p-value to test HR = 1.
#'   * `hr`: Hazard ratio.
#'   * `hr_ci`: Confidence interval for hazard ratio.
#'   * `n_tot`: Total number of observations.
#'   * `n_tot_events`: Total number of events.
#'
#' @keywords internal
s_coxph_pairwise <- function(df,
                             .ref_group,
                             .in_ref_col,
                             .var,
                             is_event,
                             strat = NULL,
                             control = control_coxph()) {
  checkmate::assert_string(.var)
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[is_event]])
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  pval_method <- control$pval_method
  ties <- control$ties
  conf_level <- control$conf_level

  # Nothing is compared within the reference column: return empty, labelled placeholders.
  if (.in_ref_col) {
    return(
      list(
        pvalue = formatters::with_label("", paste0("p-value (", pval_method, ")")),
        hr = formatters::with_label("", "Hazard Ratio"),
        hr_ci = formatters::with_label("", f_conf_level(conf_level)),
        n_tot = formatters::with_label("", "Total n"),
        n_tot_events = formatters::with_label("", "Total events")
      )
    )
  }

  # Stack reference and comparison rows; `ref` is the first factor level of `arm`.
  data <- rbind(.ref_group, df)
  group <- factor(rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))), levels = c("ref", "x"))

  df_cox <- data.frame(
    tte = data[[.var]],
    is_event = data[[is_event]],
    arm = group
  )
  if (is.null(strat)) {
    formula_cox <- survival::Surv(tte, is_event) ~ arm
  } else {
    # All stratification variables go into a single `strata()` term.
    formula_cox <- stats::as.formula(
      paste0(
        "survival::Surv(tte, is_event) ~ arm + strata(",
        paste(strat, collapse = ","),
        ")"
      )
    )
    df_cox <- cbind(df_cox, data[strat])
  }
  cox_fit <- survival::coxph(
    formula = formula_cox,
    data = df_cox,
    ties = ties
  )
  sum_cox <- summary(cox_fit, conf.int = conf_level, extend = TRUE)
  original_survdiff <- survival::survdiff(
    formula_cox,
    data = df_cox
  )
  # Upper tail is requested directly instead of `1 - pchisq(...)`, which cancels to
  # exactly 0 once the p-value drops below machine precision.
  log_rank_pvalue <- stats::pchisq(
    original_survdiff$chisq,
    df = length(original_survdiff$n) - 1,
    lower.tail = FALSE
  )

  pval <- switch(pval_method,
    "wald" = sum_cox$waldtest["pvalue"],
    "log-rank" = log_rank_pvalue, # pvalue from original log-rank test survival::survdiff()
    "likelihood" = sum_cox$logtest["pvalue"]
  )
  list(
    pvalue = formatters::with_label(unname(pval), paste0("p-value (", pval_method, ")")),
    hr = formatters::with_label(sum_cox$conf.int[1, 1], "Hazard Ratio"),
    hr_ci = formatters::with_label(unname(sum_cox$conf.int[1, 3:4]), f_conf_level(conf_level)),
    n_tot = formatters::with_label(sum_cox$n, "Total n"),
    n_tot_events = formatters::with_label(sum_cox$nevent, "Total events")
  )
}

#' @describeIn survival_coxph_pairwise Formatted analysis function which is used as `afun` in `coxph_pairwise()`.
#'
#' @return
#' * `a_coxph_pairwise()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_coxph_pairwise <- make_afun(
  s_coxph_pairwise,
  # Only the confidence interval row gets a non-zero indent modifier.
  .indent_mods = c(
    "pvalue" = 0L,
    "hr" = 0L,
    "hr_ci" = 1L,
    "n_tot" = 0L,
    "n_tot_events" = 0L
  ),
  # Default cell formats, one per statistic returned by `s_coxph_pairwise()`.
  .formats = c(
    "pvalue" = "x.xxxx | (<0.0001)",
    "hr" = "xx.xx",
    "hr_ci" = "(xx.xx, xx.xx)",
    "n_tot" = "xx.xx",
    "n_tot_events" = "xx.xx"
  )
)

#' @describeIn survival_coxph_pairwise Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `coxph_pairwise()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_coxph_pairwise()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(is_event = CNSR == 0)
#'
#' df <- adtte_f %>% filter(ARMCD == "ARM A")
#' df_ref_group <- adtte_f %>% filter(ARMCD == "ARM B")
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Unstratified Analysis"
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
#'   add_colcounts() %>%
#'   coxph_pairwise(
#'     vars = "AVAL",
#'     is_event = "is_event",
#'     var_labels = "Stratified Analysis",
#'     strat = "SEX",
#'     control = control_coxph(pval_method = "wald")
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
#' @order 2
coxph_pairwise <- function(lyt,
                           vars,
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           var_labels = "CoxPH",
                           show_labels = "visible",
                           table_names = vars,
                           .stats = c("pvalue", "hr", "hr_ci"),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Everything passed through `...` is handed on via `extra_args`.
  stat_fun_args <- list(...)

  # Re-wrap `a_coxph_pairwise` with the requested statistics, formats, labels and indent modifiers.
  cox_afun <- make_afun(
    a_coxph_pairwise,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt = lyt,
    vars = vars,
    afun = cox_afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = stat_fun_args
  )
}

#' Missing Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Substitute missing data with a string or factor level.
#'
#' @param x (`factor` or `character` vector)\cr values for which any missing values should be substituted.
#' @param label (`character`)\cr string that missing data should be replaced with.
#'
#' @return `x` with any `NA` values substituted by `label`.
#'
#' @examples
#' explicit_na(c(NA, "a", "b"))
#' is.na(explicit_na(c(NA, "a", "b")))
#'
#' explicit_na(factor(c(NA, "a", "b")))
#' is.na(explicit_na(factor(c(NA, "a", "b"))))
#'
#' explicit_na(sas_na(c("a", "")))
#'
#' @export
explicit_na <- function(x, label = "<Missing>") {
  checkmate::assert_string(label)

  # Character input: overwrite the missing entries directly.
  if (is.character(x)) {
    x[is.na(x)] <- label
    return(x)
  }

  # Anything that is neither a character vector nor a factor is rejected.
  if (!is.factor(x)) {
    stop("only factors and character vectors allowed")
  }

  # Factor input: map `NA` to the level `label`, then drop `label` again in case it is an unused level.
  with_na_level <- forcats::fct_na_value_to_level(x, label)
  forcats::fct_drop(with_na_level, only = label)
}

#' Convert Strings to `NA`
#'
#' @description `r lifecycle::badge("stable")`
#'
#' SAS imports missing data as empty strings or strings with whitespaces only. This helper function can be used to
#' convert these values to `NA`s.
#'
#' @inheritParams explicit_na
#' @param empty (`logical`)\cr if `TRUE` empty strings get replaced by `NA`.
#' @param whitespaces (`logical`)\cr if `TRUE` then strings made from whitespaces only get replaced with `NA`.
#'
#' @return `x` with `""` and/or whitespace-only values substituted by `NA`, depending on the values of
#'   `empty` and `whitespaces`.
#'
#' @examples
#' sas_na(c("1", "", " ", "   ", "b"))
#' sas_na(factor(c("", " ", "b")))
#'
#' is.na(sas_na(c("1", "", " ", "   ", "b")))
#'
#' @export
sas_na <- function(x, empty = TRUE, whitespaces = TRUE) {
  checkmate::assert_flag(empty)
  checkmate::assert_flag(whitespaces)

  # Matches strings that consist of one or more whitespace characters only.
  ws_only <- "^\\s+$"

  # Character input: replace the matching elements themselves.
  if (is.character(x)) {
    if (empty) {
      x[x == ""] <- NA_character_
    }
    if (whitespaces) {
      x[grepl(ws_only, x)] <- NA_character_
    }
    return(x)
  }

  if (!is.factor(x)) {
    stop("only factors and character vectors allowed")
  }

  # Factor input: set the matching levels to `NA` rather than touching the values.
  if (empty) {
    is_empty_level <- levels(x) == ""
    if (any(is_empty_level)) {
      levels(x)[is_empty_level] <- NA
    }
  }
  if (whitespaces) {
    # Evaluated after the empty level has been handled, on the levels as they are now.
    is_ws_level <- grepl(ws_only, levels(x))
    if (any(is_ws_level)) {
      levels(x)[is_ws_level] <- NA
    }
  }
  x
}

#' Count the Number of Patients with Particular Flags
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#' @param flag_variables (`character`)\cr a character vector specifying the names of `logical`
#'   variables from analysis dataset used for counting the number of unique identifiers.
#' @param flag_labels (`character`)\cr vector of labels to use for flag variables.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_patients_with_flags")`
#'   to see available statistics for this function.
#'
#' @seealso [count_patients_with_event]
#'
#' @name count_patients_with_flags
#' @order 1
NULL

#' @describeIn count_patients_with_flags Statistics function which counts the number of patients for which
#'   a particular flag variable is `TRUE`.
#'
#' @inheritParams analyze_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#'
#' @note If `flag_labels` is not specified, variables labels will be extracted from `df`. If variables are not
#'   labeled, variable names will be used instead. Alternatively, a named `vector` can be supplied to
#'   `flag_variables` such that within each name-value pair the name corresponds to the variable name and the value is
#'   the label to use for this variable.
#'
#' @return
#' * `s_count_patients_with_flags()` returns the count and the fraction of unique identifiers with each particular
#'   flag as a list of statistics `n`, `count`, `count_fraction`, and `n_blq`, with one element per flag.
#'
#' @examples
#' # `s_count_patients_with_flags()`
#'
#' s_count_patients_with_flags(
#'   adae,
#'   "SUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'   denom = "N_col",
#'   .N_col = 1000
#' )
#'
#' @export
s_count_patients_with_flags <- function(df,
                                        .var,
                                        flag_variables,
                                        flag_labels = NULL,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  checkmate::assert_character(flag_variables)

  # Resolve the display name of each flag: explicit `flag_labels` win, then the values of a
  # named `flag_variables` vector, and finally the variable labels found in `df`.
  if (!is.null(flag_labels)) {
    checkmate::assert_character(flag_labels, len = length(flag_variables), any.missing = FALSE)
    flag_names <- flag_labels
  } else {
    if (is.null(names(flag_variables))) {
      flag_names <- formatters::var_labels(df[flag_variables], fill = TRUE)
    } else {
      flag_names <- unname(flag_variables)
      flag_variables <- names(flag_variables)
    }
  }

  checkmate::assert_subset(flag_variables, colnames(df))

  # All identifiers present in `df`; this does not depend on the flag.
  all_ids <- as.character(unique(df[[.var]]))

  # `sapply()` is kept on purpose: the reshaping below works on the matrix it returns,
  # with one column per flag and one row per statistic.
  temp <- sapply(flag_variables, function(x) {
    position_satisfy_flags <- which(df[[x]])
    id_satisfy_flags <- as.character(unique(df[position_satisfy_flags, ][[.var]]))
    s_count_values(
      all_ids,
      id_satisfy_flags,
      denom = denom,
      .N_col = .N_col,
      .N_row = .N_row
    )
  })
  colnames(temp) <- flag_names
  temp <- data.frame(t(temp))
  result <- as.list(temp)

  # With a single flag the per-statistic elements come back unnamed, so the flag name is
  # restored on every statistic rather than only on the first three.
  if (length(flag_variables) == 1) {
    for (i in seq_along(result)) names(result[[i]]) <- flag_names[1]
  }
  result
}

#' @describeIn count_patients_with_flags Formatted analysis function which is used as `afun`
#'   in `count_patients_with_flags()`.
#'
#' @return
#' * `a_count_patients_with_flags()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#'
#' # `a_count_patients_with_flags()`
#'
#' afun <- make_afun(a_count_patients_with_flags,
#'   .stats = "count_fraction",
#'   .ungroup_stats = "count_fraction"
#' )
#' afun(
#'   adae,
#'   .N_col = 10L,
#'   .N_row = 10L,
#'   .var = "USUBJID",
#'   flag_variables = c("fl1", "fl2", "fl3", "fl4")
#' )
#'
#' @export
a_count_patients_with_flags <- make_afun(
  s_count_patients_with_flags,
  # `count_fraction` uses the `format_count_fraction_fixed_dp` formatting function by default.
  .formats = c(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_flags Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_flags()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_flags()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Add labelled flag variables to analysis dataset.
#' adae <- tern_ex_adae %>%
#'   mutate(
#'     fl1 = TRUE %>% with_label("Total AEs"),
#'     fl2 = (TRTEMFL == "Y") %>%
#'       with_label("Total number of patients with at least one adverse event"),
#'     fl3 = (TRTEMFL == "Y" & AEOUT == "FATAL") %>%
#'       with_label("Total number of patients with fatal AEs"),
#'     fl4 = (TRTEMFL == "Y" & AEOUT == "FATAL" & AEREL == "Y") %>%
#'       with_label("Total number of patients with related fatal AEs")
#'   )
#'
#' # `count_patients_with_flags()`
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_patients_with_flags(
#'     "SUBJID",
#'     flag_variables = c("fl1", "fl2", "fl3", "fl4"),
#'     denom = "N_col"
#'   )
#'
#' build_table(lyt2, adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_patients_with_flags <- function(lyt,
                                      var,
                                      flag_variables,
                                      flag_labels = NULL,
                                      var_labels = var,
                                      show_labels = "hidden",
                                      riskdiff = FALSE,
                                      na_str = default_na_str(),
                                      nested = TRUE,
                                      ...,
                                      table_names = paste0("tbl_flags_", var),
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)

  # Arguments destined for `s_count_patients_with_flags()`.
  s_args <- list(flag_variables = flag_variables, flag_labels = flag_labels, ...)

  afun <- make_afun(
    a_count_patients_with_flags,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = .stats
  )

  # A plain `if`/`else` selects the analysis function. `ifelse()` is meant for vectors and
  # only returns a function unchanged when that function carries no extra attributes.
  if (isFALSE(riskdiff)) {
    analysis_fun <- afun
    extra_args <- s_args
  } else {
    # With `riskdiff = TRUE`, `afun_riskdiff` is used and receives the regular analysis
    # function and its arguments through `extra_args`.
    analysis_fun <- afun_riskdiff
    extra_args <- list(
      afun = list("s_count_patients_with_flags" = afun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  lyt <- analyze(
    lyt = lyt,
    vars = var,
    var_labels = var_labels,
    show_labels = show_labels,
    afun = analysis_fun,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args
  )

  lyt
}

#' Occurrence Counts
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences for patients with occurrence
#' data. Primary analysis variables are the dictionary terms. All occurrences are counted for total
#' counts. Multiple occurrences within patient at the lowest term level displayed in the table are
#' counted only once.
#'
#' @inheritParams argument_convention
#' @param drop (`flag`)\cr whether non-appearing occurrence levels should be dropped from the resulting table.
#'   Note that in that case the remaining occurrence levels in the table are sorted alphabetically.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_occurrences")`
#'   to see available statistics for this function.
#'
#' @note By default, occurrences which don't appear in a given row split are dropped from the table and
#'   the occurrences in the table are sorted alphabetically per row split. Therefore, the corresponding layout
#'   needs to use `split_fun = drop_split_levels` in the `split_rows_by` calls. Use `drop = FALSE` if you would
#'   like to show all occurrences.
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(
#'   USUBJID = as.character(c(
#'     1, 1, 2, 4, 4, 4,
#'     6, 6, 6, 7, 7, 8
#'   )),
#'   MHDECOD = c(
#'     "MH1", "MH2", "MH1", "MH1", "MH1", "MH3",
#'     "MH2", "MH2", "MH3", "MH1", "MH2", "MH4"
#'   ),
#'   ARM = rep(c("A", "B"), each = 6),
#'   SEX = c("F", "F", "M", "M", "M", "M", "F", "F", "F", "M", "M", "F")
#' )
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' @name count_occurrences
#' @order 1
NULL

#' @describeIn count_occurrences Statistics function which counts number of patients that report an
#' occurrence.
#'
#' @param denom (`string`)\cr choice of denominator for patient proportions. Can be:
#'   - `N_col`: total number of patients in this column across rows
#'   - `n`: number of patients with any occurrences
#'
#' @return
#' * `s_count_occurrences()` returns a list with:
#'   * `count`: list of counts with one element per occurrence.
#'   * `count_fraction`: list of counts and fractions with one element per occurrence.
#'   * `fraction`: list of numerators and denominators with one element per occurrence.
#'
#' @examples
#' # Count unique occurrences per subject.
#' s_count_occurrences(
#'   df,
#'   .N_col = 4L,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
s_count_occurrences <- function(df,
                                denom = c("N_col", "n"),
                                .N_col, # nolint
                                .df_row,
                                drop = TRUE,
                                .var = "MHDECOD",
                                id = "USUBJID") {
  checkmate::assert_flag(drop)
  assert_df_with_variables(df, list(range = .var, id = id))
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[id]], classes = c("factor", "character"))
  denom <- match.arg(denom)

  term_values <- df[[.var]]
  if (drop) {
    # Levels are taken from `.df_row` and sorted. The original level order is not preserved,
    # since looking it up would be slow for large level sets and impossible for character input.
    observed_levels <- sort(unique(.df_row[[.var]]))
    if (length(observed_levels) == 0) {
      stop(
        "no empty `.df_row` input allowed when `drop = TRUE`,",
        " please use `split_fun = drop_split_levels` in the `rtables` `split_rows_by` calls"
      )
    }
    term_values <- factor(term_values, levels = observed_levels)
  }

  subject_ids <- factor(df[[id]])
  denominator <- if (denom == "n") nlevels(subject_ids) else .N_col

  # One row per occurrence, one column per identifier; TRUE where the pair appears at least once.
  any_occurrence <- table(term_values, subject_ids) > 0
  subjects_per_term <- as.list(rowSums(any_occurrence))

  list(
    count = subjects_per_term,
    count_fraction = lapply(subjects_per_term, function(k) {
      # 0 out of 0 is reported as a fraction of 0 instead of NaN.
      if (k == 0 && denominator == 0) {
        c(0, 0)
      } else {
        c(k, k / denominator)
      }
    }),
    fraction = lapply(subjects_per_term, function(k) c("num" = k, "denom" = denominator))
  )
}

#' @describeIn count_occurrences Formatted analysis function which is used as `afun`
#'   in `count_occurrences()`.
#'
#' @return
#' * `a_count_occurrences()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_occurrences(
#'   df,
#'   .N_col = 4L,
#'   .df_row = df,
#'   .var = "MHDECOD",
#'   id = "USUBJID"
#' )
#'
#' @export
a_count_occurrences <- function(df,
                                labelstr = "",
                                id = "USUBJID",
                                denom = c("N_col", "n"),
                                drop = TRUE,
                                .N_col, # nolint
                                .var = NULL,
                                .df_row = NULL,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL,
                                na_str = default_na_str()) {
  denom <- match.arg(denom)
  occ_stats <- s_count_occurrences(
    df = df, denom = denom, .N_col = .N_col, .df_row = .df_row, drop = drop, .var = .var, id = id
  )
  # Nothing to show when the statistics function produced no values at all.
  if (is.null(unlist(occ_stats))) {
    return(NULL)
  }
  occ_levels <- names(occ_stats[[1]])

  # Resolve the requested statistics and fill in default formats, labels and indent modifiers.
  stat_names <- get_stats("count_occurrences", stats_in = .stats)
  stat_formats <- get_formats_from_stats(stat_names, .formats)
  row_labels <- get_labels_from_stats(stat_names, .labels, row_nms = occ_levels)
  row_indents <- get_indents_from_stats(stat_names, .indent_mods, row_nms = occ_levels)

  # `count_fraction_fixed_dp` reuses the `count_fraction` values.
  if ("count_fraction_fixed_dp" %in% stat_names) {
    occ_stats[["count_fraction_fixed_dp"]] <- occ_stats[["count_fraction"]]
  }
  occ_stats <- occ_stats[stat_names]

  # Ungroup statistics that hold one value per occurrence level.
  ungrouped <- ungroup_stats(occ_stats, stat_formats, list(), list())
  cell_values <- ungrouped[["x"]]

  # Auto format handling
  cell_formats <- apply_auto_formatting(ungrouped[[".formats"]], cell_values, .df_row, .var)

  in_rows(
    .list = cell_values,
    .formats = cell_formats,
    .names = row_labels,
    .labels = row_labels,
    .indent_mods = row_indents,
    .format_na_strs = na_str
  )
}

#' @describeIn count_occurrences Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' # Create table layout
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences(vars = "MHDECOD", .stats = c("count_fraction"))
#'
#' # Apply table layout to data and produce `rtable` object
#' tbl <- lyt %>%
#'   build_table(df, alt_counts_df = df_adsl) %>%
#'   prune_table()
#'
#' tbl
#'
#' @export
#' @order 2
count_occurrences <- function(lyt,
                              vars,
                              id = "USUBJID",
                              drop = TRUE,
                              var_labels = vars,
                              show_labels = "hidden",
                              riskdiff = FALSE,
                              na_str = default_na_str(),
                              nested = TRUE,
                              ...,
                              table_names = vars,
                              .stats = "count_fraction_fixed_dp",
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  checkmate::assert_flag(riskdiff)

  # Formatting-related arguments, always passed on.
  extra_args <- list(
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
  )
  # Arguments destined for `s_count_occurrences()`.
  s_args <- list(id = id, drop = drop, ...)

  # A plain `if`/`else` selects the analysis function. `ifelse()` is meant for vectors and
  # only returns a function unchanged when that function carries no extra attributes.
  if (isFALSE(riskdiff)) {
    afun <- a_count_occurrences
    extra_args <- c(extra_args, s_args)
  } else {
    # With `riskdiff = TRUE`, `afun_riskdiff` is used and receives the regular analysis
    # function and its arguments through `extra_args`.
    afun <- afun_riskdiff
    extra_args <- c(
      extra_args,
      list(
        afun = list("s_count_occurrences" = a_count_occurrences),
        s_args = s_args
      )
    )
  }

  analyze(
    lyt = lyt,
    vars = vars,
    afun = afun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args
  )
}

#' @describeIn count_occurrences Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("SEX", child_labels = "visible") %>%
#'   summarize_occurrences(
#'     var = "MHDECOD",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 3
summarize_occurrences <- function(lyt,
                                  var,
                                  id = "USUBJID",
                                  drop = TRUE,
                                  riskdiff = FALSE,
                                  na_str = default_na_str(),
                                  ...,
                                  .stats = "count_fraction_fixed_dp",
                                  .formats = NULL,
                                  .indent_mods = NULL,
                                  .labels = NULL) {
  checkmate::assert_flag(riskdiff)

  # Formatting-related arguments, always passed on.
  extra_args <- list(
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
  )
  # Arguments destined for `s_count_occurrences()`.
  s_args <- list(id = id, drop = drop, ...)

  # A plain `if`/`else` selects the content function. `ifelse()` is meant for vectors and
  # only returns a function unchanged when that function carries no extra attributes.
  if (isFALSE(riskdiff)) {
    cfun <- a_count_occurrences
    extra_args <- c(extra_args, s_args)
  } else {
    # With `riskdiff = TRUE`, `afun_riskdiff` is used and receives the regular analysis
    # function and its arguments through `extra_args`.
    cfun <- afun_riskdiff
    extra_args <- c(
      extra_args,
      list(
        afun = list("s_count_occurrences" = a_count_occurrences),
        s_args = s_args
      )
    )
  }

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = cfun,
    na_str = na_str,
    extra_args = extra_args
  )
}

#' Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Compares bivariate responses between two groups in terms of odds ratios
#' along with a confidence interval.
#'
#' @inheritParams split_cols_by_groups
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_odds_ratio")`
#'   to see available statistics for this function.
#'
#' @details This function uses either logistic regression for unstratified
#'   analyses, or conditional logistic regression for stratified analyses.
#'   The Wald confidence interval with the specified confidence level is
#'   calculated.
#'
#' @note For stratified analyses, there is currently no implementation for conditional
#'   likelihood confidence intervals, therefore the likelihood confidence interval is not
#'   yet available as an option. Besides, when `rsp` contains only responders or non-responders,
#'   then the result values will be `NA`, because no odds ratio estimation is possible.
#'
#' @seealso Relevant helper function [h_odds_ratio()].
#'
#' @name odds_ratio
#' @order 1
NULL

#' @describeIn odds_ratio Statistics function which estimates the odds ratio
#'   between a treatment and a control. A `variables` list with `arm` and `strata`
#'   variable names must be passed if a stratified analysis is required.
#'
#' @return
#' * `s_odds_ratio()` returns a named list with the statistics `or_ci`
#'   (containing `est`, `lcl`, and `ucl`) and `n_tot`.
#'
#' @examples
#' # Unstratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' # Stratified analysis.
#' s_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta,
#'   variables = list(arm = "grp", strata = "strata")
#' )
#'
#' @export
s_odds_ratio <- function(df,
                         .var,
                         .ref_group,
                         .in_ref_col,
                         .df_row,
                         variables = list(arm = NULL, strata = NULL),
                         conf_level = 0.95,
                         groups_list = NULL) {
  # Empty placeholders are returned (with labels) when the current column is the reference column.
  res <- list(or_ci = "", n_tot = "")

  if (!.in_ref_col) {
    assert_proportion_value(conf_level)
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))

    if (!is.null(variables$strata)) {
      # Stratified analysis: the model is fitted on the whole row data (`.df_row`), and the
      # estimate belonging to the current column is extracted afterwards.
      assert_df_with_variables(.df_row, c(list(rsp = .var), variables))

      arm_var <- variables$arm
      ref_levels <- as.character(unique(.ref_group[[arm_var]]))
      trt_levels <- as.character(unique(df[[arm_var]]))

      # The group variable prepared for clogit must be synchronised with combination groups definition.
      if (is.null(groups_list)) {
        ref_grp <- ref_levels
        trt_grp <- trt_levels
        grp <- stats::relevel(factor(.df_row[[arm_var]]), ref = ref_grp)
      } else {
        # Flags the combination groups which contain all of the given arm levels.
        covers <- function(lvls) {
          vapply(groups_list, function(members) all(lvls %in% members), FUN.VALUE = TRUE)
        }

        # The reference and treatment columns may each pool more than one arm level.
        ref_grp <- names(groups_list)[covers(ref_levels)]
        trt_grp <- names(groups_list)[covers(trt_levels)]

        grp <- combine_levels(.df_row[[arm_var]], levels = ref_levels, new_level = ref_grp)
        grp <- combine_levels(grp, levels = trt_levels, new_level = trt_grp)
      }

      # The reference level in `grp` must be the same as in the `rtables` column split.
      clogit_data <- data.frame(
        rsp = .df_row[[.var]],
        grp = grp,
        strata = interaction(.df_row[variables$strata])
      )
      all_or <- or_clogit(clogit_data, conf_level = conf_level)

      # Exactly one treatment group must be identified and it must have an estimate.
      checkmate::assert_string(trt_grp)
      checkmate::assert_subset(trt_grp, names(all_or$or_ci))
      res$or_ci <- all_or$or_ci[[trt_grp]]
      res$n_tot <- all_or$n_tot
    } else {
      # Unstratified analysis: pool reference and current column into a two-group data set.
      grp_labels <- c("ref", "Not-ref")
      pooled <- data.frame(
        rsp = c(.ref_group[[.var]], df[[.var]]),
        grp = factor(
          rep(grp_labels, c(nrow(.ref_group), nrow(df))),
          levels = grp_labels
        )
      )
      res <- or_glm(pooled, conf_level = conf_level)
    }
  }

  res$or_ci <- formatters::with_label(
    x = res$or_ci,
    label = paste0("Odds Ratio (", 100 * conf_level, "% CI)")
  )
  res$n_tot <- formatters::with_label(
    x = res$n_tot,
    label = "Total n"
  )

  res
}

#' @describeIn odds_ratio Formatted analysis function which is used as `afun` in `estimate_odds_ratio()`.
#'
#' @return
#' * `a_odds_ratio()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_odds_ratio(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   .df_row = dta
#' )
#'
#' @export
a_odds_ratio <- make_afun(
  # Statistics function whose results are turned into formatted cell values.
  s_odds_ratio,
  # Default display format of the `or_ci` statistic: estimate followed by the interval bounds.
  .formats = c(or_ci = "xx.xx (xx.xx - xx.xx)"),
  # Default indentation modifier applied to the `or_ci` row.
  .indent_mods = c(or_ci = 1L)
)

#' @describeIn odds_ratio Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_odds_ratio()`.
#'
#' @return
#' * `estimate_odds_ratio()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_odds_ratio()` to the table layout.
#'
#' @examples
#' set.seed(12)
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50), levels = c("A", "B")),
#'   strata = factor(sample(c("C", "D"), 100, TRUE))
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_odds_ratio(vars = "rsp")
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
estimate_odds_ratio <- function(lyt,
                                vars,
                                variables = list(arm = NULL, strata = NULL),
                                conf_level = 0.95,
                                groups_list = NULL,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = "or_ci",
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  # Arguments forwarded to the statistics function for every cell.
  s_args <- list(
    variables = variables,
    conf_level = conf_level,
    groups_list = groups_list,
    ...
  )

  # Tailor the default analysis function to the requested statistics, formats, labels and indentation.
  custom_afun <- make_afun(
    a_odds_ratio,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt,
    vars,
    afun = custom_afun,
    extra_args = s_args,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested
  )
}

#' Helper Functions for Odds Ratio Estimation
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to calculate odds ratios in [estimate_odds_ratio()].
#'
#' @inheritParams argument_convention
#' @param data (`data.frame`)\cr data frame containing at least the variables `rsp` and `grp`, and optionally
#'   `strata` for [or_clogit()].
#'
#' @return A named `list` of elements `or_ci` and `n_tot`.
#'
#' @seealso [odds_ratio]
#'
#' @name h_odds_ratio
NULL

#' @describeIn h_odds_ratio Estimates the odds ratio based on [stats::glm()]. Note that there must be
#'   exactly 2 groups in `data` as specified by the `grp` variable.
#'
#' @examples
#' # Data with 2 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 1, 2)],
#'   strata = letters[c(1, 2, 1, 2, 2, 2, 1, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on glm.
#' or_glm(data, conf_level = 0.95)
#'
#' @export
or_glm <- function(data, conf_level) {
  # Input checks: logical response, valid confidence level, required columns and group type.
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))

  # The comparison is only defined for exactly two groups.
  data$grp <- as_factor_keep_attributes(data$grp)
  assert_df_with_factors(data, list(val = "grp"), min.levels = 2, max.levels = 2)

  model_formula <- stats::as.formula("rsp ~ grp")
  fit <- stats::glm(
    formula = model_formula,
    data = data,
    family = stats::binomial(link = "logit")
  )

  # Work on the log-odds scale and drop the intercept: only the group coefficient is of interest.
  log_or <- stats::coef(fit)[-1]
  log_ci <- stats::confint.default(fit, level = conf_level)[-1, , drop = FALSE]

  list(
    or_ci = stats::setNames(exp(c(log_or, log_ci)), c("est", "lcl", "ucl")),
    # Number of observations actually used in the fit.
    n_tot = stats::setNames(nrow(fit$model), "n_tot")
  )
}

#' @describeIn h_odds_ratio Estimates the odds ratio based on [survival::clogit()]. This is done for
#'   the whole data set including all groups, since the results are not the same as when doing
#'   pairwise comparisons between the groups.
#'
#' @examples
#' # Data with 3 groups.
#' data <- data.frame(
#'   rsp = as.logical(c(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0)),
#'   grp = letters[c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3)],
#'   strata = LETTERS[c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)],
#'   stringsAsFactors = TRUE
#' )
#'
#' # Odds ratio based on stratified estimation by conditional logistic regression.
#' or_clogit(data, conf_level = 0.95)
#'
#' @export
or_clogit <- function(data, conf_level) {
  # Input checks: logical response, valid confidence level, required columns and variable types.
  checkmate::assert_logical(data$rsp)
  assert_proportion_value(conf_level)
  assert_df_with_variables(data, list(rsp = "rsp", grp = "grp", strata = "strata"))
  checkmate::assert_multi_class(data$grp, classes = c("factor", "character"))
  checkmate::assert_multi_class(data$strata, classes = c("factor", "character"))

  data$grp <- as_factor_keep_attributes(data$grp)
  data$strata <- as_factor_keep_attributes(data$strata)

  # Deviation from convention: `survival::strata` must be simply `strata`.
  model_formula <- stats::as.formula("rsp ~ grp + strata(strata)")
  fit <- clogit_with_tryCatch(formula = model_formula, data = data)

  # One coefficient per non-reference group: collect estimate and confidence limits (back-transformed
  # from the log scale) under the group's name, i.e. the coefficient name without the `grp` prefix.
  log_or <- stats::coef(fit)
  log_ci <- stats::confint(fit, level = conf_level)
  or_ci <- list()
  for (term in names(log_or)) {
    estimates <- exp(c(log_or[term], log_ci[term, , drop = TRUE]))
    names(estimates) <- c("est", "lcl", "ucl")
    or_ci[[gsub("^grp", "", x = term)]] <- estimates
  }

  list(or_ci = or_ci, n_tot = c(n_tot = fit$n))
}

#' Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the event rate adjusted for person-years at risk, otherwise known
#' as incidence rate. Primary analysis variable is the person-years at risk.
#'
#' @inheritParams argument_convention
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level` (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type` (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit` (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year` (`numeric`)\cr time unit for desired output (in person-years).
#' @param n_events (`integer`)\cr number of events observed.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_incidence_rate")`
#'   to see available statistics for this function.
#'
#' @seealso [control_incidence_rate()] and helper functions [h_incidence_rate].
#'
#' @name incidence_rate
#' @order 1
NULL

#' @describeIn incidence_rate Statistics function which estimates the incidence rate and the
#'   associated confidence interval.
#'
#' @return
#' * `s_incidence_rate()` returns the following statistics:
#'   - `person_years`: Total person-years at risk.
#'   - `n_events`: Total number of events observed.
#'   - `rate`: Estimated incidence rate.
#'   - `rate_ci`: Confidence interval for the incidence rate.
#'
#' @keywords internal
s_incidence_rate <- function(df,
                             .var,
                             n_events,
                             is_event,
                             control = control_incidence_rate()) {
  if (missing(is_event)) {
    # Regular path: `n_events` names an integer column holding the event counts.
    assert_df_with_variables(df, list(tte = .var, n_events = n_events))
    checkmate::assert_string(.var)
    checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
    checkmate::assert_integer(df[[n_events]], any.missing = FALSE)
  } else {
    warning("argument is_event will be deprecated. Please use n_events.")

    # Legacy path: fall back on the logical `is_event` column only if `n_events` was not given.
    if (missing(n_events)) {
      assert_df_with_variables(df, list(tte = .var, is_event = is_event))
      checkmate::assert_string(.var)
      checkmate::assert_logical(df[[is_event]], any.missing = FALSE)
      checkmate::assert_numeric(df[[.var]], any.missing = FALSE)
      n_events <- is_event
    }
  }

  time_unit <- control$input_time_unit
  num_pt_year <- control$num_pt_year
  conf_level <- control$conf_level

  # Conversion factor from the input time unit to years; exactly one term is non-zero.
  years_per_unit <- (time_unit == "year") +
    (time_unit == "month") / 12 +
    (time_unit == "week") / 52.14 +
    (time_unit == "day") / 365.24
  person_years <- sum(df[[.var]], na.rm = TRUE) * years_per_unit
  total_events <- sum(df[[n_events]], na.rm = TRUE)

  estimate <- h_incidence_rate(person_years, total_events, control)

  list(
    person_years = formatters::with_label(person_years, "Total patient-years at risk"),
    n_events = formatters::with_label(total_events, "Number of adverse events observed"),
    rate = formatters::with_label(estimate$rate, paste("AE rate per", num_pt_year, "patient-years")),
    rate_ci = formatters::with_label(estimate$rate_ci, f_conf_level(conf_level))
  )
}

#' @describeIn incidence_rate Formatted analysis function which is used as `afun`
#'   in `estimate_incidence_rate()`.
#'
#' @return
#' * `a_incidence_rate()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_incidence_rate <- make_afun(
  # Statistics function whose results are turned into formatted cell values.
  s_incidence_rate,
  # Default display formats, one per statistic returned by `s_incidence_rate()`.
  .formats = c(
    "person_years" = "xx.x",
    "n_events" = "xx",
    "rate" = "xx.xx",
    "rate_ci" = "(xx.xx, xx.xx)"
  )
)

#' @describeIn incidence_rate Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_incidence_rate()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_incidence_rate()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(seq(6)),
#'   CNSR = c(0, 1, 1, 0, 0, 0),
#'   AVAL = c(10.1, 20.4, 15.3, 20.8, 18.7, 23.4),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B"))
#' ) %>%
#'   mutate(is_event = CNSR == 0) %>%
#'   mutate(n_events = as.integer(is_event))
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   estimate_incidence_rate(
#'     vars = "AVAL",
#'     n_events = "n_events",
#'     control = control_incidence_rate(
#'       input_time_unit = "month",
#'       num_pt_year = 100
#'     )
#'   ) %>%
#'   build_table(df)
#'
#' @export
#' @order 2
estimate_incidence_rate <- function(lyt,
                                    vars,
                                    n_events,
                                    control = control_incidence_rate(),
                                    na_str = default_na_str(),
                                    nested = TRUE,
                                    ...,
                                    show_labels = "hidden",
                                    table_names = vars,
                                    .stats = NULL,
                                    .formats = NULL,
                                    .labels = NULL,
                                    .indent_mods = NULL) {
  # Arguments forwarded to the statistics function for every cell.
  s_args <- list(n_events = n_events, control = control, ...)

  # Tailor the default analysis function to the requested statistics, formats, labels and indentation.
  custom_afun <- make_afun(
    a_incidence_rate,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt,
    vars,
    afun = custom_afun,
    extra_args = s_args,
    na_str = na_str,
    nested = nested,
    show_labels = show_labels,
    table_names = table_names
  )
}

#' Helper Functions for Incidence Rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param control (`list`)\cr parameters for estimation details, specified by using
#'   the helper function [control_incidence_rate()]. Possible parameter options are:
#'   * `conf_level`: (`proportion`)\cr confidence level for the estimated incidence rate.
#'   * `conf_type`: (`string`)\cr `normal` (default), `normal_log`, `exact`, or `byar`
#'     for confidence interval type.
#'   * `input_time_unit`: (`string`)\cr `day`, `week`, `month`, or `year` (default)
#'     indicating time unit for data input.
#'   * `num_pt_year`: (`numeric`)\cr time unit for desired output (in person-years).
#' @param person_years (`numeric`)\cr total person-years at risk.
#' @param alpha (`numeric`)\cr two-sided alpha-level for confidence interval.
#' @param n_events (`integer`)\cr number of events observed.
#'
#' @return Estimated incidence rate `rate` and associated confidence interval `rate_ci`.
#'
#' @seealso [incidence_rate]
#'
#' @name h_incidence_rate
NULL

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal(200, 2)
#'
#' @export
h_incidence_rate_normal <- function(person_years,
                                    n_events,
                                    alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years
  # Half-width of the interval: normal quantile times the standard error of the rate.
  half_width <- stats::qnorm(1 - alpha / 2) * sqrt(rate / person_years)

  list(rate = rate, rate_ci = c(rate - half_width, rate + half_width))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval based on the normal approximation for the
#'   logarithm of the incidence rate. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_normal_log(200, 2)
#'
#' @export
h_incidence_rate_normal_log <- function(person_years,
                                        n_events,
                                        alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  rate <- n_events / person_years
  rate_se <- sqrt(rate / person_years)
  # Standard error on the log scale: the rate's standard error relative to the rate itself.
  log_se <- rate_se / rate
  z <- stats::qnorm(1 - alpha / 2)

  # The interval is built around the log rate and then back-transformed.
  list(rate = rate, rate_ci = exp(log(rate) + c(-z, z) * log_se))
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated exact confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_exact(200, 2)
#'
#' @export
h_incidence_rate_exact <- function(person_years,
                                   n_events,
                                   alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # Chi-square quantiles with `2 * n_events` (lower) and `2 * n_events + 2` (upper) degrees of freedom.
  chisq_limits <- c(
    stats::qchisq(p = alpha / 2, df = 2 * n_events),
    stats::qchisq(p = 1 - alpha / 2, df = 2 * n_events + 2)
  )

  list(
    rate = n_events / person_years,
    rate_ci = chisq_limits / (2 * person_years)
  )
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated `Byar`'s confidence interval. Unit is one person-year.
#'
#' @examples
#' h_incidence_rate_byar(200, 2)
#'
#' @export
h_incidence_rate_byar <- function(person_years,
                                  n_events,
                                  alpha = 0.05) {
  checkmate::assert_number(person_years)
  checkmate::assert_number(n_events)
  assert_proportion_value(alpha)

  # The event count shifted by 0.5 is shared by every term of the interval formula.
  n_adj <- n_events + 0.5
  centre <- 1 - 1 / (9 * n_adj)
  spread <- stats::qnorm(1 - alpha / 2) * sqrt(1 / n_adj) / 3

  # Lower and upper limits only differ in the sign of `spread`.
  limits <- n_adj * (centre + c(-1, 1) * spread)^3 / person_years

  list(rate = n_events / person_years, rate_ci = limits)
}

#' @describeIn h_incidence_rate Helper function to estimate the incidence rate and
#'   associated confidence interval.
#'
#' @keywords internal
h_incidence_rate <- function(person_years,
                             n_events,
                             control = control_incidence_rate()) {
  alpha <- 1 - control$conf_level

  # Dispatch on the requested confidence interval type. The trailing unnamed branch is the default:
  # without it `switch()` would return `NULL` for an unsupported type and the function would
  # silently produce zero-length results.
  est <- switch(control$conf_type,
    normal = h_incidence_rate_normal(person_years, n_events, alpha),
    normal_log = h_incidence_rate_normal_log(person_years, n_events, alpha),
    exact = h_incidence_rate_exact(person_years, n_events, alpha),
    byar = h_incidence_rate_byar(person_years, n_events, alpha),
    stop(
      "`control$conf_type` must be one of \"normal\", \"normal_log\", \"exact\" or \"byar\".",
      call. = FALSE
    )
  )

  # The helpers work per single person-year; rescale to the requested number of person-years.
  num_pt_year <- control$num_pt_year
  list(
    rate = est$rate * num_pt_year,
    rate_ci = est$rate_ci * num_pt_year
  )
}

#' Patient Counts for Laboratory Events (Worsen From Baseline) by Highest Grade Post-Baseline
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Patient count and fraction for laboratory events (worsen from baseline) shift table.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `string`)\cr list of additional analysis variables including:
#'   * `id` (`string`)\cr subject variable name.
#'   * `baseline_var` (`string`)\cr name of the data column containing baseline toxicity variable.
#'   * `direction_var` (`string`)\cr see the `direction_var` argument of [h_adlb_worsen()] for more details.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_worst_grade_worsen")`
#'   to see all available statistics.
#'
#' @seealso Relevant helper functions [h_adlb_worsen()] and [h_worsen_counter()]
#'
#' @name abnormal_by_worst_grade_worsen
#' @order 1
NULL

#' Helper Function to Prepare `ADLB` with Worst Labs
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare a `df` for generating the patient count shift table.
#'
#' @param adlb (`data.frame`)\cr `ADLB` dataframe
#' @param worst_flag_low (named `vector`)\cr Worst low post-baseline lab grade flag variable
#' @param worst_flag_high (named `vector`)\cr Worst high post-baseline lab grade flag variable
#' @param direction_var (`string`)\cr Direction variable specifying the direction of the shift table of interest.
#'   Only lab records flagged by `L`, `H` or `B` are included in the shift table.
#'   * `L`: low direction only
#'   * `H`: high direction only
#'   * `B`: both low and high directions
#'
#' @return `h_adlb_worsen()` returns the `adlb` `data.frame` containing only the
#'   worst labs specified according to `worst_flag_low` or `worst_flag_high` for the
#'   direction specified according to `direction_var`. For instance, for a lab that is
#'   needed for the low direction only, only records flagged by `worst_flag_low` are
#'   selected. For a lab that is needed for both low and high directions, the worst
#'   low records are selected for the low direction, and the worst high records are selected
#'   for the high direction.
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_adlb_worsen <- function(adlb,
                          worst_flag_low = NULL,
                          worst_flag_high = NULL,
                          direction_var) {
  checkmate::assert_string(direction_var)

  # Without any worst flag there is nothing to select; fail with a clear message instead of the
  # "object 'out' not found" error that would otherwise occur further down.
  if (is.null(worst_flag_low) && is.null(worst_flag_high)) {
    stop("At least one of `worst_flag_low` and `worst_flag_high` must be specified.", call. = FALSE)
  }

  checkmate::assert_subset(as.character(unique(adlb[[direction_var]])), c("B", "L", "H"))
  assert_df_with_variables(adlb, list("Col" = direction_var))

  # The flag variables required depend on which directions occur in the data.
  directions <- unique(adlb[[direction_var]])

  if (any(directions == "H")) {
    assert_df_with_variables(adlb, list("High" = names(worst_flag_high)))
  }

  if (any(directions == "L")) {
    assert_df_with_variables(adlb, list("Low" = names(worst_flag_low)))
  }

  if (any(directions == "B")) {
    assert_df_with_variables(
      adlb,
      list(
        "Low" = names(worst_flag_low),
        "High" = names(worst_flag_high)
      )
    )
  }

  # Keep the records flagged as worst post-baseline lab, either low or high or both.
  worst_flag <- c(worst_flag_low, worst_flag_high)
  flagged_rows <- Reduce(
    union,
    Map(
      function(flag_var, flag_value) which(adlb[[flag_var]] == flag_value),
      names(worst_flag),
      worst_flag
    )
  )
  adlb_f <- adlb[flagged_rows, ]

  # Selects the records of `adlb_f` with direction code `code` ("H", "L" or "B"), relabels their
  # direction as `new_label` ("High" or "Low") and keeps those flagged by `flag`.
  # Returns `NULL` when `flag` is not specified, which `rbind()` ignores.
  select_worst <- function(flag, code, new_label) {
    if (is.null(flag)) {
      return(NULL)
    }
    selected <- adlb_f[which(adlb_f[[direction_var]] == code), ]
    selected[[direction_var]] <- rep(new_label, nrow(selected))
    selected[which(selected[[names(flag)]] == flag), ]
  }

  # Labs requiring both directions ("B") contribute to the high and to the low part, so their
  # records are stacked once per direction.
  out <- rbind(
    select_worst(worst_flag_high, "H", "High"),
    select_worst(worst_flag_high, "B", "High"),
    select_worst(worst_flag_low, "L", "Low"),
    select_worst(worst_flag_low, "B", "Low")
  )

  # Restore the variable labels of the input data.
  formatters::var_labels(out) <- formatters::var_labels(adlb_f, fill = FALSE)

  out
}

#' Helper Function to Analyze Patients for [s_count_abnormal_lab_worsen_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to count the number of patients and the fraction of patients according to
#' highest post-baseline lab grade variable `.var`, baseline lab grade variable `baseline_var`,
#' and the direction of interest specified in `direction_var`.
#'
#' @inheritParams argument_convention
#' @inheritParams h_adlb_worsen
#' @param baseline_var (`string`)\cr baseline lab grade variable
#'
#' @return `h_worsen_counter()` returns the counts and fraction of patients
#'   whose worst post-baseline lab grades are worse than their baseline grades, for
#'   post-baseline worst grades "1", "2", "3", "4" and "Any".
#'
#' @seealso [abnormal_by_worst_grade_worsen]
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' # `h_worsen_counter`
#' h_worsen_counter(
#'   df %>% filter(PARAMCD == "CRP" & GRADDR == "Low"),
#'   id = "USUBJID",
#'   .var = "ATOXGR",
#'   baseline_var = "BTOXGR",
#'   direction_var = "GRADDR"
#' )
#'
#' @export
h_worsen_counter <- function(df, id, .var, baseline_var, direction_var) {
  checkmate::assert_string(id)
  checkmate::assert_string(.var)
  checkmate::assert_string(baseline_var)
  checkmate::assert_scalar(unique(df[[direction_var]]))
  checkmate::assert_subset(unique(df[[direction_var]]), c("High", "Low"))
  assert_df_with_variables(df, list(val = c(id, .var, baseline_var, direction_var)))

  # Read the (validated, unique) direction before any rows are dropped: if every post-baseline value
  # is missing, the filtered data has no rows left to read it from and `if (direction == ...)`
  # would fail on a zero-length condition.
  direction <- unique(df[[direction_var]])

  # Remove post-baseline missing. `which()` makes the filter NA-safe: rows with an `NA` grade are
  # dropped instead of being turned into all-`NA` rows by logical indexing.
  df <- df[which(df[[.var]] != "<Missing>"), ]

  # Grades of interest and the worst possible grade in the direction of interest.
  if (direction == "Low") {
    grade <- -1:-4
    worst_grade <- -4
  } else {
    grade <- 1:4
    worst_grade <- 4
  }

  # Counts by grade. With no records left every grade simply gets `num = 0` and `denom = 0`.
  by_grade <- lapply(grade, function(i) {
    # Keep patients whose baseline grade is less severe than `i` (in the direction of interest)
    # or <Missing>.
    eligible <- df[df[[baseline_var]] %in% c((i + sign(i) * -1):(-1 * worst_grade), "<Missing>"), ]
    c(
      # num: number of eligible patients with post-baseline worst lab equal to `i`
      num = length(unique(eligible[eligible[[.var]] %in% i, id, drop = TRUE])),
      # denom: number of eligible patients
      denom = length(unique(eligible[[id]]))
    )
  })
  names(by_grade) <- as.character(seq_along(by_grade))

  # "Any" grade: patients whose baseline grade is not already the worst grade (or is <Missing>).
  df_any <- df[!df[[baseline_var]] %in% worst_grade, ]
  denom <- length(unique(df_any[, id, drop = TRUE]))

  abnormal <- df_any[[.var]] %in% grade

  # Condition 1: missing baseline and post-baseline in the direction of abnormality.
  con1 <- which(df_any[[baseline_var]] == "<Missing>" & abnormal)

  # Condition 2: non-missing baseline and post-baseline worse than baseline. The comparison is
  # restricted to rows with non-missing baseline, and the hits are mapped back to row positions of
  # `df_any` so that they can be combined with `con1`, which also indexes `df_any`.
  idx_nm <- which(df_any[[baseline_var]] != "<Missing>" & abnormal)
  post <- as.numeric(as.character(df_any[[.var]][idx_nm]))
  base <- as.numeric(as.character(df_any[[baseline_var]][idx_nm]))
  worse <- if (direction == "Low") post < base else post > base
  con2 <- idx_nm[which(worse)]

  # Number of patients satisfying either condition 1 or 2.
  num <- length(unique(df_any[union(con1, con2), id, drop = TRUE]))

  list(fraction = c(by_grade, list("Any" = c(num = num, denom = denom))))
}

#' @describeIn abnormal_by_worst_grade_worsen Statistics function for patients whose worst post-baseline
#'   lab grades are worse than their baseline grades.
#'
#' @return
#' * `s_count_abnormal_lab_worsen_by_baseline()` returns the counts and fraction of patients whose worst
#'   post-baseline lab grades are worse than their baseline grades, for post-baseline worst grades
#'   "1", "2", "3", "4" and "Any".
#'
#' @keywords internal
s_count_abnormal_lab_worsen_by_baseline <- function(df, # nolint
                                                    .var = "ATOXGR",
                                                    variables = list(
                                                      id = "USUBJID",
                                                      baseline_var = "BTOXGR",
                                                      direction_var = "GRADDR"
                                                    )) {
  # Validates the inputs and delegates the counting to `h_worsen_counter()`.
  required_keys <- c("id", "baseline_var", "direction_var")

  checkmate::assert_string(.var)
  checkmate::assert_set_equal(names(variables), required_keys)
  # Each entry of `variables` must be a single variable name.
  for (key in required_keys) {
    checkmate::assert_string(variables[[key]], .var.name = paste0("variables$", key))
  }
  assert_df_with_variables(df, c(aval = .var, variables[1:3]))
  assert_list_of_variables(variables)

  h_worsen_counter(
    df = df,
    id = variables$id,
    .var = .var,
    baseline_var = variables$baseline_var,
    direction_var = variables$direction_var
  )
}


#' @describeIn abnormal_by_worst_grade_worsen Formatted analysis function which is used as `afun`
#'   in `count_abnormal_lab_worsen_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_lab_worsen_by_baseline()` returns the corresponding list with
#'   formatted [rtables::CellValue()].
#'
#' @keywords internal
# Formatted analysis function built from the statistics function: the `fraction` statistic is
# formatted with `format_fraction` and listed in `.ungroup_stats`.
a_count_abnormal_lab_worsen_by_baseline <- make_afun( # nolint
  fun = s_count_abnormal_lab_worsen_by_baseline,
  .ungroup_stats = "fraction",
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal_by_worst_grade_worsen Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_lab_worsen_by_baseline()` returns a layout object suitable for passing to further layouting
#'   functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted
#'   rows containing the statistics from `s_count_abnormal_lab_worsen_by_baseline()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # The direction variable, GRADDR, is based on metadata
#' adlb <- tern_ex_adlb %>%
#'   mutate(
#'     GRADDR = case_when(
#'       PARAMCD == "ALT" ~ "B",
#'       PARAMCD == "CRP" ~ "L",
#'       PARAMCD == "IGA" ~ "H"
#'     )
#'   ) %>%
#'   filter(SAFFL == "Y" & ONTRTFL == "Y" & GRADDR != "")
#'
#' df <- h_adlb_worsen(
#'   adlb,
#'   worst_flag_low = c("WGRLOFL" = "Y"),
#'   worst_flag_high = c("WGRHIFL" = "Y"),
#'   direction_var = "GRADDR"
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   add_colcounts() %>%
#'   split_rows_by("PARAMCD") %>%
#'   split_rows_by("GRADDR") %>%
#'   count_abnormal_lab_worsen_by_baseline(
#'     var = "ATOXGR",
#'     variables = list(
#'       id = "USUBJID",
#'       baseline_var = "BTOXGR",
#'       direction_var = "GRADDR"
#'     )
#'   ) %>%
#'   append_topleft("Direction of Abnormality") %>%
#'   build_table(df = df, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_abnormal_lab_worsen_by_baseline <- function(lyt, # nolint
                                                  var,
                                                  variables = list(
                                                    id = "USUBJID",
                                                    baseline_var = "BTOXGR",
                                                    direction_var = "GRADDR"
                                                  ),
                                                  na_str = default_na_str(),
                                                  nested = TRUE,
                                                  ...,
                                                  table_names = NULL,
                                                  .stats = NULL,
                                                  .formats = NULL,
                                                  .labels = NULL,
                                                  .indent_mods = NULL) {
  checkmate::assert_string(var)

  # `table_names` used to be accepted but never forwarded. Fall back to `var`, which is the
  # default `analyze()` applies itself, so calls that leave it as `NULL` behave as before.
  if (is.null(table_names)) {
    table_names <- var
  }

  # Arguments handed to the analysis function for every cell.
  extra_args <- list(variables = variables, ...)

  # Customize the pre-built formatted analysis function with the user selections.
  afun <- make_afun(
    a_count_abnormal_lab_worsen_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt = lyt,
    vars = var,
    afun = afun,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = "hidden"
  )
}

#' Compare Variables Between Groups
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Comparison with a reference group for different `x` objects.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("analyze_vars_numeric")` to see
#'   statistics available for numeric variables, and `get_stats("analyze_vars_counts")` for statistics available
#'   for non-numeric variables.
#'
#' @note
#' * For factor variables, `denom` for factor proportions can only be `n` since the purpose is to compare proportions
#'   between columns, therefore a row-based proportion would not make sense. Proportion based on `N_col` would
#'   be difficult since we use counts for the chi-squared test statistic, therefore missing values should be accounted
#'   for as explicit factor levels.
#' * If factor variables contain `NA`, these `NA` values are excluded by default. To include `NA` values
#'   set `na.rm = FALSE` and missing values will be displayed as an `NA` level. Alternatively, an explicit
#'   factor level can be defined for `NA` values during pre-processing via [df_explicit_na()] - the
#'   default `na_level` (`"<Missing>"`) will also be excluded when `na.rm` is set to `TRUE`.
#' * For character variables, automatic conversion to factor does not guarantee that the table
#'   will be generated correctly. In particular for sparse tables this very likely can fail.
#'   Therefore it is always better to manually convert character variables to factors during pre-processing.
#' * For `compare_vars()`, the column split must define a reference group via `ref_group` so that the comparison
#'   is well defined.
#'
#' @seealso Relevant constructor function [create_afun_compare()], [s_summary()] which is used internally
#'   to compute a summary within `s_compare()`, and [a_compare()] which is used (with `compare = TRUE`) as the analysis
#'   function for `compare_vars()`.
#'
#' @name compare_variables
#' @include analyze_variables.R
#' @order 1
NULL

#' @describeIn compare_variables S3 generic function to produce a comparison summary.
#'
#' @return
#' * `s_compare()` returns output of [s_summary()] and comparisons versus the reference group in the form of p-values.
#'
#' @export
s_compare <- function(x,
                      .ref_group,
                      .in_ref_col,
                      ...) {
  # S3 generic: dispatch happens on the class of the first argument, `x`.
  UseMethod("s_compare")
}

#' @describeIn compare_variables Method for `numeric` class. This uses the standard t-test
#'   to calculate the p-value.
#'
#' @method s_compare numeric
#'
#' @examples
#' # `s_compare.numeric`
#'
#' ## Usual case where both this and the reference group vector have more than 1 value.
#' s_compare(rnorm(10, 5, 1), .ref_group = rnorm(5, -5, 1), .in_ref_col = FALSE)
#'
#' ## If one group has not more than 1 value, then p-value is not calculated.
#' s_compare(rnorm(10, 5, 1), .ref_group = 1, .in_ref_col = FALSE)
#'
#' ## Empty numeric does not fail, it returns NA-filled items and no p-value.
#' s_compare(numeric(), .ref_group = numeric(), .in_ref_col = FALSE)
#'
#' @export
s_compare.numeric <- function(x,
                              .ref_group,
                              .in_ref_col,
                              ...) {
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(.ref_group)
  checkmate::assert_flag(.in_ref_col)

  # Start from the summary statistics of `x` and append the p-value.
  y <- s_summary.numeric(x = x, ...)

  # The t-test is only run outside the reference column and when both groups
  # have more than one available value; otherwise the p-value stays empty.
  can_test <- !.in_ref_col && n_available(x) > 1 && n_available(.ref_group) > 1
  pval <- character()
  if (can_test) {
    pval <- stats::t.test(x, .ref_group)$p.value
  }
  y$pval <- pval

  y
}

#' @describeIn compare_variables Method for `factor` class. This uses the chi-squared test
#'   to calculate the p-value.
#'
#' @param denom (`string`)\cr choice of denominator for factor proportions,
#'   can only be `n` (number of values in this row and column intersection).
#'
#' @method s_compare factor
#'
#' @examples
#' # `s_compare.factor`
#'
#' ## Basic usage:
#' x <- factor(c("a", "a", "b", "c", "a"))
#' y <- factor(c("a", "b", "c"))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- explicit_na(factor(c("a", "a", "b", "c", "a", NA, NA)))
#' y <- explicit_na(factor(c("a", "b", "c", NA)))
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x = x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.factor <- function(x,
                             .ref_group,
                             .in_ref_col,
                             denom = "n",
                             na.rm = TRUE, # nolint
                             ...) {
  checkmate::assert_flag(.in_ref_col)
  assert_valid_factor(x)
  assert_valid_factor(.ref_group)
  denom <- match.arg(denom)

  # Summary statistics of `x`; the p-value is appended below.
  y <- s_summary.factor(x = x, denom = denom, na.rm = na.rm, ...)

  if (!na.rm) {
    # Keep missing values by turning them into an explicit "NA" level.
    x <- explicit_na(x, label = "NA")
    .ref_group <- explicit_na(.ref_group, label = "NA")
  } else {
    # Drop `NA` values and discard the "<Missing>" level.
    x <- fct_discard(x[!is.na(x)], "<Missing>")
    .ref_group <- fct_discard(.ref_group[!is.na(.ref_group)], "<Missing>")
  }

  # Both factors must share the same levels for the contingency table below.
  if ("NA" %in% levels(x)) {
    levels(.ref_group) <- c(levels(.ref_group), "NA")
  }
  checkmate::assert_factor(x, levels = levels(.ref_group), min.levels = 2)

  # Chi-squared test only outside the reference column and with data in both groups.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  y$pval_counts <- if (can_test) {
    counts <- rbind(table(x), table(.ref_group))
    suppressWarnings(stats::chisq.test(counts))$p.value
  } else {
    character()
  }

  y
}

#' @describeIn compare_variables Method for `character` class. This makes an automatic
#'   conversion to `factor` (with a warning) and then forwards to the method for factors.
#'
#' @param verbose (`logical`)\cr Whether warnings and messages should be printed. Mainly used
#'   to print out information about factor casting. Defaults to `TRUE`.
#'
#' @method s_compare character
#'
#' @examples
#' # `s_compare.character`
#'
#' ## Basic usage:
#' x <- c("a", "a", "b", "c", "a")
#' y <- c("a", "b", "c")
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, .var = "x", verbose = FALSE)
#'
#' ## Note that missing values handling can make a large difference:
#' x <- c("a", "a", "b", "c", "a", NA)
#' y <- c("a", "b", "c", rep(NA, 20))
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE,
#'   .var = "x", verbose = FALSE
#' )
#' s_compare(x,
#'   .ref_group = y, .in_ref_col = FALSE, .var = "x",
#'   na.rm = FALSE, verbose = FALSE
#' )
#'
#' @export
s_compare.character <- function(x,
                                .ref_group,
                                .in_ref_col,
                                denom = "n",
                                na.rm = TRUE, # nolint
                                .var,
                                verbose = TRUE,
                                ...) {
  # Cast both vectors to factors, then re-dispatch on the converted `x`.
  # `.var` and `verbose` are consumed here and not forwarded.
  x_fct <- as_factor_keep_attributes(x, verbose = verbose)
  ref_fct <- as_factor_keep_attributes(.ref_group, verbose = verbose)

  s_compare(
    x = x_fct,
    .ref_group = ref_fct,
    .in_ref_col = .in_ref_col,
    denom = denom,
    na.rm = na.rm,
    ...
  )
}

#' @describeIn compare_variables Method for `logical` class. A chi-squared test
#'   is used. If missing values are not removed, then they are counted as `FALSE`.
#'
#' @method s_compare logical
#'
#' @examples
#' # `s_compare.logical`
#'
#' ## Basic usage:
#' x <- c(TRUE, FALSE, TRUE, TRUE)
#' y <- c(FALSE, FALSE, TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE)
#'
#' ## Management of NA values.
#' x <- c(NA, TRUE, FALSE)
#' y <- c(NA, NA, NA, NA, FALSE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = TRUE)
#' s_compare(x, .ref_group = y, .in_ref_col = FALSE, na.rm = FALSE)
#'
#' @export
s_compare.logical <- function(x,
                              .ref_group,
                              .in_ref_col,
                              na.rm = TRUE, # nolint
                              denom = "n",
                              ...) {
  denom <- match.arg(denom)

  # Summary statistics of `x`; the p-value is appended below.
  y <- s_summary.logical(x = x, na.rm = na.rm, denom = denom, ...)

  if (na.rm) {
    x <- stats::na.omit(x)
    .ref_group <- stats::na.omit(.ref_group)
  } else {
    # Missing values that are kept are counted as `FALSE`.
    x[is.na(x)] <- FALSE
    .ref_group[is.na(.ref_group)] <- FALSE
  }

  # Tabulate a logical vector with a fixed TRUE/FALSE column order.
  count_true_false <- function(values) {
    table(factor(values, levels = c(TRUE, FALSE)))
  }

  # Test only outside the reference column and with data in both groups.
  can_test <- !.in_ref_col && length(x) > 0 && length(.ref_group) > 0
  y$pval_counts <- if (can_test) {
    tbl <- rbind(count_true_false(x), count_true_false(.ref_group))
    suppressWarnings(prop_chisq(tbl))
  } else {
    character()
  }

  y
}

#' @describeIn compare_variables Formatted analysis function which is used as `afun`
#'   in `compare_vars()`.
#'
#' @return
#' * `a_compare()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @note `a_compare()` has been deprecated in favor of `a_summary()` with argument `compare` set to `TRUE`.
#'
#' @examples
#' # `a_compare` deprecated - use `a_summary()` instead
#' a_compare(rnorm(10, 5, 1), .ref_group = rnorm(20, -5, 1), .stats = c("n", "pval"))
#'
#' @export
a_compare <- function(x,
                      .N_col, # nolint
                      .N_row, # nolint
                      .var = NULL,
                      .df_row = NULL,
                      .ref_group = NULL,
                      .in_ref_col = FALSE,
                      ...) {
  # Deprecated wrapper: warn, then forward everything to `a_summary()` in comparison mode.
  lifecycle::deprecate_warn(
    when = "0.8.3",
    what = "a_compare()",
    details = "Please use a_summary() with argument `compare` set to TRUE instead."
  )

  a_summary(
    x = x,
    compare = TRUE,
    .N_col = .N_col,
    .N_row = .N_row,
    .var = .var,
    .df_row = .df_row,
    .ref_group = .ref_group,
    .in_ref_col = .in_ref_col,
    ...
  )
}

#' Constructor Function for [compare_vars()]
#'
#' @description `r lifecycle::badge("deprecated")`
#'
#' Constructor function which creates a combined formatted analysis function.
#'
#' @inheritParams argument_convention
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return Combined formatted analysis function for use in [compare_vars()].
#'
#' @note This function has been deprecated in favor of direct implementation of `a_summary()` with argument `compare`
#'   set to `TRUE`.
#'
#' @seealso [compare_vars()]
#'
#' @export
create_afun_compare <- function(.stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  lifecycle::deprecate_warn(
    when = "0.8.5.9010",
    what = "create_afun_compare()",
    details = "Please use a_summary(compare = TRUE) directly instead."
  )

  # The returned closure captures the statistic selections and formatting given above and
  # forwards them, together with the per-cell arguments, to `a_summary()` in comparison mode.
  afun <- function(x,
                   .ref_group,
                   .in_ref_col,
                   ...,
                   .var) {
    a_summary(
      x = x,
      compare = TRUE,
      .stats = .stats,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods,
      .ref_group = .ref_group,
      .in_ref_col = .in_ref_col,
      .var = .var,
      ...
    )
  }
  afun
}

#' @describeIn compare_variables Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @param ... arguments passed to `s_compare()`.
#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
#'   should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
#'   for that statistic's row label.
#'
#' @return
#' * `compare_vars()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_compare()` to the table layout.
#'
#' @examples
#' # `compare_vars()` in `rtables` pipelines
#'
#' ## Default output within a `rtables` pipeline.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM B") %>%
#'   compare_vars(c("AGE", "SEX"))
#' build_table(lyt, tern_ex_adsl)
#'
#' ## Select and format statistics output.
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", ref_group = "ARM C") %>%
#'   compare_vars(
#'     vars = "AGE",
#'     .stats = c("mean_sd", "pval"),
#'     .formats = c(mean_sd = "xx.x, xx.x"),
#'     .labels = c(mean_sd = "Mean, SD")
#'   )
#' build_table(lyt, df = tern_ex_adsl)
#'
#' @export
#' @order 2
compare_vars <- function(lyt,
                         vars,
                         var_labels = vars,
                         na_level = lifecycle::deprecated(),
                         na_str = default_na_str(),
                         nested = TRUE,
                         ...,
                         na.rm = TRUE, # nolint
                         show_labels = "default",
                         table_names = vars,
                         section_div = NA_character_,
                         .stats = c("n", "mean_sd", "count_fraction", "pval"),
                         .formats = NULL,
                         .labels = NULL,
                         .indent_mods = NULL) {
  # `na_level` is deprecated; when supplied it overrides `na_str`.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "compare_vars(na_level)", "compare_vars(na_str)")
    na_str <- na_level
  }

  # Arguments always passed to `a_summary()`, followed by the optional formatting
  # arguments that were actually supplied (i.e. are not `NULL`).
  always_args <- list(.stats = .stats, na.rm = na.rm, na_str = na_str, compare = TRUE, ...)
  optional_args <- list(.formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  extra_args <- c(always_args, Filter(Negate(is.null), optional_args))

  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    section_div = section_div,
    afun = a_summary,
    extra_args = extra_args,
    na_str = na_str,
    nested = nested,
    inclNAs = TRUE
  )
}

#' Helper Functions for Tabulating Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams response_biomarkers_subgroups
#' @inheritParams extract_rsp_biomarkers
#' @inheritParams argument_convention
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#'   filter(PARAMCD == "BESRSPI") %>%
#'   mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' @name h_response_biomarkers_subgroups
NULL

#' @describeIn h_response_biomarkers_subgroups helps with converting the "response" function variable list
#'   to the "logistic regression" variable list. The reason is that currently there is an
#'   inconsistency between the variable names accepted by `extract_rsp_subgroups()` and `fit_logistic()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_rsp_to_logistic_variables()` returns a named `list` of elements `response`, `arm`, `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_rsp_to_logistic_variables(
#'   variables = list(
#'     rsp = "RSP",
#'     covariates = c("A", "B"),
#'     strat = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_rsp_to_logistic_variables <- function(variables, biomarker) {
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_string(biomarker)

  # Rename the entries: `rsp` becomes `response`, `strat` becomes `strata`, and the
  # biomarker is placed in the `arm` slot. Absent entries stay in the result as `NULL`.
  response_var <- variables$rsp
  covariate_vars <- variables$covariates
  strata_vars <- variables$strat

  list(
    response = response_var,
    arm = biomarker,
    covariates = covariate_vars,
    strata = strata_vars
  )
}

#' @describeIn h_response_biomarkers_subgroups prepares estimates for number of responses, patients and
#'   overall response rate, as well as odds ratio estimates, confidence intervals and p-values, for multiple
#'   biomarkers in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `rsp` and `biomarkers` (vector of continuous biomarker variables) and optionally `covariates`
#'   and `strat`.
#'
#' @return
#' * `h_logistic_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX"
#'   ),
#'   data = adrs_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_logistic_mult_cont_df(
#'   variables = list(
#'     rsp = "rsp",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strat = "STRATA1"
#'   ),
#'   data = adrs_f[NULL, ]
#' )
#'
#' @export
h_logistic_mult_cont_df <- function(variables,
                                    data,
                                    control = control_logistic()) {
  # Fits one logistic model per biomarker in `variables$biomarkers` and returns one row per
  # biomarker with counts, response proportion, odds ratio, confidence limits and p-value.
  assert_df_with_variables(data, variables)

  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  conf_level <- control[["conf_level"]]
  pval_label <- "p-value (Wald)"

  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    l_result <- lapply(variables$biomarkers, function(bm) {
      model_fit <- fit_logistic(
        variables = h_rsp_to_logistic_variables(variables, bm),
        data = data,
        response_definition = control$response_definition
      )
      result <- h_logistic_simple_terms(
        x = bm,
        fit_glm = model_fit,
        conf_level = conf_level
      )
      # The response is stored differently depending on the class of the fitted model:
      # `glm` fits keep it in the model frame, other fits in the "status" column of `y`.
      resp_vector <- if (inherits(model_fit, "glm")) {
        model_fit$model[[variables$rsp]]
      } else {
        as.logical(as.matrix(model_fit$y)[, "status"])
      }
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = length(resp_vector),
        n_rsp = sum(resp_vector),
        prop = mean(resp_vector),
        or = as.numeric(result[1L, "odds_ratio"]),
        lcl = as.numeric(result[1L, "lcl"]),
        ucl = as.numeric(result[1L, "ucl"]),
        conf_level = conf_level,
        pval = as.numeric(result[1L, "pvalue"]),
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No data: still return one row per biomarker, with zero counts and missing estimates.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_rsp = 0L,
      prop = NA,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_response_biomarkers_subgroups prepares a single sub-table given a `df_sub` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_rsp_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_logistic_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_rsp_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#'
#' h_tab_rsp_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval")
#' )
#'
#' @export
h_tab_rsp_one_biomarker <- function(df,
                                    vars,
                                    na_str = default_na_str(),
                                    .indent_mods = 0L) {
  # Column definitions: confidence level and p-value label are read from the first row of `df`.
  colvars <- d_rsp_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1]
  )
  # Analysis functions for the requested columns only.
  all_afuns <- a_response_subgroups(na_str = na_str)
  afuns <- all_afuns[vars]

  h_tab_one_biomarker(
    df = df,
    afuns = afuns,
    colvars = colvars,
    na_str = na_str,
    .indent_mods = .indent_mods
  )
}

#' Counting Patients and Events in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of unique patients and the total number of all and specific events
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#' @param filters_list (named `list` of `character`)\cr each element in this list describes one
#'   type of event described by filters, in the same format as [s_count_patients_with_event()].
#'   If it has a label, then this will be used for the column title.
#' @param empty_stats (`character`)\cr optional names of the statistics that should be returned empty such
#'   that corresponding table cells will stay blank.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
#'   be used as label.
#' @param .stats (`character`)\cr statistics to select for the table. Run
#'   `get_stats("summarize_patients_events_in_cols")` to see available statistics for this function, in addition
#'   to any added using `filters_list`.
#'
#' @name count_patients_events_in_cols
#' @order 1
NULL

#' @describeIn count_patients_events_in_cols Statistics function which counts numbers of patients and multiple
#'   events defined by filters. Used as analysis function `afun` in `summarize_patients_events_in_cols()`.
#'
#' @return
#' * `s_count_patients_and_multiple_events()` returns a list with the statistics:
#'   - `unique`: number of unique patients in `df`.
#'   - `all`: number of rows in `df`.
#'   - one element with the same name as in `filters_list`: number of rows in `df`,
#'     i.e. events, fulfilling the filter condition.
#'
#' @keywords internal
s_count_patients_and_multiple_events <- function(df, # nolint
                                                 id,
                                                 filters_list,
                                                 empty_stats = character(),
                                                 labelstr = "",
                                                 custom_label = NULL) {
  checkmate::assert_list(filters_list, names = "named")
  checkmate::assert_data_frame(df)
  checkmate::assert_string(id)
  checkmate::assert_disjunct(c("unique", "all"), names(filters_list))
  checkmate::assert_character(empty_stats)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)

  # Label precedence: `labelstr`, then `custom_label`, then the generic "counts".
  row_label <- "counts"
  if (labelstr != "") {
    row_label <- labelstr
  } else if (!is.null(custom_label)) {
    row_label <- custom_label
  }

  # Helper index column so that each row of `df` is counted exactly once as an event.
  df$.row_index <- as.character(seq_len(nrow(df)))

  n_patients <- s_num_patients_content(df = df, .N_col = 1, .var = id, required = NULL)$unique[1L]
  base_stats <- list(
    unique = formatters::with_label(n_patients, row_label),
    all = formatters::with_label(nrow(df), row_label)
  )

  # One count per filter: number of rows (events) fulfilling the filter condition.
  event_stats <- lapply(filters_list, function(filters) {
    n_events <- s_count_patients_with_event(
      df = df, .var = ".row_index", filters = filters, .N_col = 1, .N_row = 1
    )$count
    formatters::with_label(n_events, row_label)
  })

  all_stats <- c(base_stats, event_stats)

  # Statistics listed in `empty_stats` are replaced by an empty value that keeps its label.
  to_blank <- intersect(names(all_stats), empty_stats)
  all_stats[to_blank] <- lapply(all_stats[to_blank], function(stat_value) {
    formatters::with_label(character(), obj_label(stat_value))
  })

  all_stats
}

#' @describeIn count_patients_events_in_cols Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split.
#'   Set to `FALSE` when the required column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `summarize_patients_events_in_cols()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_patients_and_multiple_events()` to the table layout.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = rep(c("id1", "id2", "id3", "id4"), c(2, 3, 1, 1)),
#'   ARM = c("A", "A", "B", "B", "B", "B", "A"),
#'   AESER = rep("Y", 7),
#'   AESDTH = c("Y", "Y", "N", "Y", "Y", "N", "N"),
#'   AEREL = c("Y", "Y", "N", "Y", "Y", "N", "Y"),
#'   AEDECOD = c("A", "A", "A", "B", "B", "C", "D"),
#'   AEBODSYS = rep(c("SOC1", "SOC2", "SOC3"), c(3, 3, 1))
#' )
#'
#' # `summarize_patients_events_in_cols()`
#' basic_table() %>%
#'   summarize_patients_events_in_cols(
#'     filters_list = list(
#'       related = formatters::with_label(c(AEREL = "Y"), "Events (Related)"),
#'       fatal = c(AESDTH = "Y"),
#'       fatal_related = c(AEREL = "Y", AESDTH = "Y")
#'     ),
#'     custom_label = "%s Total number of patients and events"
#'   ) %>%
#'   build_table(df)
#'
#' @export
#' @order 2
summarize_patients_events_in_cols <- function(lyt, # nolint
                                              id = "USUBJID",
                                              filters_list = list(),
                                              empty_stats = character(),
                                              na_str = default_na_str(),
                                              ...,
                                              .stats = c(
                                                "unique",
                                                "all",
                                                names(filters_list)
                                              ),
                                              .labels = c(
                                                unique = "Patients (All)",
                                                all = "Events (All)",
                                                labels_or_names(filters_list)
                                              ),
                                              col_split = TRUE) {
  # One formatted function per statistic, i.e. one per table column.
  make_stat_afun <- function(stat) {
    make_afun(
      s_count_patients_and_multiple_events,
      .stats = stat,
      .formats = "xx."
    )
  }
  afun_list <- Map(make_stat_afun, stat = .stats)

  # Split into one column per statistic unless the layout already did so.
  if (col_split) {
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(id, length(.stats)),
      varlabels = .labels[.stats]
    )
  }

  summarize_row_groups(
    lyt = lyt,
    cfun = afun_list,
    na_str = na_str,
    extra_args = list(id = id, filters_list = filters_list, empty_stats = empty_stats, ...)
  )
}

#' Helper Function for Tabulation of a Single Biomarker Result
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Please see [h_tab_surv_one_biomarker()] and [h_tab_rsp_one_biomarker()], which use this function for examples.
#' This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr results for a single biomarker.
#' @param afuns (named `list` of `function`)\cr analysis functions.
#' @param colvars (`list` with `vars` and `labels`)\cr variables to tabulate and their labels.
#'
#' @return An `rtables` table object with statistics in columns.
#'
#' @export
h_tab_one_biomarker <- function(df,
                                afuns,
                                colvars,
                                na_str = default_na_str(),
                                .indent_mods = 0L,
                                ...) {
  cfun_args <- list(...)

  # First row split: keep only the "content" level of `row_type`.
  layout <- split_rows_by(
    basic_table(),
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )

  # Content rows for all patients, labelled through `var_label`.
  layout <- summarize_row_groups(
    layout,
    var = "var_label",
    cfun = afuns,
    na_str = na_str,
    indent_mod = .indent_mods,
    extra_args = cfun_args
  )

  # One column per variable in `colvars`.
  layout <- split_cols_by_multivar(layout, vars = colvars$vars, varlabels = colvars$labels)

  # Subgroup rows are only added to the layout when `df` contains "analysis" rows.
  has_subgroups <- "analysis" %in% df$row_type
  if (has_subgroups) {
    # Restart the row structure (not nested) on the "analysis" rows, hiding their label.
    layout <- split_rows_by(
      layout,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )

    # Nested split by subgroup variable, labelled with `var_label`.
    layout <- split_rows_by(
      layout,
      var = "var",
      labels_var = "var_label",
      nested = TRUE,
      child_labels = "visible",
      indent_mod = .indent_mods * 2
    )

    # Content rows per subgroup level, using the same content functions.
    layout <- summarize_row_groups(
      layout,
      cfun = afuns,
      var = "subgroup",
      na_str = na_str,
      extra_args = cfun_args
    )
  }

  build_table(layout, df = df)
}

#' Estimation of Proportions per Level of Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Estimate the proportion along with confidence interval of a proportion
#' regarding the level of a factor.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_multinomial_response")`
#'   to see available statistics for this function.
#'
#' @seealso Relevant description function [d_onco_rsp_label()].
#'
#' @name estimate_multinomial_rsp
#' @order 1
NULL

#' Description of Standard Oncology Response
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Describe the oncology response in a standard way.
#'
#' @param x (`character`)\cr the standard oncology code to be described.
#'
#' @return Response labels.
#'
#' @seealso [estimate_multinomial_rsp()]
#'
#' @examples
#' d_onco_rsp_label(
#'   c("CR", "PR", "SD", "NON CR/PD", "PD", "NE", "Missing", "<Missing>", "NE/Missing")
#' )
#'
#' # Adding some values not considered in d_onco_rsp_label
#'
#' d_onco_rsp_label(
#'   c("CR", "PR", "hello", "hi")
#' )
#'
#' @export
d_onco_rsp_label <- function(x) {
  codes <- as.character(x)

  # Lookup table: standard oncology response code -> descriptive label.
  desc <- c(
    CR           = "Complete Response (CR)",
    PR           = "Partial Response (PR)",
    MR           = "Minimal/Minor Response (MR)",
    MRD          = "Minimal Residual Disease (MRD)",
    SD           = "Stable Disease (SD)",
    PD           = "Progressive Disease (PD)",
    `NON CR/PD`  = "Non-CR or Non-PD (NON CR/PD)",
    NE           = "Not Evaluable (NE)",
    `NE/Missing` = "Missing or unevaluable",
    Missing      = "Missing",
    `NA`         = "Not Applicable (NA)",
    ND           = "Not Done (ND)"
  )

  # Translate the known codes in one vectorized step; unknown codes are kept as is.
  # The result is named by the input codes.
  values_label <- codes
  is_known <- codes %in% names(desc)
  values_label[is_known] <- desc[codes[is_known]]
  names(values_label) <- codes

  # Known labels come first, in the order of `desc`, followed by the unknown values
  # in order of appearance.
  label_levels <- c(intersect(desc, values_label), setdiff(values_label, desc))
  factor(values_label, levels = label_levels)
}

#' @describeIn estimate_multinomial_rsp Statistics function which feeds the length of `x` as number
#'   of successes, and `.N_col` as total number of successes and failures into [s_proportion()].
#'
#' @return
#' * `s_length_proportion()` returns statistics from [s_proportion()].
#'
#' @examples
#' s_length_proportion(rep("CR", 10), .N_col = 100)
#' s_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
s_length_proportion <- function(x,
                                .N_col, # nolint
                                ...) {
  # `x` must be a factor or character vector, no longer than the column total,
  # and contain at most one distinct value.
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_vector(x, min.len = 0, max.len = .N_col)
  checkmate::assert_vector(unique(x), min.len = 0, max.len = 1)

  # Every element of `x` counts as a success; the remainder of the column
  # total counts as failures.
  n_success <- length(x)
  responders <- rep(c(TRUE, FALSE), times = c(n_success, .N_col - n_success))
  s_proportion(df = responders, ...)
}

#' @describeIn estimate_multinomial_rsp Formatted analysis function which is used as `afun`
#'   in `estimate_multinomial_response()`.
#'
#' @return
#' * `a_length_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_length_proportion(rep("CR", 10), .N_col = 100)
#' a_length_proportion(factor(character(0)), .N_col = 100)
#'
#' @export
a_length_proportion <- make_afun(
  # Wraps the statistics function `s_length_proportion()` into a formatted
  # analysis function.
  s_length_proportion,
  # Default formats for the `n_prop` and `prop_ci` statistics.
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.xx, xx.xx)"
  )
)

#' @describeIn estimate_multinomial_rsp Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()] and
#'   [rtables::summarize_row_groups()].
#'
#' @return
#' * `estimate_multinomial_response()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_length_proportion()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' # Use of the layout creating function.
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM     = factor(rep(LETTERS[1:3], each = 4)),
#'   AVAL    = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' ) %>% mutate(
#'   AVALC = factor(AVAL,
#'     levels = c(0, 1),
#'     labels = c("Complete Response (CR)", "Partial Response (PR)")
#'   )
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_multinomial_response(var = "AVALC")
#'
#' tbl <- build_table(lyt, dta_test)
#'
#' tbl
#'
#' @export
#' @order 2
estimate_multinomial_response <- function(lyt,
                                          var,
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          show_labels = "hidden",
                                          table_names = var,
                                          .stats = "prop_ci",
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Additional arguments are forwarded to the analysis function via `extra_args`.
  analysis_args <- list(...)

  # Customize the formatted analysis function with the requested statistics,
  # formats, labels and indentation.
  row_afun <- make_afun(
    a_length_proportion,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Split the rows by `var` and add content rows for each level, then analyze
  # the same variable within each of these row groups.
  split_lyt <- summarize_row_groups(split_rows_by(lyt, var = var), na_str = na_str)

  analyze(
    split_lyt,
    vars = var,
    afun = row_afun,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = analysis_args
  )
}

#' Helper Functions for Cox Proportional Hazards Regression
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions used in [fit_coxreg_univar()] and [fit_coxreg_multivar()].
#'
#' @inheritParams argument_convention
#' @inheritParams h_coxreg_univar_extract
#' @inheritParams cox_regression_inter
#' @inheritParams control_coxreg
#'
#' @seealso [cox_regression]
#'
#' @name h_cox_regression
NULL

#' @describeIn h_cox_regression Helper for Cox regression formula. Creates a list of formulas. It is used
#'   internally by [fit_coxreg_univar()] for the comparison of univariate Cox regression models.
#'
#' @return
#' * `h_coxreg_univar_formulas()` returns a `character` vector coercible into formulas (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_univar_formulas`
#'
#' ## Simple formulas.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y")
#'   )
#' )
#'
#' ## Addition of an optional strata.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   )
#' )
#'
#' ## Inclusion of the interaction term.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", arm = "armcd", covariates = c("X", "y"),
#'     strata = "SITE"
#'   ),
#'   interaction = TRUE
#' )
#'
#' ## Only covariates fitted in separate models.
#' h_coxreg_univar_formulas(
#'   variables = list(
#'     time = "time", event = "status", covariates = c("X", "y")
#'   )
#' )
#'
#' @export
h_coxreg_univar_formulas <- function(variables,
                                     interaction = FALSE) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  checkmate::assert_flag(interaction)

  # An interaction needs both a treatment arm and at least one covariate.
  if (!has_arm || is.null(variables$covariates)) {
    checkmate::assert_false(interaction)
  }

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  covariates <- variables$covariates

  # Pieces shared by all formulas: the survival response and the optional strata term.
  surv_lhs <- paste0("survival::Surv(", variables$time, ", ", variables$event, ") ~ ")
  strata_term <- if (is.null(variables$strata)) {
    ""
  } else {
    paste0(" + strata(", paste0(variables$strata, collapse = ", "), ")")
  }

  # One formula per covariate: the arm (or an intercept when there is no arm),
  # combined with the covariate either additively or as an interaction.
  forms <- if (is.null(covariates)) {
    NULL
  } else {
    paste0(
      surv_lhs,
      ifelse(has_arm, variables$arm, "1"),
      if (interaction) " * " else " + ",
      covariates,
      strata_term
    )
  }
  nams <- covariates

  # With a treatment arm, the arm-only reference model is put first and named "ref".
  if (has_arm) {
    forms <- c(paste0(surv_lhs, variables$arm, strata_term), forms)
    nams <- c("ref", nams)
  }
  stats::setNames(forms, nams)
}

#' @describeIn h_cox_regression Helper for multivariate Cox regression formula. Creates a formula
#'   string. It is used internally by [fit_coxreg_multivar()] for the comparison of multivariate Cox
#'   regression models. Interactions will not be included in the multivariate Cox regression model.
#'
#' @return
#' * `h_coxreg_multivar_formula()` returns a `string` coercible into a formula (e.g [stats::as.formula()]).
#'
#' @examples
#' # `h_coxreg_multivar_formula`
#'
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE")
#'   )
#' )
#'
#' # Addition of an optional strata.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", arm = "ARMCD", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' # Example without treatment arm.
#' h_coxreg_multivar_formula(
#'   variables = list(
#'     time = "AVAL", event = "event", covariates = c("RACE", "AGE"),
#'     strata = "SITE"
#'   )
#' )
#'
#' @export
h_coxreg_multivar_formula <- function(variables) {
  checkmate::assert_list(variables, names = "named")
  has_arm <- "arm" %in% names(variables)
  arm_name <- if (has_arm) "arm" else NULL

  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c(arm_name, "event", "time")])

  # Collect the right-hand side terms in order: arm (or intercept), the
  # covariates, and finally the optional strata term.
  rhs_terms <- ifelse(has_arm, variables$arm, "1")
  if (length(variables$covariates) > 0) {
    rhs_terms <- c(rhs_terms, paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$strata)) {
    rhs_terms <- c(
      rhs_terms,
      paste0("strata(", paste0(variables$strata, collapse = ", "), ")")
    )
  }

  paste0(
    "survival::Surv(", variables$time, ", ", variables$event, ") ~ ",
    paste(rhs_terms, collapse = " + ")
  )
}

#' @describeIn h_cox_regression Utility function to help tabulate the result of
#'   a univariate Cox regression model.
#'
#' @param effect (`string`)\cr the treatment variable.
#' @param mod (`coxph`)\cr Cox regression model fitted by [survival::coxph()].
#'
#' @return
#' * `h_coxreg_univar_extract()` returns a `data.frame` with variables `effect`, `term`, `term_label`, `level`,
#'   `n`, `hr`, `lcl`, `ucl`, and `pval`.
#'
#' @examples
#' library(survival)
#'
#' dta_simple <- data.frame(
#'   time = c(5, 5, 10, 10, 5, 5, 10, 10),
#'   status = c(0, 0, 1, 0, 0, 1, 1, 1),
#'   armcd = factor(LETTERS[c(1, 1, 1, 1, 2, 2, 2, 2)], levels = c("A", "B")),
#'   var1 = c(45, 55, 65, 75, 55, 65, 85, 75),
#'   var2 = c("F", "M", "F", "M", "F", "M", "F", "U")
#' )
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_univar_extract(
#'   effect = "armcd", covar = "armcd", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_univar_extract <- function(effect,
                                    covar,
                                    data,
                                    mod,
                                    control = control_coxreg()) {
  checkmate::assert_string(covar)
  checkmate::assert_string(effect)
  checkmate::assert_class(mod, "coxph")
  # Translate the p-value method from `control` into the test name handed to the ANOVA helper.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]

  mod_aov <- muffled_car_anova(mod, test_statistic)
  msum <- summary(mod, conf.int = control$conf_level)
  sum_cox <- broom::tidy(msum)

  # Combine results together.
  effect_aov <- mod_aov[effect, , drop = TRUE]
  pval <- effect_aov[[grep(pattern = "Pr", x = names(effect_aov)), drop = TRUE]]
  # Select the coefficient rows of `effect` by literal prefix. A regular expression
  # match (`grepl()`) would interpret special characters in the variable name and
  # would also pick up other coefficients that merely contain the name somewhere.
  sum_main <- sum_cox[startsWith(sum_cox$level, effect), ]

  # The row describes the treatment itself when the covariate is the effect variable.
  is_treatment <- effect == covar
  term_label <- if (is_treatment) {
    paste0(
      levels(data[[covar]])[2],
      " vs control (",
      levels(data[[covar]])[1],
      ")"
    )
  } else {
    unname(labels_or_names(data[covar]))
  }
  data.frame(
    effect = if (is_treatment) "Treatment:" else "Covariate:",
    term = covar,
    term_label = term_label,
    level = levels(data[[effect]])[2],
    n = mod[["n"]],
    hr = unname(sum_main["exp(coef)"]),
    lcl = unname(sum_main[grep("lower", names(sum_main))]),
    ucl = unname(sum_main[grep("upper", names(sum_main))]),
    pval = pval,
    stringsAsFactors = FALSE
  )
}

#' @describeIn h_cox_regression Tabulation of multivariate Cox regressions. Utility function to help
#'   tabulate the result of a multivariate Cox regression model for a treatment/covariate variable.
#'
#' @return
#' * `h_coxreg_multivar_extract()` returns a `data.frame` with variables `pval`, `hr`, `lcl`, `ucl`, `level`,
#'   `n`, `term`, and `term_label`.
#'
#' @examples
#' mod <- coxph(Surv(time, status) ~ armcd + var1, data = dta_simple)
#' result <- h_coxreg_multivar_extract(
#'   var = "var1", mod = mod, data = dta_simple
#' )
#' result
#'
#' @export
h_coxreg_multivar_extract <- function(var,
                                      data,
                                      mod,
                                      control = control_coxreg()) {
  # Translate the p-value method from `control` into the test name handed to the ANOVA helper.
  test_statistic <- c(wald = "Wald", likelihood = "LR")[control$pval_method]
  mod_aov <- muffled_car_anova(mod, test_statistic)

  msum <- summary(mod, conf.int = control$conf_level)
  sum_anova <- broom::tidy(mod_aov)
  sum_cox <- broom::tidy(msum)

  # Overall (ANOVA) p-value of the variable.
  ret_anova <- sum_anova[sum_anova$term == var, c("term", "p.value")]
  names(ret_anova)[2] <- "pval"

  # Coefficient rows of the variable: factor coefficients start with the variable
  # name, other coefficients are matched by the exact name.
  if (is.factor(data[[var]])) {
    ret_cox <- sum_cox[startsWith(prefix = var, x = sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")]
  } else {
    ret_cox <- sum_cox[(var == sum_cox$level), !(names(sum_cox) %in% "exp(-coef)")]
  }
  names(ret_cox)[1:4] <- c("pval", "hr", "lcl", "ucl")
  varlab <- unname(labels_or_names(data[var]))
  ret_cox$term <- varlab

  if (is.numeric(data[[var]])) {
    ret <- ret_cox
    ret$term_label <- ret$term
  } else {
    # With at most two levels the overall p-value is blanked out in the header row.
    if (length(levels(data[[var]])) <= 2) {
      ret_anova$pval <- NA
    }
    ret_anova$term_label <- paste0(varlab, " (reference = ", levels(data[[var]])[1], ")")
    # Strip only the leading variable name, as a literal string. `gsub()` with the
    # name as a regular expression would also delete later occurrences inside the
    # level itself (e.g. variable "ARM" with level "ARM A").
    ret_cox$level <- sub(var, "", ret_cox$level, fixed = TRUE)
    ret_cox$term_label <- ret_cox$level
    ret <- dplyr::bind_rows(ret_anova, ret_cox)
  }

  as.data.frame(ret)
}

#' Add Titles, Footnotes, Page Number, and a Bounding Box to a Grid Grob
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This function is useful to label grid grobs (also `ggplot2`, and `lattice` plots)
#' with title, footnote, and page numbers.
#'
#' @inheritParams grid::grob
#' @param grob a grid grob object, optionally `NULL` if only a `grob` with the decoration should be shown.
#' @param titles vector of character strings. Vector elements are separated by a newline and strings are wrapped
#'   according to the page width.
#' @param footnotes vector of character strings. Same rules as for `titles`.
#' @param page string with page numeration, if `NULL` then no page number is displayed.
#' @param width_titles unit object
#' @param width_footnotes unit object
#' @param border boolean, whether a border should be drawn around the plot or not.
#' @param margins unit object of length 4
#' @param padding  unit object of length 4
#' @param outer_margins  unit object of length 4
#' @param gp_titles a `gpar` object
#' @param gp_footnotes a `gpar` object
#'
#' @return A grid grob (`gTree`).
#'
#' @details The titles and footnotes will be ragged, i.e. each title will be wrapped individually.
#'
#' @examples
#' library(grid)
#'
#' titles <- c(
#'   "Edgar Anderson's Iris Data",
#'   paste(
#'     "This famous (Fisher's or Anderson's) iris data set gives the measurements",
#'     "in centimeters of the variables sepal length and width and petal length",
#'     "and width, respectively, for 50 flowers from each of 3 species of iris."
#'   )
#' )
#'
#' footnotes <- c(
#'   "The species are Iris setosa, versicolor, and virginica.",
#'   paste(
#'     "iris is a data frame with 150 cases (rows) and 5 variables (columns) named",
#'     "Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, and Species."
#'   )
#' )
#'
#' ## empty plot
#' grid.newpage()
#'
#' grid.draw(
#'   decorate_grob(
#'     NULL,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 4 of 10"
#'   )
#' )
#'
#' # grid
#' p <- gTree(
#'   children = gList(
#'     rectGrob(),
#'     xaxisGrob(),
#'     yaxisGrob(),
#'     textGrob("Sepal.Length", y = unit(-4, "lines")),
#'     textGrob("Petal.Length", x = unit(-3.5, "lines"), rot = 90),
#'     pointsGrob(iris$Sepal.Length, iris$Petal.Length, gp = gpar(col = iris$Species), pch = 16)
#'   ),
#'   vp = vpStack(plotViewport(), dataViewport(xData = iris$Sepal.Length, yData = iris$Petal.Length))
#' )
#' grid.newpage()
#' grid.draw(p)
#'
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with ggplot2
#' library(ggplot2)
#'
#' p_gg <- ggplot2::ggplot(iris, aes(Sepal.Length, Sepal.Width, col = Species)) +
#'   ggplot2::geom_point()
#' p_gg
#' p <- ggplotGrob(p_gg)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' ## with lattice
#' library(lattice)
#'
#' xyplot(Sepal.Length ~ Petal.Length, data = iris, col = iris$Species)
#' p <- grid.grab()
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     grob = p,
#'     titles = titles,
#'     footnotes = footnotes,
#'     page = "Page 6 of 129"
#'   )
#' )
#'
#' # with gridExtra - no borders
#' library(gridExtra)
#' grid.newpage()
#' grid.draw(
#'   decorate_grob(
#'     tableGrob(
#'       head(mtcars)
#'     ),
#'     titles = "title",
#'     footnotes = "footnote",
#'     border = FALSE
#'   )
#' )
#'
#' @export
decorate_grob <- function(grob,
                          titles,
                          footnotes,
                          page = "",
                          width_titles = grid::unit(1, "npc") - grid::unit(1.5, "cm"),
                          width_footnotes = grid::unit(1, "npc") - grid::unit(1.5, "cm"),
                          border = TRUE,
                          margins = grid::unit(c(1, 0, 1, 0), "lines"),
                          padding = grid::unit(rep(1, 4), "lines"),
                          outer_margins = grid::unit(c(2, 1.5, 3, 1.5), "cm"),
                          gp_titles = grid::gpar(),
                          gp_footnotes = grid::gpar(fontsize = 8),
                          name = NULL,
                          gp = grid::gpar(),
                          vp = NULL) {
  # Wrapped titles, anchored top-left in layout row 1.
  st_titles <- split_text_grob(
    titles,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_titles,
    vp = grid::viewport(layout.pos.row = 1, layout.pos.col = 1),
    gp = gp_titles
  )

  # Wrapped footnotes, anchored top-left in layout row 3.
  st_footnotes <- split_text_grob(
    footnotes,
    x = 0, y = 1,
    just = c("left", "top"),
    width = width_footnotes,
    vp = grid::viewport(layout.pos.row = 3, layout.pos.col = 1),
    gp = gp_footnotes
  )

  # Page label, anchored bottom-right in layout row 4; the label is preceded by a
  # newline and uses the footnote graphical parameters.
  pg_footnote <- grid::textGrob(
    paste("\n", page),
    x = 1, y = 0,
    just = c("right", "bottom"),
    vp = grid::viewport(layout.pos.row = 4, layout.pos.col = 1),
    gp = gp_footnotes
  )

  # The inputs are stored as fields on the resulting `gTree` of class "decoratedGrob".
  grid::gTree(
    grob = grob,
    titles = titles,
    footnotes = footnotes,
    page = page,
    width_titles = width_titles,
    width_footnotes = width_footnotes,
    border = border,
    margins = margins,
    padding = padding,
    outer_margins = outer_margins,
    gp_titles = gp_titles,
    gp_footnotes = gp_footnotes,
    children = grid::gList(
      grid::gTree(
        children = grid::gList(
          st_titles,
          # Layout row 2: optional border rectangle around the padded `grob`.
          grid::gTree(
            children = grid::gList(
              if (border) grid::rectGrob(),
              grid::gTree(
                children = grid::gList(
                  grob
                ),
                vp = grid::plotViewport(margins = padding)
              )
            ),
            vp = grid::vpStack(
              grid::viewport(layout.pos.row = 2, layout.pos.col = 1),
              grid::plotViewport(margins = margins)
            )
          ),
          st_footnotes,
          pg_footnote
        ),
        childrenvp = NULL,
        name = "titles_grob_footnotes",
        # Inside the outer margins, a 4 x 1 layout: titles, plot, footnotes, page label.
        # The text rows take the height of their grobs; the plot row ("null" unit)
        # gets the remaining space.
        vp = grid::vpStack(
          grid::plotViewport(margins = outer_margins),
          grid::viewport(
            layout = grid::grid.layout(
              nrow = 4, ncol = 1,
              heights = grid::unit.c(
                grid::grobHeight(st_titles),
                grid::unit(1, "null"),
                grid::grobHeight(st_footnotes),
                grid::grobHeight(pg_footnote)
              )
            )
          )
        )
      )
    ),
    name = name,
    gp = gp,
    vp = vp,
    cl = "decoratedGrob"
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.decoratedGrob <- function(x) {
  # Validity method for "decoratedGrob" objects: checks the stored fields and
  # returns `x` unchanged when all checks pass.
  checkmate::assert_character(x$titles)
  checkmate::assert_character(x$footnotes)

  # `grob` may be NULL; otherwise it has to be a grid grob.
  if (!is.null(x$grob)) {
    checkmate::assert_true(grid::is.grob(x$grob))
  }
  # `page` is only type-checked when it has exactly one element.
  if (length(x$page) == 1) {
    checkmate::assert_character(x$page)
  }
  # Margins and padding given as plain vectors (not unit objects) must have length 4.
  if (!grid::is.unit(x$outer_margins)) {
    checkmate::assert_vector(x$outer_margins, len = 4)
  }
  if (!grid::is.unit(x$margins)) {
    checkmate::assert_vector(x$margins, len = 4)
  }
  if (!grid::is.unit(x$padding)) {
    checkmate::assert_vector(x$padding, len = 4)
  }

  x
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.decoratedGrob <- function(x) {
  # The width does not depend on the contents of `x`: always one "null" unit.
  grid::unit(x = 1, units = "null")
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.decoratedGrob <- function(x) {
  # The height does not depend on the contents of `x`: always one "null" unit.
  grid::unit(x = 1, units = "null")
}

# Adapted from Paul Murrell, R Graphics, 2nd Edition
# https://www.stat.auckland.ac.nz/~paul/RG2e/interactgrid-splittext.R
split_string <- function(text, width) {
  # Wraps each element of `text` so that its lines fit into `width` (a grid unit),
  # breaking at single spaces. The wrapped elements are joined with newlines and
  # returned as a single string.
  strings <- strsplit(text, " ")

  # Nothing to wrap: return an empty string instead of the literal "NA".
  if (length(strings) == 0) {
    return("")
  }

  # These do not depend on the element being processed.
  gapwidth <- grid::stringWidth(" ")
  availwidth <- as.numeric(width)

  out_string <- character(length(strings))
  for (string_i in seq_along(strings)) {
    words <- strings[[string_i]]
    # An empty element has no words; treat it as a single empty word.
    if (length(words) == 0) words <- ""

    # Every line starts with the first word, whether it fits or not.
    out_string[string_i] <- words[[1]]
    linewidth <- grid::stringWidth(out_string[string_i])

    if (length(words) > 1) {
      for (i in seq(2, length(words))) {
        width_i <- grid::stringWidth(words[i])
        # Compare in the unit of `width`: append to the current line if the word
        # still fits, otherwise start a new line with it.
        if (grid::convertWidth(linewidth + gapwidth + width_i, grid::unitType(width), valueOnly = TRUE) < availwidth) {
          sep <- " "
          linewidth <- linewidth + gapwidth + width_i
        } else {
          sep <- "\n"
          linewidth <- width_i
        }
        out_string[string_i] <- paste(out_string[string_i], words[i], sep = sep)
      }
    }
  }
  paste(out_string, collapse = "\n")
}

#' Split Text According To Available Text Width
#'
#' Dynamically wrap text.
#'
#' @inheritParams grid::grid.text
#' @param text character string
#' @param width a unit object specifying max width of text
#'
#' @return A text grob.
#'
#' @details This code is taken from `R Graphics by Paul Murrell, 2nd edition`
#'
#' @keywords internal
split_text_grob <- function(text,
                            x = grid::unit(0.5, "npc"),
                            y = grid::unit(0.5, "npc"),
                            width = grid::unit(1, "npc"),
                            just = "centre",
                            hjust = NULL,
                            vjust = NULL,
                            default.units = "npc", # nolint
                            name = NULL,
                            gp = grid::gpar(),
                            vp = NULL) {
  # Turn plain values into unit objects expressed in `default.units`.
  as_unit <- function(value) {
    if (grid::is.unit(value)) value else grid::unit(value, default.units)
  }
  # Convert compound units (sum, min, max) into `default.units`.
  resolve_compound <- function(value) {
    if (grid::unitType(value) %in% c("sum", "min", "max")) {
      grid::convertUnit(value, default.units)
    } else {
      value
    }
  }

  x <- as_unit(x)
  y <- as_unit(y)
  width <- as_unit(width)
  x <- resolve_compound(x)
  y <- resolve_compound(y)
  width <- resolve_compound(width)

  # Account for the effect of `gp` on the text width: scale the available width
  # by the ratio of the text width without and with `gp` applied.
  if (length(gp) > 0) {
    plain_width <- grid::convertWidth(grid::grobWidth(grid::textGrob(text)), "npc", valueOnly = TRUE)
    styled_width <- grid::convertWidth(grid::grobWidth(grid::textGrob(text, gp = gp)), "npc", valueOnly = TRUE)
    width <- width * plain_width / styled_width
  }

  # If it is a fixed unit then the wrapped text does not need to be recalculated
  # when the viewport is resized, so it is attached to `text` as an attribute.
  # NOTE(review): `text` is only passed to `split_string()` below, so this
  # attribute does not appear to be read again in this function - confirm
  # whether it is still needed.
  fixed_units <- c("cm", "inches", "mm", "points", "picas", "bigpts", "dida", "cicero", "scaledpts")
  width_unit <- attr(width, "unit")
  has_fixed_width <- !inherits(width, "unit.arithmetic") && !is.null(width_unit) && width_unit %in% fixed_units
  if (has_fixed_width) {
    attr(text, "fixed_text") <- paste(vapply(text, split_string, character(1), width = width), collapse = "\n")
  }

  # Build, but do not draw, a text grob holding the wrapped text.
  wrapped_label <- split_string(text, width)
  grid::grid.text(
    label = wrapped_label,
    x = x, y = y,
    just = just,
    hjust = hjust,
    vjust = vjust,
    rot = 0,
    check.overlap = FALSE,
    name = name,
    gp = gp,
    vp = vp,
    draw = FALSE
  )
}

#' @importFrom grid validDetails
#' @noRd
validDetails.dynamicSplitText <- function(x) {
  # Validity method for "dynamicSplitText" objects: `text` must be character and
  # `width` a unit object of length 1. Returns `x` unchanged when the checks pass.
  checkmate::assert_character(x$text)
  checkmate::assert_true(grid::is.unit(x$width))
  checkmate::assert_vector(x$width, len = 1)
  x
}

#' @importFrom grid heightDetails
#' @noRd
heightDetails.dynamicSplitText <- function(x) {
  # Use the pre-wrapped text stored on `x$text` when there is one; otherwise
  # wrap every element of the text to `x$width` now.
  cached <- attr(x$text, "fixed_text")
  wrapped <- if (is.null(cached)) {
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    cached
  }
  grid::stringHeight(wrapped)
}

#' @importFrom grid widthDetails
#' @noRd
widthDetails.dynamicSplitText <- function(x) {
  # The width of the object is the `width` stored on it.
  x$width
}

#' @importFrom grid drawDetails
#' @noRd
drawDetails.dynamicSplitText <- function(x, recording) {
  # Use the pre-wrapped text stored on `x$text` when there is one; otherwise
  # wrap every element of the text to `x$width` now.
  cached <- attr(x$text, "fixed_text")
  wrapped <- if (is.null(cached)) {
    paste(vapply(x$text, split_string, character(1), width = x$width), collapse = "\n")
  } else {
    cached
  }

  # Replace the `width` and `text` fields by a `label` field and swap the first
  # class for "text", then draw the modified object.
  x$width <- NULL
  x$label <- wrapped
  x$text <- NULL
  class(x) <- c("text", class(x)[-1])

  grid::grid.draw(x)
}

#' Update Page Number
#'
#' Automatically updates page number.
#'
#' @param npages number of pages in total
#' @param ... passed on to [decorate_grob()]
#'
#' @return Closure that increments the page number.
#'
#' @keywords internal
decorate_grob_factory <- function(npages, ...) {
  # Page counter kept in the enclosing environment of the returned closure.
  current_page <- 0
  function(grob) {
    # Advance the counter first; it stays advanced even if the check below fails.
    page_no <- current_page + 1
    current_page <<- page_no
    if (page_no > npages) {
      stop(paste("current page is", page_no, "but max.", npages, "specified."))
    }
    page_label <- paste("Page", page_no, "of", npages)
    decorate_grob(grob = grob, page = page_label, ...)
  }
}

#' Decorate Set of `grobs` and Add Page Numbering
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Note that this uses the [decorate_grob_factory()] function.
#'
#' @param grobs a list of grid grobs
#' @param ... arguments passed on to [decorate_grob()].
#'
#' @return A decorated grob.
#'
#' @examples
#' library(ggplot2)
#' library(grid)
#' g <- with(data = iris, {
#'   list(
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Sepal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Length, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Sepal.Width, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     ),
#'     ggplot2::ggplotGrob(
#'       ggplot2::ggplot(mapping = aes(Petal.Length, Petal.Width, col = Species)) +
#'         ggplot2::geom_point()
#'     )
#'   )
#' })
#' lg <- decorate_grob_set(grobs = g, titles = "Hello\nOne\nTwo\nThree", footnotes = "")
#'
#' draw_grob(lg[[1]])
#' draw_grob(lg[[2]])
#' draw_grob(lg[[6]])
#'
#' @export
decorate_grob_set <- function(grobs, ...) {
  # A single decorator is shared by all grobs, so the page counter runs from 1 to
  # `length(grobs)` in list order.
  decorate_page <- decorate_grob_factory(npages = length(grobs), ...)
  lapply(grobs, decorate_page)
}

#' Tabulate Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' across population subgroups.
#'
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#'   [extract_survival_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#'   * `n_tot_events`: Total number of events per group.
#'   * `n_tot`: Total number of observations per group.
#'   * `median`: Median survival time.
#'   * `hr`: Hazard ratio.
#'   * `ci`: Confidence interval of hazard ratio.
#'   * `pval`: p-value of the effect.
#'   Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci` are required.
#'
#' @details These functions create a layout starting from a data frame which contains
#'   the required statistics. The tables are then typically used as input for forest plots.
#'
#' @examples
#' library(dplyr)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in multiple regression models containing one covariate `SEX`,
#' # as well as one stratification variable `STRATA1`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#'
#' df <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually.
#' df_grouped <- extract_survival_biomarkers(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     strata = "STRATA1",
#'     covariates = "SEX",
#'     subgroups = "BMRKR2"
#'   ),
#'   data = adtte_f,
#'   groups_lists = list(
#'     BMRKR2 = list(
#'       "low" = "LOW",
#'       "low/medium" = c("LOW", "MEDIUM"),
#'       "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#'     )
#'   )
#' )
#' df_grouped
#'
#' @name survival_biomarkers_subgroups
#' @order 1
NULL

#' Prepares Survival Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of events, patients and median survival times, as well as hazard ratio estimates,
#' confidence intervals and p-values, for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a named `list` and requires elements
#' `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables), and optionally `subgroups` and `strata`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#' @inheritParams survival_duration_subgroups
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_tot_events`,
#'   `median`, `hr`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#'   `var_label`, and `row_type`.
#'
#' @seealso [h_coxreg_mult_cont_df()] which is used internally, [tabulate_survival_biomarkers()].
#'
#' @export
extract_survival_biomarkers <- function(variables,
                                        data,
                                        groups_lists = list(),
                                        control = control_coxreg(),
                                        label_all = "All Patients") {
  checkmate::assert_list(variables)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates on the complete data set, tagged as the "all patients" content rows.
  overall <- h_coxreg_mult_cont_df(variables = variables, data = data, control = control)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall estimates are the whole result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One set of estimates per subgroup data set, each combined with its labels.
  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subgroup_data, function(grp) {
    result <- h_coxreg_mult_cont_df(variables = variables, data = grp$df, control = control)
    # Repeat the first row of the subgroup labels once per row of estimates.
    result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
    cbind(result, result_labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

#' @describeIn survival_biomarkers_subgroups Table-creating function which creates a table
#'   summarizing biomarker effects on survival by subgroup.
#'
#' @return An `rtables` table summarizing biomarker effects on survival by subgroup.
#'
#' @note In contrast to [tabulate_survival_subgroups()] this tabulation function does
#'   not start from an input layout `lyt`. This is because internally the table is
#'   created by combining multiple subtables.
#'
#' @seealso [h_tab_surv_one_biomarker()] which is used internally, [extract_survival_biomarkers()].
#'
#' @examples
#' ## Table with default columns.
#' tabulate_survival_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_survival_biomarkers(
#'   df = df,
#'   vars = c("n_tot_events", "ci", "n_tot", "median", "hr"),
#'   time_unit = as.character(adtte_f$AVALU[1])
#' )
#'
#' ## Finally produce the forest plot.
#' \donttest{
#' g_forest(tab, xlim = c(0.8, 1.2))
#' }
#'
#' @export
#' @order 2
tabulate_survival_biomarkers <- function(df,
                                         vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
                                         groups_lists = list(),
                                         control = control_coxreg(),
                                         label_all = "All Patients",
                                         time_unit = NULL,
                                         na_str = default_na_str(),
                                         .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_survival_biomarkers"))

  extra_args <- list(groups_lists = groups_lists, control = control, label_all = label_all)

  # Builds the sub-table of a single biomarker and sets the label at the path of
  # its first row to the biomarker label.
  build_biomarker_tab <- function(df_sub) {
    tab_sub <- h_tab_surv_one_biomarker(
      df = df_sub,
      vars = vars,
      time_unit = time_unit,
      na_str = na_str,
      .indent_mods = .indent_mods,
      extra_args = extra_args
    )
    label_at_path(tab_sub, path = row_paths(tab_sub)[[1]][1]) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One sub-table per biomarker, stacked into a single table.
  tabs <- lapply(split(df, f = df$biomarker), FUN = build_biomarker_tab)
  result <- do.call(rbind, tabs)

  # Attach the column positions (within `vars`) of the hazard ratio, its
  # confidence interval and the first `n_tot*` statistic, plus the forest header.
  structure(
    result,
    forest_header = paste0(c("Higher", "Lower"), "\nBetter"),
    col_x = match("hr", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("^n_tot", vars)[1]
  )
}

#' Convert `rtable` object to `ggplot` object
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Given a [rtables::rtable()] object, performs basic conversion to a [ggplot2::ggplot()] object built using
#' functions from the `ggplot2` package. Any table titles and/or footnotes are ignored.
#'
#' @param tbl (`rtable`)\cr a `rtable` object.
#' @param fontsize (`numeric`)\cr font size.
#' @param colwidths (`vector` of `numeric`)\cr a vector of column widths. Each element's position in
#'   `colwidths` corresponds to the column of `tbl` in the same position. If `NULL`, column widths
#'   are calculated according to maximum number of characters per column.
#' @param lbl_col_padding (`numeric`)\cr additional padding to use when calculating spacing between
#'   the first (label) column and the second column of `tbl`. If `colwidths` is specified,
#'   the width of the first column becomes `colwidths[1] + lbl_col_padding`. Defaults to 0.
#'
#' @return a `ggplot` object.
#'
#' @examples
#' dta <- data.frame(
#'   ARM     = rep(LETTERS[1:3], rep(6, 3)),
#'   AVISIT  = rep(paste0("V", 1:3), 6),
#'   AVAL    = c(9:1, rep(NA, 9))
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by(var = "ARM") %>%
#'   split_rows_by(var = "AVISIT") %>%
#'   analyze_vars(vars = "AVAL")
#'
#' tbl <- build_table(lyt, df = dta)
#'
#' rtable2gg(tbl)
#'
#' rtable2gg(tbl, fontsize = 5, colwidths = c(2, 1, 1, 1))
#'
#' @export
rtable2gg <- function(tbl, fontsize = 4, colwidths = NULL, lbl_col_padding = 0) {
  # Extract everything needed from the matrix form of the table: cell strings,
  # alignments, row indentation, display flags and the number of header lines.
  mat <- rtables::matrix_form(tbl)
  mat_strings <- formatters::mf_strings(mat)
  mat_aligns <- formatters::mf_aligns(mat)
  mat_indent <- formatters::mf_rinfo(mat)$indent
  mat_display <- formatters::mf_display(mat)
  nlines_hdr <- formatters::mf_nlheader(mat)
  # Rows of the display matrix with at least one FALSE entry; these are handled
  # separately below as header rows whose labels span several columns.
  shared_hdr_rows <- which(apply(mat_display, 1, function(x) (any(!x))))

  tbl_df <- data.frame(mat_strings)
  body_rows <- seq(nlines_hdr + 1, nrow(tbl_df))
  # Translate alignment names into the numeric `hjust` values used by geom_text():
  # "left" -> 0, "right" -> 1, anything else -> 0.5.
  mat_aligns <- apply(mat_aligns, 1:2, function(x) if (x == "left") 0 else if (x == "right") 1 else 0.5)

  # Apply indentation in first column
  tbl_df[body_rows, 1] <- sapply(body_rows, function(i) {
    # Four spaces per indentation level of the corresponding body row.
    ind_i <- mat_indent[i - nlines_hdr] * 4
    if (ind_i > 0) paste0(paste(rep(" ", ind_i), collapse = ""), tbl_df[i, 1]) else tbl_df[i, 1]
  })

  # Get column widths
  if (is.null(colwidths)) {
    # Default: longest string in each column (after indentation) plus one.
    colwidths <- apply(tbl_df, 2, function(x) max(nchar(x))) + 1
  }
  tot_width <- sum(colwidths) + lbl_col_padding

  # Spanning header rows are not drawn with the regular columns, so remove them
  # from the data and from the alignment matrix.
  if (length(shared_hdr_rows) > 0) {
    tbl_df <- tbl_df[-shared_hdr_rows, ]
    mat_aligns <- mat_aligns[-shared_hdr_rows, ]
  }

  # Empty canvas sized to the table, with a horizontal line drawn half a unit
  # above y = nrow(mat_strings) - nlines_hdr, i.e. between header and body.
  res <- ggplot(data = tbl_df) +
    theme_void() +
    scale_x_continuous(limits = c(0, tot_width)) +
    scale_y_continuous(limits = c(0, nrow(mat_strings))) +
    geom_segment(aes(
      x = 0, xend = tot_width,
      y = nrow(mat_strings) - nlines_hdr + 0.5, yend = nrow(mat_strings) - nlines_hdr + 0.5
    ))

  # If header content spans multiple columns, center over these columns
  if (length(shared_hdr_rows) > 0) {
    mat_strings[shared_hdr_rows, ] <- trimws(mat_strings[shared_hdr_rows, ])
    for (hr in shared_hdr_rows) {
      # Header strings of rows 1..hr for the columns flagged as displayed in row
      # `hr` (label column excluded), with empty strings dropped.
      hdr_lbls <- mat_strings[1:hr, mat_display[hr, -1]]
      hdr_lbls <- matrix(hdr_lbls[nzchar(hdr_lbls)], nrow = hr)
      for (idx_hl in seq_len(ncol(hdr_lbls))) {
        cur_lbl <- tail(hdr_lbls[, idx_hl], 1)
        # Columns of the table that this header label covers.
        which_cols <- if (hr == 1) {
          which(mat_strings[hr, ] == hdr_lbls[idx_hl])
        } else { # for >2 col splits, only print labels for each unique combo of nested columns
          which(
            apply(mat_strings[1:hr, ], 2, function(x) all(x == hdr_lbls[1:hr, idx_hl]))
          )
        }
        # Horizontal extent of the label: from just right of the columns before
        # the first covered column to just left of the end of the last covered one.
        line_pos <- c(
          sum(colwidths[1:(which_cols[1] - 1)]) + 1 + lbl_col_padding,
          sum(colwidths[1:max(which_cols)]) - 1 + lbl_col_padding
        )

        # Label centered over its columns, with a line segment drawn beneath it.
        res <- res +
          geom_text(
            x = mean(line_pos),
            y = nrow(mat_strings) + 1 - hr,
            label = cur_lbl,
            size = fontsize
          ) +
          geom_segment(
            x = line_pos[1],
            xend = line_pos[2],
            y = nrow(mat_strings) - hr + 0.5,
            yend = nrow(mat_strings) - hr + 0.5
          )
      }
    }
  }

  # Add table columns
  for (i in seq_len(ncol(tbl_df))) {
    # The first column is anchored at x = 0; every other column is placed at the
    # midpoint of its own width (cumulative width minus half the column width),
    # shifted by the label column padding. Rows are drawn top to bottom.
    res <- res + geom_text(
      x = if (i == 1) 0 else sum(colwidths[1:i]) - 0.5 * colwidths[i] + lbl_col_padding,
      y = rev(seq_len(nrow(tbl_df))),
      label = tbl_df[, i],
      hjust = mat_aligns[, i],
      size = fontsize
    )
  }

  res
}

#' Summary for analysis of covariance (`ANCOVA`).
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize results of `ANCOVA`. This can be used to analyze multiple endpoints and/or
#' multiple timepoints within the same response variable `.var`.
#'
#' @inheritParams h_ancova
#' @inheritParams argument_convention
#' @param interaction_y (`character`)\cr a selected item inside of the `interaction_item` column which will be used
#'   to select the specific `ANCOVA` results. If the interaction is not needed, the default option is `FALSE`.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_ancova")`
#'   to see available statistics for this function.
#'
#' @name summarize_ancova
#' @order 1
NULL

#' Helper Function to Return Results of a Linear Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param .df_row (`data.frame`)\cr data set that includes all the variables that are called in `.var` and `variables`.
#' @param variables (named `list` of `strings`)\cr list of additional analysis variables, with expected elements:
#'   * `arm` (`string`)\cr group variable, for which the covariate adjusted means of multiple groups will be
#'     summarized. Specifically, the first level of `arm` variable is taken as the reference group.
#'   * `covariates` (`character`)\cr a vector that can contain single variable names (such as `"X1"`), and/or
#'     interaction terms indicated by `"X1 * X2"`.
#' @param interaction_item (`character`)\cr name of the variable that should have interactions
#'   with arm. If the interaction is not needed, the default option is `NULL`.
#'
#' @return The summary of a linear model.
#'
#' @examples
#' h_ancova(
#'   .var = "Sepal.Length",
#'   .df_row = iris,
#'   variables = list(arm = "Species", covariates = c("Petal.Length * Petal.Width", "Sepal.Width"))
#' )
#'
#' @export
h_ancova <- function(.var,
                     .df_row,
                     variables,
                     interaction_item = NULL) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_subset(names(variables), c("arm", "covariates"))
  assert_df_with_variables(.df_row, list(rsp = .var))

  arm <- variables$arm
  covariates <- variables$covariates

  # Every variable referenced by a covariate term has to be available in the data.
  if (!is.null(covariates) && length(covariates) > 0) {
    assert_df_with_variables(.df_row, get_covariates(covariates))
  }

  # Right-hand side of the model: the covariate terms (if any) followed by the arm.
  covariates_part <- paste(covariates, collapse = " + ")
  rhs <- if (covariates_part != "") paste0(covariates_part, " + ", arm) else arm
  formula <- stats::as.formula(paste0(.var, " ~ ", rhs))

  # Marginal means are requested over the arm, and additionally over the
  # interaction variable when one is given.
  specs <- if (is.null(interaction_item)) arm else c(arm, interaction_item)

  lm_fit <- stats::lm(
    formula = formula,
    data = .df_row
  )

  # The data is passed again so that the factor levels of the arm variable can be inferred.
  emmeans::emmeans(
    lm_fit,
    specs = specs,
    data = .df_row
  )
}

#' @describeIn summarize_ancova Statistics function that produces a named list of results
#'   of the investigated linear model.
#'
#' @return
#' * `s_ancova()` returns a named list of 5 statistics:
#'   * `n`: Count of complete sample size for the group.
#'   * `lsmean`: Estimated marginal means in the group.
#'   * `lsmean_diff`: Difference in estimated marginal means in comparison to the reference group.
#'     If working with the reference group, this will be empty.
#'   * `lsmean_diff_ci`: Confidence interval for the difference in estimated marginal means in comparison
#'     to the reference group.
#'   * `pval`: p-value (not adjusted for multiple comparisons).
#'
#' @keywords internal
s_ancova <- function(df,
                     .var,
                     .df_row,
                     variables,
                     .ref_group,
                     .in_ref_col,
                     conf_level,
                     interaction_y = FALSE,
                     interaction_item = NULL) {
  # Fit the linear model on `.df_row` and obtain the estimated marginal means.
  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)

  sum_fit <- summary(
    emmeans_fit,
    level = conf_level
  )

  arm <- variables$arm

  # Arm level of the current column, taken from the data of that column.
  sum_level <- as.character(unique(df[[arm]]))

  # Ensure that there is only one element in sum_level.
  checkmate::assert_scalar(sum_level)

  # Rows of the marginal means summary that belong to the current arm.
  sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]

  # Get the index of the ref arm
  if (interaction_y != FALSE) {
    # Response values of the current arm restricted to the selected interaction level.
    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
    # convert characters selected in interaction_y into the numeric order
    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
    sum_fit_level <- sum_fit_level[interaction_y, ]
    # if interaction is called, reset the index
    # NOTE(review): the position is derived by indexing the arm column of the summary
    # with the reference arm value; this presumably relies on the arm being a factor - confirm.
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
    # Shift the reference position by the number of arms for each preceding interaction level.
    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
  } else {
    y <- df[[.var]]
    # Get the index of the ref arm when interaction is not called
    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
    ref_key <- tail(ref_key, n = 1)
  }

  if (.in_ref_col) {
    # Reference column: only `n` and the adjusted mean are filled; the comparison
    # statistics are empty.
    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(character(), "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(character(), f_conf_level(conf_level)),
      pval = formatters::with_label(character(), "p-value")
    )
  } else {
    # Estimate the differences between the marginal means.
    emmeans_contrasts <- emmeans::contrast(
      emmeans_fit,
      # Compare all arms versus the control arm.
      method = "trt.vs.ctrl",
      # Take the arm factor from .ref_group as the control arm.
      ref = ref_key,
      level = conf_level
    )
    sum_contrasts <- summary(
      emmeans_contrasts,
      # Derive confidence intervals, t-tests and p-values.
      infer = TRUE,
      # Do not adjust the p-values for multiplicity.
      adjust = "none"
    )

    # Select the contrasts involving the current arm. The level name is matched
    # literally (`fixed = TRUE`) so that regex metacharacters in arm names such as
    # parentheses, "+" or "." are not interpreted as a pattern.
    sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast, fixed = TRUE), ]
    if (interaction_y != FALSE) {
      # `interaction_y` holds the numeric position of the selected interaction level here.
      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
    }

    list(
      n = length(y[!is.na(y)]),
      lsmean = formatters::with_label(sum_fit_level$emmean, "Adjusted Mean"),
      lsmean_diff = formatters::with_label(sum_contrasts_level$estimate, "Difference in Adjusted Means"),
      lsmean_diff_ci = formatters::with_label(
        c(sum_contrasts_level$lower.CL, sum_contrasts_level$upper.CL),
        f_conf_level(conf_level)
      ),
      pval = formatters::with_label(sum_contrasts_level$p.value, "p-value")
    )
  }
}

#' @describeIn summarize_ancova Formatted analysis function which is used as `afun` in `summarize_ancova()`.
#'
#' @return
#' * `a_ancova()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_ancova <- make_afun(
  # Statistics function whose results are formatted.
  s_ancova,
  # Indentation modifier per statistic: 1 for the confidence interval and the
  # p-value, 0 for the other statistics.
  .indent_mods = c("n" = 0L, "lsmean" = 0L, "lsmean_diff" = 0L, "lsmean_diff_ci" = 1L, "pval" = 1L),
  # Default format per statistic.
  .formats = c(
    "n" = "xx",
    "lsmean" = "xx.xx",
    "lsmean_diff" = "xx.xx",
    "lsmean_diff_ci" = "(xx.xx, xx.xx)",
    "pval" = "x.xxxx | (<0.0001)"
  ),
  # NOTE(review): presumably disabled because `s_ancova()` already returns empty
  # values for the comparison statistics in the reference column - confirm against `make_afun()`.
  .null_ref_cells = FALSE
)

#' @describeIn summarize_ancova Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_ancova()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_ancova()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "setosa") %>%
#'   add_colcounts() %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = NULL),
#'     table_names = "unadj",
#'     conf_level = 0.95, var_labels = "Unadjusted comparison",
#'     .labels = c(lsmean = "Mean", lsmean_diff = "Difference in Means")
#'   ) %>%
#'   summarize_ancova(
#'     vars = "Petal.Length",
#'     variables = list(arm = "Species", covariates = c("Sepal.Length", "Sepal.Width")),
#'     table_names = "adj",
#'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates: Sepal.Length and Sepal.Width)"
#'   ) %>%
#'   build_table(iris)
#'
#' @export
#' @order 2
summarize_ancova <- function(lyt,
                             vars,
                             variables,
                             conf_level,
                             interaction_y = FALSE,
                             interaction_item = NULL,
                             var_labels,
                             na_str = default_na_str(),
                             nested = TRUE,
                             ...,
                             show_labels = "visible",
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Arguments handed to the analysis function through `analyze()`.
  ancova_args <- list(
    variables = variables,
    conf_level = conf_level,
    interaction_y = interaction_y,
    interaction_item = interaction_item,
    ...
  )

  # Default formatted analysis function, adjusted by the interaction settings
  # and the user's statistic, format, label and indentation selections.
  ancova_afun <- make_afun(
    a_ancova,
    interaction_y = interaction_y,
    interaction_item = interaction_item,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt = lyt,
    vars = vars,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    afun = ancova_afun,
    na_str = na_str,
    nested = nested,
    extra_args = ancova_args
  )
}

#' Occurrence Counts by Grade
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions for analyzing frequencies and fractions of occurrences by grade for patients
#' with occurrence data. Multiple occurrences within one individual are counted once at the
#' greatest intensity/highest grade level.
#'
#' @inheritParams argument_convention
#' @param grade_groups (named `list` of `character`)\cr containing groupings of grades.
#' @param remove_single (`logical`)\cr `TRUE` to not include the elements of one-element grade groups
#'   in the output list; in this case only the grade groups names will be included in the output.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_occurrences_by_grade")`
#'   to see available statistics for this function.
#'
#' @seealso Relevant helper function [h_append_grade_groups()].
#'
#' @name count_occurrences_by_grade
#' @order 1
NULL

#' Helper function for [s_count_occurrences_by_grade()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function for [s_count_occurrences_by_grade()] to insert grade groupings into list with
#' individual grade frequencies. The order of the final result follows the order of `grade_groups`.
#' The elements under any-grade group (if any), i.e. the grade group equal to `refs` will be moved to
#' the end. Grade groups names must be unique.
#'
#' @inheritParams count_occurrences_by_grade
#' @param refs (named `list` of `numeric`)\cr where each name corresponds to a reference grade level
#'   and each entry represents a count.
#'
#' @return Formatted list of grade groupings.
#'
#' @examples
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(5:1),
#'     "Grade A" = "5",
#'     "Grade B" = c("4", "3")
#'   ),
#'   list("1" = 10, "2" = 20, "3" = 30, "4" = 40, "5" = 50)
#' )
#'
#' h_append_grade_groups(
#'   list(
#'     "Any Grade" = as.character(1:5),
#'     "Grade 1-2" = c("1", "2"),
#'     "Grade 3-4" = c("3", "4")
#'   ),
#'   list("1" = 10, "2" = 5, "3" = 0)
#' )
#'
#' @export
h_append_grade_groups <- function(grade_groups, refs, remove_single = TRUE) {
  # Inserts the summed counts of each grade group into the list of individual
  # grade counts and orders the result by `grade_groups`. An empty `grade_groups`
  # returns `refs` unchanged.
  checkmate::assert_list(grade_groups)
  checkmate::assert_list(refs)
  refs_orig <- refs
  elements <- unique(unlist(grade_groups))

  ### compute sums in groups
  # Grades of a group that are absent from `refs` contribute nothing to the sum.
  grp_sum <- lapply(grade_groups, function(i) do.call(sum, refs[i]))
  if (!checkmate::test_subset(elements, names(refs))) {
    # Pad grades that occur in a group but not in `refs` with a zero count.
    padding_el <- setdiff(elements, names(refs))
    refs[padding_el] <- 0
  }
  result <- c(grp_sum, refs)

  ### order result while keeping grade_groups's ordering
  ordr <- grade_groups

  # elements of any-grade group (if any) will be moved to the end
  # `vapply()` always yields a logical vector, also for an empty `grade_groups`
  # (where `sapply()` would return `list()` and break the subsetting below).
  is_any <- vapply(grade_groups, setequal, logical(1), y = names(refs))
  ordr[is_any] <- list(character(0)) # hide elements under any-grade group

  # groups-elements combined sequence
  ordr <- c(lapply(names(ordr), function(g) c(g, ordr[[g]])), recursive = TRUE, use.names = FALSE)
  ordr <- ordr[!duplicated(ordr)]

  # append remaining elements (if any)
  ordr <- union(ordr, unlist(grade_groups[is_any])) # from any-grade group
  ordr <- union(ordr, names(refs)) # from refs

  # remove elements of single-element groups, if any
  if (remove_single) {
    is_single <- lengths(grade_groups) == 1L
    ordr <- setdiff(ordr, unlist(grade_groups[is_single]))
  }

  # apply the order
  result <- result[ordr]

  # remove groups without any elements in the original refs
  # note: it's OK if groups have 0 value
  keep_grp <- vapply(grade_groups, function(x, rf) {
    any(x %in% rf)
  }, rf = names(refs_orig), logical(1))

  # Keep original grades and retained groups; padded grades are dropped again here.
  keep_el <- names(result) %in% names(refs_orig) | names(result) %in% names(keep_grp)[keep_grp]
  result <- result[keep_el]

  result
}

#' @describeIn count_occurrences_by_grade Statistics function which counts the
#'  number of patients by highest grade.
#'
#' @return
#' * `s_count_occurrences_by_grade()` returns a list of counts and fractions with one element per grade level or
#'   grade level grouping.
#'
#' @examples
#' s_count_occurrences_by_grade(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
s_count_occurrences_by_grade <- function(df,
                                         .var,
                                         .N_col, # nolint
                                         id = "USUBJID",
                                         grade_groups = list(),
                                         remove_single = TRUE,
                                         labelstr = "") {
  assert_valid_factor(df[[.var]])
  assert_df_with_variables(df, list(grade = .var, id = id))

  if (nrow(df) < 1) {
    # No records at all: every grade level gets a count of zero.
    grade_levels <- levels(df[[.var]])
    l_count <- stats::setNames(as.list(rep(0, length(grade_levels))), grade_levels)
  } else {
    # The identifier must be a valid factor or a non-empty character vector, without missing values.
    if (isTRUE(is.factor(df[[id]]))) {
      assert_valid_factor(df[[id]], any.missing = FALSE)
    } else {
      checkmate::assert_character(df[[id]], min.chars = 1, any.missing = FALSE)
    }
    checkmate::assert_count(.N_col)

    id <- df[[id]]
    grade <- df[[.var]]

    # An unordered grade factor is converted to an ordered one so that the
    # maximum per identifier is defined.
    if (!is.ordered(grade)) {
      grade_lbl <- obj_label(grade)
      lvls <- levels(grade)
      is_int_lvl <- grepl("^\\d+$", lvls)
      lvl_ord <- if (!any(is_int_lvl) || all(is_int_lvl)) {
        # Either no level or every level is an integer string: keep the level order.
        lvls
      } else {
        # Mixed levels: non-integer levels are ranked just below the smallest
        # integer level, then all levels are sorted numerically.
        lvls[!is_int_lvl] <- min(as.numeric(lvls[is_int_lvl])) - 1
        levels(grade)[order(as.numeric(lvls))]
      }
      grade <- formatters::with_label(factor(grade, levels = lvl_ord, ordered = TRUE), grade_lbl)
    }

    # Levels containing "missing" (case-insensitive) are moved to the end of the level order.
    cur_lvls <- levels(grade)
    missing_lvl <- grepl("missing", tolower(cur_lvls))
    if (any(missing_lvl)) {
      grade <- factor(
        grade,
        levels = c(cur_lvls[!missing_lvl], cur_lvls[missing_lvl]),
        ordered = is.ordered(grade)
      )
    }

    # Highest grade per identifier, then the number of identifiers per grade.
    df_max <- stats::aggregate(grade ~ id, FUN = max, drop = FALSE)
    l_count <- as.list(table(df_max$grade))
  }

  if (length(grade_groups) > 0) {
    l_count <- h_append_grade_groups(grade_groups, l_count, remove_single)
  }

  # Pair each count with its fraction of the column total.
  l_count_fraction <- lapply(l_count, function(cnt) c(cnt, cnt / .N_col))

  list(count_fraction = l_count_fraction)
}

#' @describeIn count_occurrences_by_grade Formatted analysis function which is used as `afun`
#'   in `count_occurrences_by_grade()`.
#'
#' @return
#' * `a_count_occurrences_by_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' #  We need to ungroup `count_fraction` first so that the `rtables` formatting
#' # function `format_count_fraction()` can be applied correctly.
#' afun <- make_afun(a_count_occurrences_by_grade, .ungroup_stats = "count_fraction")
#' afun(
#'   df,
#'   .N_col = 10L,
#'   .var = "AETOXGR",
#'   id = "USUBJID",
#'   grade_groups = list("ANY" = levels(df$AETOXGR))
#' )
#'
#' @export
a_count_occurrences_by_grade <- make_afun(
  # Statistics function whose results are formatted.
  s_count_occurrences_by_grade,
  # Default format of the count/fraction pairs.
  .formats = c("count_fraction" = format_count_fraction_fixed_dp)
)

#' @describeIn count_occurrences_by_grade Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6, 1)),
#'   ARM = factor(c("A", "A", "A", "B", "B", "B", "A"), levels = c("A", "B")),
#'   AETOXGR = factor(c(1, 2, 3, 4, 1, 2, 3), levels = c(1:5)),
#'   AESEV = factor(
#'     x = c("MILD", "MODERATE", "SEVERE", "MILD", "MILD", "MODERATE", "SEVERE"),
#'     levels = c("MILD", "MODERATE", "SEVERE")
#'   ),
#'   stringsAsFactors = FALSE
#' )
#'
#' df_adsl <- df %>%
#'   select(USUBJID, ARM) %>%
#'   unique()
#'
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' # Define additional grade groupings.
#' grade_groups <- list(
#'   "-Any-" = c("1", "2", "3", "4", "5"),
#'   "Grade 1-2" = c("1", "2"),
#'   "Grade 3-5" = c("3", "4", "5")
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 2
count_occurrences_by_grade <- function(lyt,
                                       var,
                                       id = "USUBJID",
                                       grade_groups = list(),
                                       remove_single = TRUE,
                                       var_labels = var,
                                       show_labels = "default",
                                       riskdiff = FALSE,
                                       na_str = default_na_str(),
                                       nested = TRUE,
                                       ...,
                                       table_names = var,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .indent_mods = NULL,
                                       .labels = NULL) {
  # Adds an `analyze()` step to `lyt` for `var`. When `riskdiff` is `FALSE` the formatted analysis
  # function is used directly; otherwise it is handed to `afun_riskdiff` through `extra_args`.
  checkmate::assert_flag(riskdiff)

  # Arguments forwarded to the statistics function (including anything passed via `...`).
  s_args <- list(id = id, grade_groups = grade_groups, remove_single = remove_single, ...)

  # NOTE: `.labels` is part of the signature but is not forwarded to `make_afun()` in this function.
  afun <- make_afun(
    a_count_occurrences_by_grade,
    .stats = .stats,
    .formats = .formats,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  extra_args <- if (isFALSE(riskdiff)) {
    s_args
  } else {
    # In the risk-difference case the analysis function and its arguments travel inside `extra_args`.
    list(
      afun = list("s_count_occurrences_by_grade" = afun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  analyze(
    lyt = lyt,
    vars = var,
    var_labels = var_labels,
    show_labels = show_labels,
    # Plain `if`/`else` rather than `ifelse()`: the operands are functions, not vectors, and
    # `ifelse()` only returns them intact through its scalar quick-return path.
    afun = if (isFALSE(riskdiff)) afun else afun_riskdiff,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args
  )
}

#' @describeIn count_occurrences_by_grade Layout-creating function which can take content function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_occurrences_by_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted content rows
#'   containing the statistics from `s_count_occurrences_by_grade()` to the table layout.
#'
#' @examples
#' # Layout creating function with custom format.
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AESEV",
#'     .formats = c("count_fraction" = "xx.xx (xx.xx%)")
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' basic_table() %>%
#'   add_colcounts() %>%
#'   split_rows_by("ARM", child_labels = "visible", nested = TRUE) %>%
#'   summarize_occurrences_by_grade(
#'     var = "AETOXGR",
#'     grade_groups = grade_groups
#'   ) %>%
#'   build_table(df, alt_counts_df = df_adsl)
#'
#' @export
#' @order 3
summarize_occurrences_by_grade <- function(lyt,
                                           var,
                                           id = "USUBJID",
                                           grade_groups = list(),
                                           remove_single = TRUE,
                                           na_str = default_na_str(),
                                           ...,
                                           .stats = NULL,
                                           .formats = NULL,
                                           .indent_mods = NULL,
                                           .labels = NULL) {
  # Arguments handed on to the statistics function, including anything supplied through `...`.
  stat_args <- list(id = id, grade_groups = grade_groups, remove_single = remove_single, ...)

  # Content function built from the formatted analysis function with the user's customizations.
  content_fun <- make_afun(
    a_count_occurrences_by_grade,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Register the content rows on the layout.
  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    na_str = na_str,
    extra_args = stat_args
  )
}

#' Subgroup Treatment Effect Pattern (STEP) Fit for Survival Outcome
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This fits the Subgroup Treatment Effect Pattern models for a survival outcome. The treatment arm
#' variable must have exactly 2 levels, where the first one is taken as reference and the estimated
#' hazard ratios are for the comparison of the second level vs. the first one.
#'
#' The model which is fit is:
#'
#' `Surv(time, event) ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
#'
#' where `degree` is specified by `control_step()`.
#'
#' @inheritParams argument_convention
#' @param variables (named `list` of `character`)\cr list of analysis variables: needs `time`, `event`,
#'   `arm`, `biomarker`, and optional `covariates` and `strata`.
#' @param control (named `list`)\cr combined control list from [control_step()] and [control_coxph()].
#'
#' @return A matrix of class `step`. The first part of the columns describe the subgroup intervals used
#'   for the biomarker variable, including where the center of the intervals are and their bounds. The
#'   second part of the columns contain the estimates for the treatment arm comparison.
#'
#' @note For the default degree 0 the `biomarker` variable is not included in the model.
#'
#' @seealso [control_step()] and [control_coxph()] for the available customization options.
#'
#' @examples
#' # Testing dataset with just two treatment arms.
#' library(dplyr)
#'
#' adtte_f <- tern_ex_adtte %>%
#'   filter(
#'     PARAMCD == "OS",
#'     ARM %in% c("B: Placebo", "A: Drug X")
#'   ) %>%
#'   mutate(
#'     # Reorder levels of ARM to display reference arm before treatment arm.
#'     ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("ARM" = "Treatment Arm", "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' variables <- list(
#'   arm = "ARM",
#'   biomarker = "BMRKR1",
#'   covariates = c("AGE", "BMRKR2"),
#'   event = "is_event",
#'   time = "AVAL"
#' )
#'
#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
#' step_matrix <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f
#' )
#' dim(step_matrix)
#' head(step_matrix)
#'
#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
#' # models. Or specify different Cox regression options.
#' step_matrix2 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(conf_level = 0.9), control_step(degree = 2))
#' )
#'
#' # Use a global model with cubic interaction and only 5 points.
#' step_matrix3 <- fit_survival_step(
#'   variables = variables,
#'   data = adtte_f,
#'   control = c(control_coxph(), control_step(bandwidth = NULL, degree = 3, num_points = 5L))
#' )
#'
#' @export
fit_survival_step <- function(variables,
                              data,
                              control = c(control_step(), control_coxph())) {
  checkmate::assert_list(control)
  assert_df_with_variables(data, variables)

  # Rows with a missing biomarker value are dropped before any window is built.
  data <- data[!is.na(data[[variables$biomarker]]), ]

  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_survival_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: one call on the full data, evaluated at all interval centers.
    estimates <- h_step_survival_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # With a bandwidth: one call per interval center, each restricted to its own row subset.
    per_window <- mapply(
      FUN = h_step_survival_est,
      x = centers,
      subset = as.list(as.data.frame(windows$sel)),
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # `mapply()` stacks the per-window results as columns; label the rows and transpose so that
    # each window becomes one row.
    rownames(per_window) <- c("n", "events", "loghr", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  # Interval description columns first, estimates second; inputs are kept as attributes.
  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

#' Cumulative Counts with Thresholds
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Summarize cumulative counts of values in a (`numeric`) vector that are less than, less than or equal to,
#' greater than, or greater than or equal to user-specified thresholds.
#'
#' @inheritParams h_count_cumulative
#' @inheritParams argument_convention
#' @param thresholds (`numeric`)\cr vector of cutoff values for the counts.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_cumulative")`
#'   to see available statistics for this function.
#'
#' @seealso Relevant helper function [h_count_cumulative()], and descriptive function [d_count_cumulative()].
#'
#' @name count_cumulative
#' @order 1
NULL

#' Helper Function for [s_count_cumulative()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to calculate count and fraction of `x` values in the lower or upper tail given a threshold.
#'
#' @inheritParams argument_convention
#' @param threshold (`number`)\cr a cutoff value as threshold to count values of `x`.
#' @param lower_tail (`logical`)\cr whether to count lower tail, default is `TRUE`.
#' @param include_eq (`logical`)\cr whether to include value equal to the `threshold` in
#'   count, default is `TRUE`.
#'
#' @return A named vector with items:
#'   * `count`: the count of values less than, less or equal to, greater than, or greater or equal to a threshold
#'     of user specification.
#'   * `fraction`: the fraction of the count.
#'
#' @seealso [count_cumulative]
#'
#' @examples
#' set.seed(1, kind = "Mersenne-Twister")
#' x <- c(sample(1:10, 10), NA)
#' .N_col <- length(x)
#'
#' h_count_cumulative(x, 5, .N_col = .N_col)
#' h_count_cumulative(x, 5, lower_tail = FALSE, include_eq = FALSE, na.rm = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 0, lower_tail = FALSE, .N_col = .N_col)
#' h_count_cumulative(x, 100, lower_tail = FALSE, .N_col = .N_col)
#'
#' @export
h_count_cumulative <- function(x,
                               threshold,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               na.rm = TRUE, # nolint
                               .N_col) { # nolint
  checkmate::assert_numeric(x)
  checkmate::assert_numeric(threshold)
  checkmate::assert_numeric(.N_col)
  checkmate::assert_flag(lower_tail)
  checkmate::assert_flag(include_eq)
  checkmate::assert_flag(na.rm)

  # Pick the comparison operator matching the requested tail and boundary handling.
  compare <- if (lower_tail) {
    if (include_eq) `<=` else `<`
  } else {
    if (include_eq) `>=` else `>`
  }

  # With `na.rm = FALSE` every position is kept, so missing values yield `NA` in the subscript
  # and are therefore included in the length that is counted.
  keep <- if (na.rm) !is.na(x) else rep(TRUE, length(x))
  n_hit <- length(x[keep & compare(x, threshold)])

  c(count = n_hit, fraction = n_hit / .N_col)
}

#' Description of Cumulative Count
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is a helper function that describes the analysis in [s_count_cumulative()].
#'
#' @inheritParams h_count_cumulative
#'
#' @return Labels for [s_count_cumulative()].
#'
#' @export
d_count_cumulative <- function(threshold, lower_tail, include_eq) {
  checkmate::assert_numeric(threshold)

  # Build the comparison symbol: direction first, then an optional "=".
  direction <- if (lower_tail) "<" else ">"
  if (include_eq) {
    direction <- paste0(direction, "=")
  }

  paste0(direction, " ", threshold)
}

#' @describeIn count_cumulative Statistics function that produces a named list given a numeric vector of thresholds.
#'
#' @return
#' * `s_count_cumulative()` returns a named list of `count_fraction`s: a list with each `thresholds` value as a
#'   component, each component containing a vector for the count and fraction.
#'
#' @keywords internal
s_count_cumulative <- function(x,
                               thresholds,
                               lower_tail = TRUE,
                               include_eq = TRUE,
                               .N_col, # nolint
                               ...) {
  checkmate::assert_numeric(thresholds, min.len = 1, any.missing = FALSE)

  # Count/fraction for one cutoff, carrying its descriptive label.
  count_one <- function(cutoff) {
    formatters::with_label(
      h_count_cumulative(x, cutoff, lower_tail, include_eq, .N_col = .N_col, ...),
      d_count_cumulative(cutoff, lower_tail, include_eq)
    )
  }

  per_threshold <- lapply(thresholds, count_one)
  # Elements are named after the threshold values themselves.
  names(per_threshold) <- thresholds

  list(count_fraction = per_threshold)
}

#' @describeIn count_cumulative Formatted analysis function which is used as `afun`
#'   in `count_cumulative()`.
#'
#' @return
#' * `a_count_cumulative()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_cumulative <- make_afun(
  # Statistics function being wrapped by `make_afun()`.
  s_count_cumulative,
  # Default format attached to the `count_fraction` statistic.
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn count_cumulative Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_cumulative()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_cumulative()` to the table layout.
#'
#' @examples
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_cumulative(
#'     vars = "AGE",
#'     thresholds = c(40, 60)
#'   ) %>%
#'   build_table(tern_ex_adsl)
#'
#' @export
#' @order 2
count_cumulative <- function(lyt,
                             vars,
                             thresholds,
                             lower_tail = TRUE,
                             include_eq = TRUE,
                             var_labels = vars,
                             show_labels = "visible",
                             na_str = default_na_str(),
                             nested = TRUE,
                             ...,
                             table_names = vars,
                             .stats = NULL,
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Arguments handed on to the statistics function, including anything supplied through `...`.
  stat_args <- list(thresholds = thresholds, lower_tail = lower_tail, include_eq = include_eq, ...)

  # Analysis function built from the formatted default with the user's customizations.
  analysis_fun <- make_afun(
    a_count_cumulative,
    .ungroup_stats = "count_fraction",
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # Register the analysis on the layout.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = analysis_fun,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = stat_args
  )
}

#' Get default statistical methods and their associated formats, labels, and indent modifiers
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Utility functions to get valid statistic methods for different method groups
#' (`.stats`) and their associated formats (`.formats`), labels (`.labels`), and indent modifiers
#' (`.indent_mods`). This utility is used across `tern`, but some of its working principles can be
#' seen in [analyze_vars()]. See notes to understand why this is experimental.
#'
#' @param stats (`character`)\cr statistical methods to get defaults for.
#'
#' @details
#' Current choices for `type` are `counts` and `numeric` for [analyze_vars()] and affect `get_stats()`.
#'
#' @note
#' These defaults are experimental because we use the names of functions to retrieve the default
#' statistics. This should be generalized in groups of methods according to more reasonable groupings.
#'
#' @name default_stats_formats_labels
NULL

#' @describeIn default_stats_formats_labels Get statistics available for a given method
#'   group (analyze function).
#'
#' @param method_groups (`character`)\cr indicates the statistical method group (`tern` analyze function)
#'   to retrieve default statistics for. A character vector can be used to specify more than one statistical
#'   method group.
#' @param stats_in (`character`)\cr statistics to retrieve for the selected method group.
#' @param add_pval (`flag`)\cr should `"pval"` (or `"pval_counts"` if `method_groups` contains
#'   `"analyze_vars_counts"`) be added to the statistical methods?
#'
#' @return
#' * `get_stats()` returns a `character` vector of statistical methods.
#'
#' @examples
#' # analyze_vars is numeric
#' num_stats <- get_stats("analyze_vars_numeric") # also the default
#'
#' # Other type
#' cnt_stats <- get_stats("analyze_vars_counts")
#'
#' # Weirdly taking the pval from count_occurrences
#' only_pval <- get_stats("count_occurrences", add_pval = TRUE, stats_in = "pval")
#'
#' # All count_occurrences
#' all_cnt_occ <- get_stats("count_occurrences")
#'
#' # Multiple
#' get_stats(c("count_occurrences", "analyze_vars_counts"))
#'
#' @export
get_stats <- function(method_groups = "analyze_vars_numeric", stats_in = NULL, add_pval = FALSE) {
  checkmate::assert_character(method_groups)
  checkmate::assert_character(stats_in, null.ok = TRUE)
  checkmate::assert_flag(add_pval)

  # Default is still numeric
  if (any(method_groups == "analyze_vars")) {
    method_groups[method_groups == "analyze_vars"] <- "analyze_vars_numeric"
  }

  # Used by the p-value consistency checks below.
  has_counts <- any(grepl("counts", method_groups))
  has_numeric <- any(grepl("numeric", method_groups))

  # Collect the default statistics of every requested method group (order preserved, no duplicates).
  out <- NULL
  for (mgi in method_groups) {
    if (!mgi %in% names(tern_default_stats)) {
      stop("The selected method group (", mgi, ") has no default statistical method.")
    }
    out <- unique(c(out, tern_default_stats[[mgi]]))
  }

  # If you added pval to the stats_in you certainly want it.
  # Only the two add-on p-value statistics are matched, and they are matched exactly: regular
  # statistics whose names merely start with "pval" (e.g. "pvalue", "pval_inter") are ordinary
  # entries of `tern_default_stats` and must stay selectable through `stats_in`.
  stats_in_pval_value <- stats_in[stats_in %in% c("pval", "pval_counts")]
  if (length(stats_in_pval_value) > 0) {
    # Must be only one value between choices
    checkmate::assert_choice(stats_in_pval_value, c("pval", "pval_counts"))

    # Mismatch with counts and numeric
    if (has_counts && stats_in_pval_value != "pval_counts" ||
      has_numeric && stats_in_pval_value != "pval") { # nolint
      stop(
        "Inserted p-value (", stats_in_pval_value, ") is not valid for type ",
        if (has_counts) "counts" else "numeric",
        ". Use ", if (stats_in_pval_value == "pval") "pval_counts" else "pval",
        " instead."
      )
    }

    # Lets add it even if present (thanks to unique)
    add_pval <- TRUE
  }

  # Mainly used in "analyze_vars" but it could be necessary elsewhere
  if (isTRUE(add_pval)) {
    out <- unique(c(out, if (has_counts) "pval_counts" else "pval"))
  }

  # Filtering for stats_in (character vector)
  if (!is.null(stats_in)) {
    out <- intersect(stats_in, out) # It orders them too
  }

  # If intersect did not find matches (and no pval?) -> error
  if (length(out) == 0) {
    stop(
      "The selected method group(s) (", paste0(method_groups, collapse = ", "), ")",
      " do not have the required default statistical methods:\n",
      paste0(stats_in, collapse = " ")
    )
  }

  out
}

#' @describeIn default_stats_formats_labels Get formats corresponding to a list of statistics.
#'
#' @param formats_in (named `vector`) \cr inserted formats to replace defaults. It can be a
#'   character vector from [formatters::list_valid_format_labels()] or a custom format function.
#'
#' @return
#' * `get_formats_from_stats()` returns a named vector of formats (if present in either
#'   `tern_default_formats` or `formats_in`, otherwise `NULL`). Values can be taken from
#'   [formatters::list_valid_format_labels()] or a custom function (e.g. [formatting_functions]).
#'
#' @note Formats in `tern` and `rtables` can be functions that take in the table cell value and
#'   return a string. This is well documented in `vignette("custom_appearance", package = "rtables")`.
#'
#' @examples
#' # Defaults formats
#' get_formats_from_stats(num_stats)
#' get_formats_from_stats(cnt_stats)
#' get_formats_from_stats(only_pval)
#' get_formats_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_formats_from_stats(all_cnt_occ, formats_in = c("fraction" = c("xx")))
#' get_formats_from_stats(all_cnt_occ, formats_in = list("fraction" = c("xx.xx", "xx")))
#'
#' @seealso [formatting_functions]
#'
#' @export
get_formats_from_stats <- function(stats, formats_in = NULL) {
  checkmate::assert_character(stats, min.len = 1)
  # `formats_in` may be `NULL` or a list (needed when a format is a function);
  # anything else has to be a character vector.
  if (!checkmate::test_list(formats_in, null.ok = TRUE)) {
    checkmate::assert_character(formats_in, null.ok = TRUE)
  }

  # One slot per requested statistic; slots without a known default stay `NULL`.
  out <- setNames(vector("list", length = length(stats)), stats)
  has_default <- stats %in% names(tern_default_formats)
  out[has_default] <- tern_default_formats[match(stats[has_default], names(tern_default_formats))]

  # Custom formats win over defaults, but only for statistics that were requested.
  if (!is.null(formats_in)) {
    shared <- intersect(names(out), names(formats_in))
    out[shared] <- formats_in[shared]
  }

  out
}

#' @describeIn default_stats_formats_labels Get labels corresponding to a list of statistics.
#'
#' @param labels_in (named `vector` of `character`)\cr inserted labels to replace defaults.
#' @param row_nms (`character`)\cr row names. Levels of a `factor` or `character` variable, each
#'   of which the statistics in `.stats` will be calculated for. If this parameter is set, these
#'   variable levels will be used as the defaults, and the names of the given custom values should
#'   correspond to levels (or have format `statistic.level`) instead of statistics. Can also be
#'   variable names if rows correspond to different variables instead of levels. Defaults to `NULL`.
#'
#' @return
#' * `get_labels_from_stats()` returns a named `character` vector of labels (if present in either
#'   `tern_default_labels` or `labels_in`, otherwise `NULL`).
#'
#' @examples
#' # Defaults labels
#' get_labels_from_stats(num_stats)
#' get_labels_from_stats(cnt_stats)
#' get_labels_from_stats(only_pval)
#' get_labels_from_stats(all_cnt_occ)
#'
#' # Addition of customs
#' get_labels_from_stats(all_cnt_occ, labels_in = c("fraction" = "Fraction"))
#' get_labels_from_stats(all_cnt_occ, labels_in = list("fraction" = c("Some more fractions")))
#'
#' @export
get_labels_from_stats <- function(stats, labels_in = NULL, row_nms = NULL) {
  checkmate::assert_character(stats, min.len = 1)
  checkmate::assert_character(row_nms, null.ok = TRUE)
  # `labels_in` may be `NULL` or a list; anything else has to be a character vector.
  if (!checkmate::test_list(labels_in, null.ok = TRUE)) {
    checkmate::assert_character(labels_in, null.ok = TRUE)
  }

  if (is.null(row_nms)) {
    # One label per statistic; statistics without a known default keep the empty string.
    out <- setNames(vector("character", length = length(stats)), stats)
    has_default <- stats %in% names(tern_default_labels)
    out[has_default] <- tern_default_labels[match(stats[has_default], names(tern_default_labels))]
  } else {
    # One label per statistic/row combination, named `statistic.row` and defaulting to the row name.
    row_labels <- rep(row_nms, length(stats))
    out <- setNames(row_labels, paste(rep(stats, each = length(row_nms)), row_labels, sep = "."))

    # A custom label named after a row applies to that row for every statistic.
    if (!is.null(labels_in)) {
      for (lvl in intersect(names(labels_in), row_nms)) {
        out[paste(stats, lvl, sep = ".")] <- labels_in[[lvl]]
      }
    }
  }

  # Custom labels whose names match an output name exactly take precedence.
  if (!is.null(labels_in)) {
    shared <- intersect(names(out), names(labels_in))
    out[shared] <- labels_in[shared]
  }

  out
}

#' @describeIn default_stats_formats_labels Format indent modifiers for a given vector/list of statistics.
#'
#' @param indents_in (named `vector`)\cr inserted indent modifiers to replace defaults (default is `0L`).
#'
#' @return
#' * `get_indents_from_stats()` returns a single indent modifier value to apply to all rows
#'   or a named numeric vector of indent modifiers (if present, otherwise `NULL`).
#'
#' @examples
#' get_indents_from_stats(all_cnt_occ, indents_in = 3L)
#' get_indents_from_stats(all_cnt_occ, indents_in = list(count = 2L, count_fraction = 5L))
#' get_indents_from_stats(
#'   all_cnt_occ,
#'   indents_in = list(a = 2L, count.a = 1L, count.b = 5L), row_nms = c("a", "b")
#' )
#'
#' @export
get_indents_from_stats <- function(stats, indents_in = NULL, row_nms = NULL) {
  checkmate::assert_character(stats, min.len = 1)
  checkmate::assert_character(row_nms, null.ok = TRUE)
  # `indents_in` may be `NULL` or a list; anything else has to be integer-like.
  if (!checkmate::test_list(indents_in, null.ok = TRUE)) {
    checkmate::assert_integerish(indents_in, null.ok = TRUE)
  }

  has_rows <- !is.null(row_nms)

  # A single unnamed value is simply repeated for every output row (result is unnamed).
  if (is.null(names(indents_in)) && length(indents_in) == 1) {
    n_out <- length(stats) * if (has_rows) length(row_nms) else 1
    return(rep(indents_in, n_out))
  }

  if (has_rows) {
    # One modifier per statistic/row combination, named `statistic.row` and defaulting to 0.
    out_names <- paste(rep(stats, each = length(row_nms)), rep(row_nms, length(stats)), sep = ".")
    out <- setNames(rep(0L, length(stats) * length(row_nms)), out_names)

    # A modifier named after a row applies to that row for every statistic.
    if (!is.null(indents_in)) {
      for (lvl in intersect(names(indents_in), row_nms)) {
        out[paste(stats, lvl, sep = ".")] <- indents_in[[lvl]]
      }
    }
  } else {
    out <- setNames(rep(0L, length(stats)), stats)
  }

  # Custom indent modifiers whose names match an output name exactly take precedence.
  if (!is.null(indents_in)) {
    shared <- intersect(names(out), names(indents_in))
    out[shared] <- indents_in[shared]
  }

  out
}

#' Update Labels According to Control Specifications
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Given a list of statistic labels and a list of control parameters, updates labels with a relevant
#' control specification. For example, if control has element `conf_level` set to `0.9`, the default
#' label for statistic `mean_ci` will be updated to `"Mean 90% CI"`. Any labels that are supplied
#' via `labels_custom` will not be updated regardless of `control`.
#'
#' @param labels_default (named `vector` of `character`)\cr a named vector of statistic labels to modify
#'   according to the control specifications. Labels that are explicitly defined in `labels_custom` will
#'   not be affected.
#' @param labels_custom (named `vector` of `character`)\cr named vector of labels that are customized by
#'   the user and should not be affected by `control`.
#' @param control (named `list`)\cr list of control parameters to apply to adjust default labels.
#'
#' @return A named character vector of labels with control specifications applied to relevant labels.
#'
#' @examples
#' control <- list(conf_level = 0.80, quantiles = c(0.1, 0.83), test_mean = 0.57)
#' get_labels_from_stats(c("mean_ci", "quantiles", "mean_pval")) %>%
#'   labels_use_control(control = control)
#'
#' @export
labels_use_control <- function(labels_default, control, labels_custom = NULL) {
  # Confidence level: rewrite the "<number>% CI" part of every label that is not user-customized.
  # Labels are told apart by name, so this step is skipped when `labels_default` carries no names
  # (this also keeps empty input from being turned into a list).
  if ("conf_level" %in% names(control) && !is.null(names(labels_default))) {
    # `vapply()` keeps the result a named character vector whatever the input length.
    labels_default <- vapply(
      names(labels_default),
      function(x) {
        if (!x %in% names(labels_custom)) {
          gsub(labels_default[[x]], pattern = "[0-9]+% CI", replacement = f_conf_level(control[["conf_level"]]))
        } else {
          labels_default[[x]]
        }
      },
      FUN.VALUE = character(1)
    )
  }

  # Quantiles: replace the two percentages in the default "quantiles" label.
  if ("quantiles" %in% names(control) && "quantiles" %in% names(labels_default) &&
    !"quantiles" %in% names(labels_custom)) { # nolint
    labels_default["quantiles"] <- gsub(
      "[0-9]+% and [0-9]+", paste0(control[["quantiles"]][1] * 100, "% and ", control[["quantiles"]][2] * 100, ""),
      labels_default["quantiles"]
    )
  }

  # Tested mean: replace the hypothesis part of the default "mean_pval" label.
  if ("test_mean" %in% names(control) && "mean_pval" %in% names(labels_default) &&
    !"mean_pval" %in% names(labels_custom)) { # nolint
    labels_default["mean_pval"] <- gsub(
      "p-value \\(H0: mean = [0-9\\.]+\\)", f_pval(control[["test_mean"]]), labels_default["mean_pval"]
    )
  }

  labels_default
}

#' @describeIn default_stats_formats_labels Named list of available statistics by method group for `tern`.
#'
#' @format
#' * `tern_default_stats` is a named list of available statistics, with each element
#'   named for their corresponding statistical method group.
#'
#' @export
tern_default_stats <- list(
  # One entry per method group: the name is the method group, the value is the character vector
  # of statistics available for it. `get_stats()` looks entries up by name.
  abnormal = c("fraction"),
  abnormal_by_baseline = c("fraction"),
  abnormal_by_marked = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_by_worst_grade = c("count_fraction", "count_fraction_fixed_dp"),
  abnormal_by_worst_grade_worsen = c("fraction"),
  analyze_patients_exposure_in_cols = c("n_patients", "sum_exposure"),
  analyze_vars_counts = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  analyze_vars_numeric = c(
    "n", "sum", "mean", "sd", "se", "mean_sd", "mean_se", "mean_ci", "mean_sei", "mean_sdi", "mean_pval",
    "median", "mad", "median_ci", "quantiles", "iqr", "range", "min", "max", "median_range", "cv",
    "geom_mean", "geom_mean_ci", "geom_cv"
  ),
  count_cumulative = c("count_fraction", "count_fraction_fixed_dp"),
  count_missed_doses = c("n", "count_fraction", "count_fraction_fixed_dp"),
  count_occurrences = c("count", "count_fraction", "count_fraction_fixed_dp", "fraction"),
  count_occurrences_by_grade = c("count_fraction", "count_fraction_fixed_dp"),
  count_patients_with_event = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  count_patients_with_flags = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  count_values = c("n", "count", "count_fraction", "count_fraction_fixed_dp", "n_blq"),
  coxph_pairwise = c("pvalue", "hr", "hr_ci", "n_tot", "n_tot_events"),
  estimate_incidence_rate = c("person_years", "n_events", "rate", "rate_ci"),
  estimate_multinomial_response = c("n_prop", "prop_ci"),
  estimate_odds_ratio = c("or_ci", "n_tot"),
  estimate_proportion = c("n_prop", "prop_ci"),
  estimate_proportion_diff = c("diff", "diff_ci"),
  summarize_ancova = c("n", "lsmean", "lsmean_diff", "lsmean_diff_ci", "pval"),
  summarize_coxreg = c("n", "hr", "ci", "pval", "pval_inter"),
  summarize_glm_count = c("n", "rate", "rate_ci", "rate_ratio", "rate_ratio_ci", "pval"),
  summarize_num_patients = c("unique", "nonunique", "unique_count"),
  summarize_patients_events_in_cols = c("unique", "all"),
  surv_time = c("median", "median_ci", "quantiles", "range_censor", "range_event", "range"),
  surv_timepoint = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci", "rate_diff", "rate_diff_ci", "ztest_pval"),
  tabulate_rsp_biomarkers = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
  tabulate_rsp_subgroups = c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval"),
  tabulate_survival_biomarkers = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
  tabulate_survival_subgroups = c("n_tot_events", "n_events", "n_tot", "n", "median", "hr", "ci", "pval"),
  test_proportion_diff = c("pval")
)

#' @describeIn default_stats_formats_labels Named vector of default formats for `tern`.
#'
#' @format
#' * `tern_default_formats` is a named vector of available default formats, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_formats <- c(
  # One entry per statistic: the name is the statistic, the value is its default format.
  # Values are either format strings or `format_*` objects defined elsewhere in the package
  # (presumably formatting functions, in which case `c()` yields a list -- confirm).
  # `get_formats_from_stats()` looks entries up by name.
  fraction = format_fraction_fixed_dp,
  unique = format_count_fraction_fixed_dp,
  nonunique = "xx",
  unique_count = "xx",
  n = "xx.",
  count = "xx.",
  count_fraction = format_count_fraction,
  count_fraction_fixed_dp = format_count_fraction_fixed_dp,
  n_blq = "xx.",
  sum = "xx.x",
  mean = "xx.x",
  sd = "xx.x",
  se = "xx.x",
  mean_sd = "xx.x (xx.x)",
  mean_se = "xx.x (xx.x)",
  mean_ci = "(xx.xx, xx.xx)",
  mean_sei = "(xx.xx, xx.xx)",
  mean_sdi = "(xx.xx, xx.xx)",
  mean_pval = "xx.xx",
  median = "xx.x",
  mad = "xx.x",
  median_ci = "(xx.xx, xx.xx)",
  quantiles = "xx.x - xx.x",
  iqr = "xx.x",
  range = "xx.x - xx.x",
  min = "xx.x",
  max = "xx.x",
  median_range = "xx.x (xx.x - xx.x)",
  cv = "xx.x",
  geom_mean = "xx.x",
  geom_mean_ci = "(xx.xx, xx.xx)",
  geom_cv = "xx.x",
  pval = "x.xxxx | (<0.0001)",
  pval_counts = "x.xxxx | (<0.0001)",
  range_censor = "xx.x to xx.x",
  range_event = "xx.x to xx.x"
)

#' @describeIn default_stats_formats_labels Named `character` vector of default labels for `tern`.
#'
#' @format
#' * `tern_default_labels` is a named `character` vector of available default labels, with each element
#'   named for their corresponding statistic.
#'
#' @export
tern_default_labels <- c(
  # Lookup table: statistic name -> default row label.
  # Not every statistic that has a default format has a label here
  # (`unique_count`, `range_censor` and `range_event` are absent).

  # Counts and fractions
  fraction = "fraction",
  unique = "Number of patients with at least one event",
  nonunique = "Number of events",
  n = "n",
  count = "count",
  count_fraction = "count_fraction",
  # Note: the fixed-decimal variant deliberately or accidentally reuses the
  # plain "count_fraction" label -- NOTE(review): confirm this is intended.
  count_fraction_fixed_dp = "count_fraction",
  n_blq = "n_blq",
  # Numeric summaries
  sum = "Sum",
  mean = "Mean",
  sd = "SD",
  se = "SE",
  mean_sd = "Mean (SD)",
  mean_se = "Mean (SE)",
  mean_ci = "Mean 95% CI",
  mean_sei = "Mean -/+ 1xSE",
  mean_sdi = "Mean -/+ 1xSD",
  mean_pval = "Mean p-value (H0: mean = 0)",
  median = "Median",
  mad = "Median Absolute Deviation",
  median_ci = "Median 95% CI",
  quantiles = "25% and 75%-ile",
  iqr = "IQR",
  range = "Min - Max",
  min = "Minimum",
  max = "Maximum",
  median_range = "Median (Min - Max)",
  cv = "CV (%)",
  geom_mean = "Geometric Mean",
  geom_mean_ci = "Geometric Mean 95% CI",
  geom_cv = "CV % Geometric Mean",
  # p-value labels name the test assumed by default for each data type
  pval = "p-value (t-test)", # Default for numeric
  pval_counts = "p-value (chi-squared test)" # Default for counts
)

# To deprecate ---------

#' @describeIn default_stats_formats_labels Quick function to retrieve default formats for summary statistics:
#'   [analyze_vars()] and [analyze_vars_in_cols()] principally.
#'
#' @param type (`string`)\cr data type of the summarized variable, either `"numeric"` or `"counts"`.
#'
#' @return
#' * `summary_formats()` returns a named `vector` of default statistic formats for the given data type.
#'
#' @examples
#' summary_formats()
#' summary_formats(type = "counts", include_pval = TRUE)
#'
#' @export
summary_formats <- function(type = "numeric", include_pval = FALSE) {
  # The method group name is "analyze_vars" joined with the data type by "_".
  stat_group <- paste(c("analyze_vars", type), collapse = "_")
  stat_names <- get_stats(stat_group, add_pval = include_pval)
  get_formats_from_stats(stat_names)
}

#' @describeIn default_stats_formats_labels Quick function to retrieve default labels for summary statistics.
#'   Returns labels of descriptive statistics which are understood by `rtables`. Similar to `summary_formats()`.
#'
#' @param include_pval (`flag`)\cr deprecated parameter. Same as `add_pval`.
#'
#' @return
#' * `summary_labels` returns a named `vector` of default statistic labels for the given data type.
#'
#' @examples
#' summary_labels()
#' summary_labels(type = "counts", include_pval = TRUE)
#'
#' @export
summary_labels <- function(type = "numeric", include_pval = FALSE) {
  # The method group name is "analyze_vars" joined with the data type by "_".
  stat_group <- paste(c("analyze_vars", type), collapse = "_")
  stat_names <- get_stats(stat_group, add_pval = include_pval)
  get_labels_from_stats(stat_names)
}

#' @describeIn default_stats_formats_labels `r lifecycle::badge("deprecated")` Function to
#'   configure settings for default or custom summary statistics for a given data type. In
#'   addition to selecting a custom subset of statistics, the user can also set custom
#'   formats, labels, and indent modifiers for any of these statistics.
#'
#' @param stats_custom (`named vector` of `character`)\cr vector of statistics to include if
#'   not the defaults. This argument overrides `include_pval` and other custom value arguments
#'   such that only settings for these statistics will be returned.
#' @param formats_custom (`named vector` of `character`)\cr vector of custom statistics formats
#'   to use in place of the defaults defined in [`summary_formats()`]. Names should be a subset
#'   of the statistics defined in `stats_custom` (or default statistics if this is `NULL`).
#' @param labels_custom (`named vector` of `character`)\cr vector of custom statistics labels
#'   to use in place of the defaults defined in [`summary_labels()`]. Names should be a subset
#'   of the statistics defined in `stats_custom` (or default statistics if this is `NULL`).
#' @param indent_mods_custom (`integer` or `named vector` of `integer`)\cr vector of custom
#'   indentation modifiers for statistics to use instead of the default of `0L` for all statistics.
#'   Names should be a subset of the statistics defined in `stats_custom` (or default statistics
#'   if this is `NULL`). Alternatively, the same indentation modifier can be applied to all
#'   statistics by setting `indent_mods_custom` to a single integer value.
#'
#' @return
#' * `summary_custom` returns a `list` of 4 named elements: `stats`, `formats`, `labels`,
#'   and `indent_mods`.
#'
#' @examples
#' summary_custom()
#' summary_custom(type = "counts", include_pval = TRUE)
#' summary_custom(
#'   include_pval = TRUE, stats_custom = c("n", "mean", "sd", "pval"),
#'   labels_custom = c(sd = "Std. Dev."), indent_mods_custom = 3L
#' )
#'
#' @export
summary_custom <- function(type = "numeric",
                           include_pval = FALSE,
                           stats_custom = NULL,
                           formats_custom = NULL,
                           labels_custom = NULL,
                           indent_mods_custom = NULL) {
  # Deprecated entry point: warn first, then assemble the settings as before.
  lifecycle::deprecate_warn(
    "0.9.0.9001",
    "summary_custom()",
    details = "Please use `get_stats`, `get_formats_from_stats`, and `get_labels_from_stats` directly instead."
  )

  stat_group <- paste(c("analyze_vars", type), collapse = "_")
  .stats <- get_stats(stat_group, stats_custom, add_pval = include_pval)
  .formats <- get_formats_from_stats(.stats, formats_custom)
  .labels <- get_labels_from_stats(.stats, labels_custom)

  # Every statistic starts with an indentation modifier of 0.
  .indent_mods <- stats::setNames(rep(0L, length(.stats)), .stats)

  if (!is.null(indent_mods_custom)) {
    # A single unnamed value applies to all statistics; otherwise the custom
    # values are matched to statistics by name.
    apply_to_all <- is.null(names(indent_mods_custom)) && length(indent_mods_custom) == 1
    targets <- if (apply_to_all) names(.indent_mods) else names(indent_mods_custom)
    .indent_mods[targets] <- indent_mods_custom
  }

  # Subsetting by `.stats` discards modifiers given for statistics not selected.
  list(
    stats = .stats,
    formats = .formats,
    labels = .labels,
    indent_mods = .indent_mods[.stats]
  )
}

#' Combine Factor Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Combine the specified existing factor levels into a single new level.
#'
#' @param x (`factor`)\cr the factor whose levels should be combined.
#' @param levels (`character`)\cr names of the levels of `x` to be combined.
#' @param new_level (`string`)\cr name of the new level. Defaults to the combined level names
#'   separated by `"/"`.
#'
#' @return A `factor` with the new levels.
#'
#' @examples
#' x <- factor(letters[1:5], levels = letters[5:1])
#' combine_levels(x, levels = c("a", "b"))
#'
#' combine_levels(x, c("e", "b"))
#'
#' @export
combine_levels <- function(x, levels, new_level = paste(levels, collapse = "/")) {
  checkmate::assert_factor(x)
  checkmate::assert_subset(levels, levels(x))

  # Renaming every selected level to the same label makes the `levels<-`
  # assignment merge them into one level.
  current <- levels(x)
  levels(x) <- replace(current, current %in% levels, new_level)
  x
}

#' Conversion of a Vector to a Factor
#'
#' Converts `x` to a factor and keeps its attributes. Warns appropriately such that the user
#' can decide whether they prefer converting to factor manually (e.g. for full control of
#' factor levels).
#'
#' @param x (`atomic`)\cr object to convert.
#' @param x_name (`string`)\cr name of `x`.
#' @param na_level (`string`)\cr the explicit missing level which should be used when converting a character vector.
#' @param verbose defaults to `TRUE`. It prints out warnings and messages.
#'
#' @return A `factor` with same attributes (except class) as `x`. Does not modify `x` if already a `factor`.
#'
#' @keywords internal
as_factor_keep_attributes <- function(x,
                                      x_name = deparse(substitute(x)),
                                      na_level = "<Missing>",
                                      verbose = TRUE) {
  checkmate::assert_atomic(x)
  checkmate::assert_string(x_name)
  checkmate::assert_string(na_level)
  checkmate::assert_flag(verbose)

  # Factors are passed through untouched.
  if (is.factor(x)) {
    return(x)
  }

  if (verbose) {
    warning(paste(
      "automatically converting", class(x)[1], "variable", x_name,
      "to factor, better manually convert to factor to avoid failures"
    ))
  }
  # This warning is independent of `verbose`.
  if (length(x) == 0L) {
    warning(paste(
      x_name, "has length 0, this can lead to tabulation failures, better convert to factor"
    ))
  }

  # Build the factor: for character input containing missing values the
  # explicit missing level is moved to the end of the levels.
  as_fct <- if (is.character(x)) {
    x_no_na <- explicit_na(sas_na(x), label = na_level)
    if (any(na_level %in% x_no_na)) {
      forcats::fct_relevel(x_no_na, na_level, after = Inf)
    } else {
      as.factor(x)
    }
  } else {
    as.factor(x)
  }

  # Re-attach the attributes of the original vector to the new factor.
  do.call(structure, c(list(.Data = as_fct), attributes(x)))
}

#' Labels for Bins in Percent
#'
#' This creates labels for quantile based bins in percent. This assumes the right-closed
#' intervals as produced by [cut_quantile_bins()].
#'
#' @param probs (`proportion` vector)\cr the probabilities identifying the quantiles.
#'   This is a sorted vector of unique `proportion` values, i.e. between 0 and 1, where
#'   the boundaries 0 and 1 must not be included.
#' @param digits (`integer`)\cr number of decimal places to round the percent numbers.
#'
#' @return A `character` vector with labels in the format `[0%,20%]`, `(20%,50%]`, etc.
#'
#' @keywords internal
bins_percent_labels <- function(probs,
                                digits = 0) {
  # Pad with the outer boundaries 0 and 1 when they are not supplied.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  percent <- round(probs * 100, digits = digits)
  n_bounds <- length(percent)

  # Consecutive boundaries form the bins: all but the last value are lower
  # limits, all but the first are upper limits.
  lower <- percent[-n_bounds]
  upper <- percent[-1]

  # Only the first bin is closed on the left; all bins are closed on the right.
  opening <- c("[", rep("(", n_bounds - 2))
  paste0(opening, lower, "%,", upper, "%]")
}

#' Cutting Numeric Vector into Empirical Quantile Bins
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This cuts a numeric vector into sample quantile bins.
#'
#' @inheritParams bins_percent_labels
#' @param x (`numeric`)\cr the continuous variable values which should be cut into
#'   quantile bins. This may contain `NA` values, which are then
#'   not used for the quantile calculations, but included in the return vector.
#' @param labels (`character`)\cr the unique labels for the quantile bins. When there are `n`
#'   probabilities in `probs`, then this must be `n + 1` long.
#' @param type (`integer`)\cr type of quantiles to use, see [stats::quantile()] for details.
#' @param ordered (`flag`)\cr should the result be an ordered factor.
#'
#' @return A `factor` variable with appropriately-labeled bins as levels.
#'
#' @note Intervals are closed on the right side. That is, the first bin is the interval
#'   `[-Inf, q1]` where `q1` is the first quantile, the second bin is then `(q1, q2]`, etc.,
#'   and the last bin is `(qn, +Inf]` where `qn` is the last quantile.
#'
#' @examples
#' # Default is to cut into quartile bins.
#' cut_quantile_bins(cars$speed)
#'
#' # Use custom quantiles.
#' cut_quantile_bins(cars$speed, probs = c(0.1, 0.2, 0.6, 0.88))
#'
#' # Use custom labels.
#' cut_quantile_bins(cars$speed, labels = paste0("Q", 1:4))
#'
#' # NAs are preserved in result factor.
#' ozone_binned <- cut_quantile_bins(airquality$Ozone)
#' which(is.na(ozone_binned))
#' # So you might want to make these explicit.
#' explicit_na(ozone_binned)
#'
#' @export
cut_quantile_bins <- function(x,
                              probs = c(0.25, 0.5, 0.75),
                              labels = NULL,
                              type = 7,
                              ordered = TRUE) {
  checkmate::assert_flag(ordered)
  checkmate::assert_numeric(x)

  # Pad with the outer boundaries 0 and 1 when they are not supplied.
  if (!(0 %in% probs)) probs <- c(0, probs)
  if (!(1 %in% probs)) probs <- c(probs, 1)
  checkmate::assert_numeric(probs, lower = 0, upper = 1, unique = TRUE, sorted = TRUE)

  # One label per bin, i.e. one fewer than the number of boundaries.
  if (is.null(labels)) labels <- bins_percent_labels(probs)
  checkmate::assert_character(labels, len = length(probs) - 1, any.missing = FALSE, unique = TRUE)

  if (all(is.na(x))) {
    # Without any observed value no quantiles can be computed: return an
    # all-NA factor that still carries the bin levels.
    binned <- factor(x, ordered = ordered, levels = labels)
  } else {
    quantiles <- stats::quantile(
      x,
      probs = probs,
      type = type,
      na.rm = TRUE
    )
    # Tied quantiles would give duplicated break points, so reject them here.
    checkmate::assert_numeric(quantiles, unique = TRUE)

    binned <- cut(
      x,
      breaks = quantiles,
      labels = labels,
      ordered_result = ordered,
      include.lowest = TRUE,
      right = TRUE
    )
  }

  binned
}

#' Discard Certain Levels from a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This discards the observations as well as the levels specified from a factor.
#'
#' @param x (`factor`)\cr the original factor.
#' @param discard (`character`)\cr which levels to discard.
#'
#' @return A modified `factor` with observations as well as levels from `discard` dropped.
#'
#' @examples
#' fct_discard(factor(c("a", "b", "c")), "c")
#'
#' @export
fct_discard <- function(x, discard) {
  checkmate::assert_factor(x)
  checkmate::assert_character(discard, any.missing = FALSE)

  # Keep observations whose value is not among the discarded levels, then
  # rebuild the factor on the remaining levels in their original order.
  keep_obs <- is.na(match(x, discard))
  factor(x[keep_obs], levels = setdiff(levels(x), discard))
}

#' Insertion of Explicit Missings in a Factor
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This inserts explicit missings in a factor based on a condition. Additionally,
#' existing `NA` values will be explicitly converted to given `na_level`.
#'
#' @param x (`factor`)\cr the original factor.
#' @param condition (`logical`)\cr where to insert missings.
#' @param na_level (`string`)\cr which level to use for missings.
#'
#' @return A modified `factor` with inserted and existing `NA` converted to `na_level`.
#'
#' @seealso [forcats::fct_na_value_to_level()] which is used internally.
#'
#' @examples
#' fct_explicit_na_if(factor(c("a", "b", NA)), c(TRUE, FALSE, FALSE))
#'
#' @export
fct_explicit_na_if <- function(x, condition, na_level = "<Missing>") {
  checkmate::assert_factor(x, len = length(condition))
  checkmate::assert_logical(condition)

  # Blank out the flagged observations, then turn all NAs (new and
  # pre-existing) into the explicit level.
  with_na <- replace(x, condition, NA)
  explicit <- forcats::fct_na_value_to_level(with_na, level = na_level)

  # Drop the explicit level again if it ended up unused; other levels are kept.
  forcats::fct_drop(explicit, only = na_level)
}

#' Collapsing of Factor Levels and Keeping Only Those New Group Levels
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This collapses levels and only keeps those new group levels, in the order provided.
#' The returned factor has levels in the order given, with the possible missing level last (this will
#' only be included if there are missing values).
#'
#' @param .f (`factor` or `character`)\cr original vector.
#' @param ... (named `character` vectors)\cr levels in each vector provided will be collapsed into
#'   the new level given by the respective name.
#' @param .na_level (`string`)\cr which level to use for other levels, which should be missing in the
#'   new factor. Note that this level must not be contained in the new levels specified in `...`.
#'
#' @return A modified `factor` with collapsed levels. Values and levels which are not included
#'   in the given `character` vector input will be set to the missing level `.na_level`.
#'
#' @note Any existing `NA`s in the input vector will not be replaced by the missing level. If needed,
#'   [explicit_na()] can be called separately on the result.
#'
#' @seealso [forcats::fct_collapse()], [forcats::fct_relevel()] which are used internally.
#'
#' @examples
#' fct_collapse_only(factor(c("a", "b", "c", "d")), TRT = "b", CTRL = c("c", "d"))
#'
#' @export
fct_collapse_only <- function(.f, ..., .na_level = "<Missing>") {
  new_lvls <- names(list(...))

  # The missing level must be distinct from every requested new level.
  na_level_clashes <- checkmate::test_subset(.na_level, new_lvls)
  if (na_level_clashes) {
    stop(paste0(".na_level currently set to '", .na_level, "' must not be contained in the new levels"))
  }

  # Collapse the listed levels; everything else goes to the missing level.
  collapsed <- forcats::fct_collapse(.f, ..., other_level = .na_level)

  # Bring the new levels to the front in the order they were given.
  relevel_args <- c(list(.f = collapsed), as.list(new_lvls))
  do.call(forcats::fct_relevel, args = relevel_args)
}

#' Ungroup Non-Numeric Statistics
#'
#' Ungroups grouped non-numeric statistics within input vectors `.formats`, `.labels`, and `.indent_mods`.
#'
#' @inheritParams argument_convention
#' @param x  (`named list` of `numeric`)\cr list of numeric statistics containing the statistics to ungroup.
#'
#' @return A `list` with modified elements `x`, `.formats`, `.labels`, and `.indent_mods`.
#'
#' @seealso [a_summary()] which uses this function internally.
#'
#' @keywords internal
ungroup_stats <- function(x,
                          .formats,
                          .labels,
                          .indent_mods) {
  checkmate::assert_list(x)

  # Remember zero-length p-value entries: flattening the list drops them.
  is_empty_stat <- function(nm) nm %in% names(x) && length(x[[nm]]) == 0
  empty_pval <- is_empty_stat("pval")
  empty_pval_counts <- is_empty_stat("pval_counts")

  # Flatten one level; elements of grouped statistics get names of the form
  # "<statistic>.<element name>".
  x <- unlist(x, recursive = FALSE)

  # Re-add the p-value entries that were dropped by the flattening
  if (empty_pval) x[["pval"]] <- character()
  if (empty_pval_counts) x[["pval_counts"]] <- character()
  .stats <- names(x)

  # Helpers working on a flattened name; splitting happens at the first dot only.
  has_dot <- function(nm) grepl("\\.", nm)
  split_first_dot <- function(nm) regmatches(nm, regexpr("\\.", nm), invert = TRUE)[[1]]
  parent_stat <- function(nm) if (has_dot(nm)) split_first_dot(nm)[1] else nm

  # Formats and indentation are inherited from the parent statistic; the label
  # of an ungrouped element is the part of its name after the first dot.
  .formats <- lapply(.stats, function(nm) .formats[[parent_stat(nm)]])
  .indent_mods <- sapply(.stats, function(nm) .indent_mods[[parent_stat(nm)]])
  .labels <- sapply(.stats, function(nm) {
    if (has_dot(nm)) split_first_dot(nm)[2] else .labels[[nm]]
  })

  list(
    x = x,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )
}

#' Additional Assertions for `checkmate`
#'
#' Additional assertion functions which can be used together with the `checkmate` package.
#'
#' @inheritParams checkmate::assert_factor
#' @param x (`any`)\cr object to test.
#' @param df (`data.frame`)\cr data set to test.
#' @param variables (named `list` of `character`)\cr list of variables to test.
#' @param include_boundaries (`logical`)\cr whether to include boundaries when testing
#'   for proportions.
#' @param na_level (`character`)\cr the string you have been using to represent NA or
#'   missing data. For `NA` values please consider using directly [is.na()] or
#'   similar approaches.
#'
#' @return Nothing if assertion passes, otherwise prints the error message.
#'
#' @name assertions
NULL

check_list_of_variables <- function(x) {
  # NULL elements are dropped before anything is checked
  non_null <- Filter(Negate(is.null), x)

  # Must be a non-empty, named list of character vectors without missing values
  res <- checkmate::check_list(
    non_null,
    names = "named",
    min.len = 1,
    any.missing = FALSE,
    types = "character"
  )
  if (!isTRUE(res)) {
    return(res)
  }

  # Additionally, no variable name may be an empty string
  checkmate::check_character(unlist(non_null), min.chars = 1)
}
#' @describeIn assertions Checks whether `x` is a valid list of variable names.
#'   `NULL` elements of the list `x` are dropped with `Filter(Negate(is.null), x)`.
#'
#' @keywords internal
assert_list_of_variables <- checkmate::makeAssertionFunction(check_list_of_variables)

check_df_with_variables <- function(df, variables, na_level = NULL) {
  # Malformed inputs raise an error right away; only the content checks below
  # report failures as a returned message.
  checkmate::assert_data_frame(df)
  assert_list_of_variables(variables)

  # flag for equal variables and column names
  err_flag <- all(unlist(variables) %in% colnames(df))
  checkmate::assert_flag(err_flag)

  if (isFALSE(err_flag)) {
    vars <- setdiff(unlist(variables), colnames(df))
    return(paste(
      deparse(substitute(df)),
      "does not contain all specified variables as column names. Missing from dataframe:",
      paste(vars, collapse = ", ")
    ))
  }
  # checking if na_level is present and in which column
  if (!is.null(na_level)) {
    checkmate::assert_string(na_level)
    # `na.rm = TRUE` is required: comparing a column that holds `NA` values yields
    # `NA` elements, and without it `any()` would return `NA` for columns without a
    # match, which makes the `if` below fail instead of passing the check.
    res <- vapply(
      as.list(df)[unlist(variables)],
      function(x) any(x == na_level, na.rm = TRUE),
      logical(1)
    )
    if (any(res)) {
      return(paste0(
        deparse(substitute(df)), " contains explicit na_level (", na_level,
        ") in the following columns: ", paste0(unlist(variables)[res],
          collapse = ", "
        )
      ))
    }
  }
  return(TRUE)
}
#' @describeIn assertions Check whether `df` is a data frame with the analysis `variables`.
#'   Please notice how this produces an error when not all variables are present in the
#'   data.frame while the opposite is not required. If `na_level` is given, the check also
#'   fails when any of the analysis variables contains that string; `NA` values in the
#'   variables are ignored by this part of the check.
#'
#' @keywords internal
assert_df_with_variables <- checkmate::makeAssertionFunction(check_df_with_variables)

check_valid_factor <- function(x,
                               min.levels = 1, # nolint
                               max.levels = NULL, # nolint
                               null.ok = TRUE, # nolint
                               any.missing = TRUE, # nolint
                               n.levels = NULL, # nolint
                               len = NULL) {
  # checks on levels insertion
  checkmate::assert_int(min.levels, lower = 1)

  # main factor check; `len` is forwarded so that a requested length is
  # actually enforced instead of being silently ignored
  res <- checkmate::check_factor(x,
    min.levels = min.levels,
    null.ok = null.ok,
    max.levels = max.levels,
    any.missing = any.missing,
    n.levels = n.levels,
    len = len
  )

  # no empty strings allowed
  # NOTE(review): for `x = NULL` with `null.ok = TRUE`, `levels(x)` is `NULL` and
  # this check is run without `null.ok` -- confirm whether `NULL` is meant to pass.
  if (isTRUE(res)) {
    res <- checkmate::check_character(levels(x), min.chars = 1)
  }

  return(res)
}
#' @describeIn assertions Check whether `x` is a valid factor (i.e. has levels and no empty
#'   string levels). Note that `NULL` and `NA` elements are allowed. The arguments `min.levels`,
#'   `max.levels`, `null.ok`, `any.missing`, `n.levels` and `len` are passed on to
#'   [checkmate::check_factor()].
#'
#' @keywords internal
assert_valid_factor <- checkmate::makeAssertionFunction(check_valid_factor)


check_df_with_factors <- function(df,
                                  variables,
                                  min.levels = 1, # nolint
                                  max.levels = NULL, # nolint
                                  any.missing = TRUE, # nolint
                                  na_level = NULL) {
  # First make sure the variables exist in `df` (and, if `na_level` is given,
  # that none of them contains it); a failure message is returned unchanged.
  res <- check_df_with_variables(df, variables, na_level)
  # checking if all the columns specified by variables are valid factors
  if (isTRUE(res)) {
    # searching the data.frame with selected columns (variables) as a list
    res <- lapply(
      X = as.list(df)[unlist(variables)],
      FUN = check_valid_factor,
      min.levels = min.levels,
      max.levels = max.levels,
      any.missing = any.missing
    )
    # TRUE for every column whose check returned a message instead of TRUE
    res_lo <- !vapply(res, isTRUE, logical(1))
    if (any(res_lo)) {
      # One bullet per failing column. The "Column `" prefix is part of each
      # bullet so that every line is formatted the same way, not only the first.
      return(paste0(
        deparse(substitute(df)), " does not contain only factor variables among:",
        "\n* ", paste0(
          "Column `", unlist(variables)[res_lo],
          "` of the data.frame -> ", res[res_lo],
          collapse = "\n* "
        )
      ))
    } else {
      res <- TRUE
    }
  }
  return(res)
}
#' @describeIn assertions Check whether `df` is a data frame where the analysis `variables`
#'   are all factors. Note that the creation of `NA` by direct call of `factor()` will
#'   trim `NA` levels out of the vector list itself.
#'
#' @keywords internal
assert_df_with_factors <- checkmate::makeAssertionFunction(check_df_with_factors)

#' @describeIn assertions Check whether `x` is a proportion: number between 0 and 1.
#'   Unless `include_boundaries` is `TRUE`, the values 0 and 1 themselves are rejected.
#'
#' @keywords internal
assert_proportion_value <- function(x, include_boundaries = FALSE) {
  checkmate::assert_number(x, lower = 0, upper = 1)
  checkmate::assert_flag(include_boundaries)

  # With boundaries allowed, the closed-interval check above is all that is needed.
  if (include_boundaries) {
    return(invisible(NULL))
  }

  # Otherwise the value must lie strictly inside the interval.
  checkmate::assert_true(x > 0)
  checkmate::assert_true(x < 1)
}

#' Horizontal Waterfall Plot
#'
#' This basic waterfall plot visualizes a quantity `height` ordered by value with some markup.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param height (`numeric`)\cr vector containing values to be plotted as the waterfall bars.
#' @param id (`character`)\cr vector containing IDs to use as the x-axis label for the waterfall bars.
#' @param col (`character`)\cr colors.
#' @param col_var (`factor`, `character` or `NULL`)\cr categorical variable for bar coloring. `NULL` by default.
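#' @param xlab (`character`)\cr x label. If `NULL` (default), the deparsed expression supplied as `id` is used.
#' @param ylab (`character`)\cr y label. If `NULL` (default), the deparsed expression supplied as `height` is used.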
#' @param title (`character`)\cr text to be displayed as plot title.
#' @param col_legend_title (`character`)\cr text to be displayed as legend title.
#'
#' @return A `ggplot` waterfall plot.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' g_waterfall(height = c(3, 5, -1), id = letters[1:3])
#'
#' g_waterfall(
#'   height = c(3, 5, -1),
#'   id = letters[1:3],
#'   col_var = letters[1:3]
#' )
#'
#' adsl_f <- tern_ex_adsl %>%
#'   select(USUBJID, STUDYID, ARM, ARMCD, SEX)
#'
#' adrs_f <- tern_ex_adrs %>%
#'   filter(PARAMCD == "OVRINV") %>%
#'   mutate(pchg = rnorm(n(), 10, 50))
#'
#' adrs_f <- head(adrs_f, 30)
#' adrs_f <- adrs_f[!duplicated(adrs_f$USUBJID), ]
#' head(adrs_f)
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = adrs_f$USUBJID,
#'   col_var = adrs_f$AVALC
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   col_var = adrs_f$SEX
#' )
#'
#' g_waterfall(
#'   height = adrs_f$pchg,
#'   id = paste("asdfdsfdsfsd", adrs_f$USUBJID),
#'   xlab = "ID",
#'   ylab = "Percentage Change",
#'   title = "Waterfall plot"
#' )
#'
#' @export
g_waterfall <- function(height,
                        id,
                        col_var = NULL,
                        col = getOption("ggplot2.discrete.colour"),
                        xlab = NULL,
                        ylab = NULL,
                        col_legend_title = NULL,
                        title = NULL) {
  # Input consistency check via the package helper `check_same_n()` (defined
  # elsewhere; presumably verifies that the vectors have the same length).
  # `col_var` only takes part when it is supplied.
  if (!is.null(col_var)) {
    check_same_n(height = height, id = id, col_var = col_var)
  } else {
    check_same_n(height = height, id = id)
  }

  checkmate::assert_multi_class(col_var, c("character", "factor"), null.ok = TRUE)
  checkmate::assert_character(col, null.ok = TRUE)

  # Capture the expressions passed by the caller as text; they serve as
  # fallback axis labels and legend title below.
  xlabel <- deparse(substitute(id))
  ylabel <- deparse(substitute(height))

  # Stays NULL when `col_var` was not passed by the caller.
  col_label <- if (!missing(col_var)) {
    deparse(substitute(col_var))
  }

  # Explicitly supplied labels take precedence over the captured expressions.
  xlab <- if (is.null(xlab)) xlabel else xlab
  ylab <- if (is.null(ylab)) ylabel else ylab
  col_legend_title <- if (is.null(col_legend_title)) col_label else col_legend_title

  # Without `col_var` a constant placeholder column is used; otherwise `col_var`
  # goes through the package helper `to_n()` (defined elsewhere; presumably
  # brings it to the length of `height` -- TODO confirm).
  plot_data <- data.frame(
    height = height,
    id = as.character(id),
    col_var = if (is.null(col_var)) "x" else to_n(col_var, length(height)),
    stringsAsFactors = FALSE
  )

  # Sort the bars from the largest to the smallest value.
  plot_data_ord <- plot_data[order(plot_data$height, decreasing = TRUE), ]

  # `factor(id, levels = id)` fixes the x-axis order to the sorted row order.
  # The text labels show the bar values; `vjust` depends on the sign of the
  # value so that the label is offset from the bar end in either direction.
  p <- ggplot2::ggplot(plot_data_ord, ggplot2::aes(x = factor(id, levels = id), y = height)) +
    ggplot2::geom_col() +
    ggplot2::geom_text(
      label = format(plot_data_ord$height, digits = 2),
      vjust = ifelse(plot_data_ord$height >= 0, -0.5, 1.5)
    ) +
    ggplot2::xlab(xlab) +
    ggplot2::ylab(ylab) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 0, vjust = .5))

  # Fill colouring and legend are only added when a colouring variable is given.
  if (!is.null(col_var)) {
    p <- p +
      ggplot2::aes(fill = col_var) +
      ggplot2::labs(fill = col_legend_title) +
      ggplot2::theme(
        legend.position = "bottom",
        legend.background = ggplot2::element_blank(),
        legend.title = ggplot2::element_text(face = "bold"),
        legend.box.background = ggplot2::element_rect(colour = "black")
      )
  }

  # `col` defaults to the "ggplot2.discrete.colour" option, which may be unset
  # (NULL); a manual fill scale is only applied when colours are available.
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_fill_manual(values = col)
  }

  # Optional bold plot title.
  if (!is.null(title)) {
    p <- p +
      ggplot2::labs(title = title) +
      ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
  }

  p
}

#' Apply 1/3 or 1/2 Imputation Rule to Data
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @param x_stats (`named list`)\cr a named list of statistics, typically the results of [s_summary()].
#' @param stat (`character`)\cr statistic to return the value/NA level of according to the imputation
#'   rule applied.
#' @param imp_rule (`character`)\cr imputation rule setting. Set to `"1/3"` to implement 1/3 imputation
#'   rule or `"1/2"` to implement 1/2 imputation rule.
#' @param post (`flag`)\cr whether the data corresponds to a post-dose time-point (defaults to `FALSE`).
#'   This parameter is only used when `imp_rule` is set to `"1/3"`.
#' @param avalcat_var (`character`)\cr name of variable that indicates whether a row in `df` corresponds
#'   to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of the above
#'   (defaults to `"AVALCAT1"`). Variable `avalcat_var` must be present in `df`.
#'
#' @return A `list` containing statistic value (`val`) and NA level (`na_str`) that should be displayed
#'   according to the specified imputation rule.
#'
#' @seealso [analyze_vars_in_cols()] where this function can be implemented by setting the `imp_rule`
#'   argument.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   AVAL = runif(50, 0, 1),
#'   AVALCAT1 = sample(c(1, "BLQ"), 50, replace = TRUE)
#' )
#' x_stats <- s_summary(df$AVAL)
#' imputation_rule(df, x_stats, "max", "1/3")
#' imputation_rule(df, x_stats, "geom_mean", "1/3")
#' imputation_rule(df, x_stats, "mean", "1/2")
#'
#' @export
imputation_rule <- function(df, x_stats, stat, imp_rule, post = FALSE, avalcat_var = "AVALCAT1") {
  checkmate::assert_choice(avalcat_var, names(df))
  checkmate::assert_choice(imp_rule, c("1/3", "1/2"))

  # Proportion of rows whose category value matches one of the patterns below;
  # `max(1, ...)` guards against division by zero for an empty data set.
  blq_pattern <- "BLQ|LTR|<[1-9]|<PCLLOQ"
  n_blq <- sum(grepl(blq_pattern, df[[avalcat_var]]))
  ltr_blq_ratio <- n_blq / max(1, nrow(df))

  # Defaults: report the statistic as computed, with "NE" as the NA string.
  val <- x_stats[[stat]]
  na_str <- "NE"

  switch(imp_rule,
    "1/3" = {
      # Pre-dose: the geometric mean is never reported.
      if (!post && stat == "geom_mean") val <- NA

      if (ltr_blq_ratio > 1 / 3) {
        # More than a third flagged: only a few statistics remain reportable
        # (median and max pre-dose; additionally the geometric mean post-dose).
        if (stat != "geom_mean") na_str <- "ND"
        suppress_pre <- !post && !(stat %in% c("median", "max"))
        suppress_post <- post && !(stat %in% c("median", "max", "geom_mean"))
        if (suppress_pre || suppress_post) val <- NA
      }
    },
    "1/2" = {
      # More than half flagged: everything except the maximum is suppressed.
      if (ltr_blq_ratio > 1 / 2 && stat != "max") {
        val <- NA
        na_str <- "ND"
      }
    }
  )

  list(val = val, na_str = na_str)
}

#' Helper Function to create a new `SMQ` variable in `ADAE` by stacking `SMQ` and/or `CQ` records.
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper Function to create a new `SMQ` variable in `ADAE` that consists of all adverse events belonging to
#' selected Standardized/Customized queries. The new dataset will only contain records of the adverse events
#' belonging to any of the selected baskets. Remember that `na_str` must match the needed pre-processing
#' done with [df_explicit_na()] to have the desired output.
#'
#' @inheritParams argument_convention
#' @param baskets (`character`)\cr variable names of the selected Standardized/Customized queries.
#' @param smq_varlabel (`string`)\cr a label for the new variable created.
#' @param keys (`character`)\cr names of the key variables to be returned along with the new variable created.
#' @param aag_summary (`data.frame`)\cr containing the `SMQ` baskets and the levels of interest for the final `SMQ`
#'   variable. This is useful when there are some levels of interest that are not observed in the `df` dataset.
#'   The two columns of this dataset should be named `basket` and `basket_name`.
#'
#' @return `data.frame` with variables in `keys` taken from `df` and new variable `SMQ` containing
#'   records belonging to the baskets selected via the `baskets` argument.
#'
#' @examples
#' adae <- tern_ex_adae[1:20, ] %>% df_explicit_na()
#' h_stack_by_baskets(df = adae)
#'
#' aag <- data.frame(
#'   NAMVAR = c("CQ01NAM", "CQ02NAM", "SMQ01NAM", "SMQ02NAM"),
#'   REFNAME = c(
#'     "D.2.1.5.3/A.1.1.1.1 AESI", "X.9.9.9.9/Y.8.8.8.8 AESI",
#'     "C.1.1.1.3/B.2.2.3.1 AESI", "C.1.1.1.3/B.3.3.3.3 AESI"
#'   ),
#'   SCOPE = c("", "", "BROAD", "BROAD"),
#'   stringsAsFactors = FALSE
#' )
#'
#' basket_name <- character(nrow(aag))
#' cq_pos <- grep("^(CQ).+NAM$", aag$NAMVAR)
#' smq_pos <- grep("^(SMQ).+NAM$", aag$NAMVAR)
#' basket_name[cq_pos] <- aag$REFNAME[cq_pos]
#' basket_name[smq_pos] <- paste0(
#'   aag$REFNAME[smq_pos], "(", aag$SCOPE[smq_pos], ")"
#' )
#'
#' aag_summary <- data.frame(
#'   basket = aag$NAMVAR,
#'   basket_name = basket_name,
#'   stringsAsFactors = TRUE
#' )
#'
#' result <- h_stack_by_baskets(df = adae, aag_summary = aag_summary)
#' all(levels(aag_summary$basket_name) %in% levels(result$SMQ))
#'
#' h_stack_by_baskets(
#'   df = adae,
#'   aag_summary = NULL,
#'   keys = c("STUDYID", "USUBJID", "AEDECOD", "ARM"),
#'   baskets = "SMQ01NAM"
#' )
#'
#' @export
h_stack_by_baskets <- function(df,
                               baskets = grep("^(SMQ|CQ).+NAM$", names(df), value = TRUE),
                               smq_varlabel = "Standardized MedDRA Query",
                               keys = c("STUDYID", "USUBJID", "ASTDTM", "AEDECOD", "AESEQ"),
                               aag_summary = NULL,
                               na_level = lifecycle::deprecated(),
                               na_str = "<Missing>") {
  # Deprecated `na_level` overrides `na_str` when it is supplied.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "h_stack_by_baskets(na_level)", "h_stack_by_baskets(na_str)")
    na_str <- na_level
  }

  # `baskets` is validated before `startsWith()` is applied to it below.
  checkmate::assert_character(baskets)

  smq_nam <- baskets[startsWith(baskets, "SMQ")]
  # SC corresponding to NAM
  smq_sc <- gsub(pattern = "NAM", replacement = "SC", x = smq_nam, fixed = TRUE)
  smq <- stats::setNames(smq_sc, smq_nam)

  checkmate::assert_string(smq_varlabel)
  checkmate::assert_data_frame(df)
  checkmate::assert_true(all(startsWith(baskets, "SMQ") | startsWith(baskets, "CQ")))
  checkmate::assert_true(all(endsWith(baskets, "NAM")))
  checkmate::assert_subset(baskets, names(df))
  checkmate::assert_subset(keys, names(df))
  checkmate::assert_subset(smq_sc, names(df))
  checkmate::assert_string(na_str)

  if (!is.null(aag_summary)) {
    assert_df_with_variables(
      df = aag_summary,
      variables = list(val = c("basket", "basket_name"))
    )
    # Warning in case there is no match between `aag_summary$basket` and `baskets` argument.
    # Honestly, I think those should completely match. Target baskets should be the same.
    if (length(intersect(baskets, unique(aag_summary$basket))) == 0) {
      warning("There are 0 baskets in common between aag_summary$basket and `baskets` argument.")
    }
  }

  # `drop = FALSE` keeps a data frame even when a single key is requested on a plain `data.frame`.
  var_labels <- c(formatters::var_labels(df[, keys, drop = FALSE]), "SMQ" = smq_varlabel)

  # convert `na_str` records from baskets to NA for the later loop and from wide to long steps
  df[, c(baskets, smq_sc)][df[, c(baskets, smq_sc)] == na_str] <- NA

  if (all(is.na(df[, baskets, drop = FALSE]))) { # in case there is no level for the target baskets
    # we just need an empty dataframe keeping all factor levels
    df_long <- df[-seq_len(nrow(df)), keys, drop = FALSE]
  } else {
    # Concatenate SMQxxxNAM with corresponding SMQxxxSC
    df_cnct <- df[, c(keys, baskets[startsWith(baskets, "CQ")]), drop = FALSE]

    for (nam in names(smq)) {
      sc <- smq[nam] # SMQxxxSC corresponding to SMQxxxNAM
      nam_notna <- !is.na(df[[nam]])
      new_colname <- paste(nam, sc, sep = "_")
      df_cnct[nam_notna, new_colname] <- paste0(df[[nam]], "(", df[[sc]], ")")[nam_notna]
    }

    df_cnct$unique_id <- seq(1, nrow(df_cnct))
    var_cols <- names(df_cnct)[!(names(df_cnct) %in% c(keys, "unique_id"))]
    # have to convert df_cnct from tibble to dataframe
    # as it throws a warning otherwise about rownames.
    # tibble do not support rownames and reshape creates rownames

    df_long <- stats::reshape(
      data = as.data.frame(df_cnct),
      varying = var_cols,
      v.names = "SMQ",
      idvar = names(df_cnct)[names(df_cnct) %in% c(keys, "unique_id")],
      direction = "long",
      new.row.names = seq(prod(length(var_cols), nrow(df_cnct)))
    )

    # Keep only rows that belong to a basket and drop the reshape bookkeeping columns.
    df_long <- df_long[!is.na(df_long[, "SMQ"]), !(names(df_long) %in% c("time", "unique_id"))]
    df_long$SMQ <- as.factor(df_long$SMQ)
  }

  smq_levels <- setdiff(levels(df_long[["SMQ"]]), na_str)

  if (!is.null(aag_summary)) {
    # A warning in case there is no match between df and aag_summary records
    if (length(intersect(smq_levels, unique(aag_summary$basket_name))) == 0) {
      warning("There are 0 basket levels in common between aag_summary$basket_name and df.")
    }
    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(
        c(
          smq_levels,
          setdiff(unique(aag_summary$basket_name), smq_levels)
        )
      )
    )
  } else {
    # One flag per basket column. Without `drop = FALSE` a single basket on a plain `data.frame`
    # collapses to a vector and `vapply()` would iterate over rows instead of columns.
    all_na_basket_flag <- vapply(df[, baskets, drop = FALSE], function(x) {
      all(is.na(x))
    }, FUN.VALUE = logical(1))
    all_na_basket <- baskets[all_na_basket_flag]

    df_long[["SMQ"]] <- factor(
      df_long[["SMQ"]],
      levels = sort(c(smq_levels, all_na_basket))
    )
  }
  formatters::var_labels(df_long) <- var_labels
  tibble::tibble(df_long)
}

#' Patient Counts with Abnormal Range Values by Baseline Status
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`), and additional
#' analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or `factor`). For each
#' direction specified in `abnormal` (e.g. high or low) we condition on baseline range result and count
#' patients in the numerator and denominator as follows:
#'   * `Not <Abnormal>`
#'     * `denom`: the number of patients without abnormality at baseline (excluding those with missing baseline)
#'     * `num`:  the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `<Abnormal>`
#'     * `denom`: the number of patients with abnormality at baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'   * `Total`
#'     * `denom`: the number of patients with at least one valid measurement post-baseline
#'     * `num`: the number of patients in `denom` who also have at least one abnormality post-baseline
#'
#' @inheritParams argument_convention
#' @param abnormal (`character`)\cr identifying the abnormal range level(s) in `.var`.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_baseline")`
#'   to see available statistics for this function.
#'
#' @note
#' * `df` should be filtered to include only post-baseline records.
#' * If the baseline variable or analysis variable contains `NA`, it is expected that `NA` has been
#'   conveyed to `na_level` appropriately beforehand with [df_explicit_na()] or [explicit_na()].
#'
#' @seealso Relevant description function [d_count_abnormal_by_baseline()].
#'
#' @name abnormal_by_baseline
#' @order 1
NULL

#' Description Function for [s_count_abnormal_by_baseline()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Description function that produces the labels for [s_count_abnormal_by_baseline()].
#'
#' @inheritParams abnormal_by_baseline
#'
#' @return Abnormal category labels for [s_count_abnormal_by_baseline()].
#'
#' @examples
#' d_count_abnormal_by_baseline("LOW")
#'
#' @export
d_count_abnormal_by_baseline <- function(abnormal) {
  # Sentence case for the abnormal label: first character upper-cased, the rest lower-cased.
  first_char <- toupper(substr(abnormal, 1, 1))
  remainder <- tolower(substring(abnormal, 2))

  list(
    not_abnormal = paste("Not", tolower(abnormal)),
    abnormal = paste0(first_char, remainder),
    total = "Total"
  )
}

#' @describeIn abnormal_by_baseline Statistics function for a single `abnormal` level.
#'
#' @param na_str (`string`)\cr the explicit `na_level` argument you used in the pre-processing steps (maybe with
#'   [df_explicit_na()]). The default is `"<Missing>"`.
#'
#' @return
#' * `s_count_abnormal_by_baseline()` returns statistic `fraction` which is a named list with 3 labeled elements:
#'   `not_abnormal`, `abnormal`, and `total`. Each element contains a vector with `num` and `denom` patient counts.
#'
#' @keywords internal
s_count_abnormal_by_baseline <- function(df,
                                         .var,
                                         abnormal,
                                         na_level = lifecycle::deprecated(),
                                         na_str = "<Missing>",
                                         variables = list(id = "USUBJID", baseline = "BNRIND")) {
  # Deprecated `na_level` overrides `na_str` when it is supplied.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "s_count_abnormal_by_baseline(na_level)", "s_count_abnormal_by_baseline(na_str)")
    na_str <- na_level
  }

  checkmate::assert_string(.var)
  checkmate::assert_string(abnormal)
  checkmate::assert_string(na_str)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_subset(names(variables), c("id", "baseline"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))

  # Character inputs are converted to factors, then both columns must be free of `NA`.
  df[[.var]] <- as_factor_keep_attributes(df[[.var]], na_level = na_str)
  df[[variables$baseline]] <- as_factor_keep_attributes(df[[variables$baseline]], na_level = na_str)

  assert_valid_factor(df[[.var]], any.missing = FALSE)
  assert_valid_factor(df[[variables$baseline]], any.missing = FALSE)

  # Records whose analysis value equals `na_str` are excluded from every count.
  df <- df[df[[.var]] != na_str, ]

  anl <- data.frame(
    id = df[[variables$id]],
    var = df[[.var]],
    baseline = df[[variables$baseline]],
    stringsAsFactors = FALSE
  )

  # Number of distinct patients among the given ids.
  n_patients <- function(ids) length(unique(ids))

  # Total: all patients with a valid post-baseline record; numerator = any post-baseline abnormality.
  total <- c(
    num = n_patients(anl$id[anl$var == abnormal]),
    denom = n_patients(anl$id)
  )

  # Records with baseline equal to `na_str` contribute to the total row only.
  anl <- anl[anl$baseline != na_str, ]
  abn_at_baseline <- anl$baseline == abnormal
  abn_post_baseline <- anl$var == abnormal

  # Abnormal at baseline: denominator = abnormal baseline; numerator adds a post-baseline abnormality.
  abn <- c(
    num = n_patients(anl$id[abn_at_baseline & abn_post_baseline]),
    denom = n_patients(anl$id[abn_at_baseline])
  )

  # Not abnormal at baseline: same logic on the complementary set of records.
  not_abn <- c(
    num = n_patients(anl$id[!abn_at_baseline & abn_post_baseline]),
    denom = n_patients(anl$id[!abn_at_baseline])
  )

  labels <- d_count_abnormal_by_baseline(abnormal)
  list(fraction = list(
    not_abnormal = formatters::with_label(not_abn, labels$not_abnormal),
    abnormal = formatters::with_label(abn, labels$abnormal),
    total = formatters::with_label(total, labels$total)
  ))
}

#' @describeIn abnormal_by_baseline Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_baseline()`.
#'
#' @return
#' * `a_count_abnormal_by_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_baseline <- make_afun(
  # Statistics function being wrapped.
  s_count_abnormal_by_baseline,
  # The `fraction` statistic is paired with the `format_fraction` formatter.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal_by_baseline Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_baseline()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_baseline()` to the table layout.
#'
#' @examples
#' df <- data.frame(
#'   USUBJID = as.character(c(1:6)),
#'   ANRIND = factor(c(rep("LOW", 4), "NORMAL", "HIGH")),
#'   BNRIND = factor(c("LOW", "NORMAL", "HIGH", NA, "LOW", "NORMAL"))
#' )
#' df <- df_explicit_na(df)
#'
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal_by_baseline(var = "ANRIND", abnormal = c(High = "HIGH")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 2, 3, 4)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BLRANGE = factor(c("LOW", "HIGH", "HIGH", "NORMAL"))
#' )
#'
#' basic_table() %>%
#'   count_abnormal_by_baseline(
#'     var = "RANGE",
#'     abnormal = c(Low = "LOW"),
#'     variables = list(id = "ID", baseline = "BLRANGE"),
#'     .formats = c(fraction = "xx / xx"),
#'     .indent_mods = c(fraction = 2L)
#'   ) %>%
#'   build_table(df2)
#'
#' @export
#' @order 2
count_abnormal_by_baseline <- function(lyt,
                                       var,
                                       abnormal,
                                       variables = list(id = "USUBJID", baseline = "BNRIND"),
                                       na_str = "<Missing>",
                                       nested = TRUE,
                                       ...,
                                       table_names = abnormal,
                                       .stats = NULL,
                                       .formats = NULL,
                                       .labels = NULL,
                                       .indent_mods = NULL) {
  # `abnormal` must be a named character vector with one entry per table name.
  checkmate::assert_character(abnormal, len = length(table_names), names = "named")
  checkmate::assert_string(var)

  # Arguments forwarded to the analysis function; `abnormal` is overwritten per level below.
  extra_args <- list(abnormal = abnormal, variables = variables, na_str = na_str, ...)

  afun <- make_afun(
    a_count_abnormal_by_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # One `analyze()` call per abnormal level, labelled with the name of that level.
  for (idx in seq_along(abnormal)) {
    level <- abnormal[idx]
    extra_args[["abnormal"]] <- level

    lyt <- analyze(
      lyt = lyt,
      vars = var,
      var_labels = names(level),
      afun = afun,
      na_str = na_str,
      nested = nested,
      table_names = table_names[idx],
      extra_args = extra_args,
      show_labels = "visible"
    )
  }

  lyt
}

#' Formatting Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' See below for the list of formatting functions created in `tern` to work with `rtables`.
#'
#' Other available formats can be listed via [`formatters::list_valid_format_labels()`]. Additional
#' custom formats can be created via the [`formatters::sprintf_format()`] function.
#'
#' @family formatting functions
#' @name formatting_functions
NULL

#' Formatting Fraction and Percentage
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num/denom (ratio%)`. If `num` is 0, the format is `num/denom`.
#'
#' @examples
#' format_fraction(x = c(num = 2L, denom = 3L))
#' format_fraction(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction <- function(x, ...) {
  # Remove the label attribute before validating and formatting.
  attr(x, "label") <- NULL

  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  counts <- paste0(x["num"], "/", x["denom"])

  # A zero numerator is shown without a percentage.
  if (x["num"] == 0) {
    return(counts)
  }

  paste0(counts, " (", round(x["num"] / x["denom"] * 100, 1), "%)")
}

#' Formatting Fraction and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction together with ratio in percent with fixed single decimal place.
#' Includes trailing zero in case of whole number percentages to always keep one decimal place.
#'
#' @param x (`integer`)\cr with elements `num` and `denom`.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `num/denom (ratio%)`. If `num` is 0, the format is `num/denom`.
#'
#' @examples
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 2L))
#' format_fraction_fixed_dp(x = c(num = 1L, denom = 4L))
#' format_fraction_fixed_dp(x = c(num = 0L, denom = 3L))
#'
#' @family formatting functions
#' @export
format_fraction_fixed_dp <- function(x, ...) {
  # Remove the label attribute before validating and formatting.
  attr(x, "label") <- NULL
  checkmate::assert_vector(x)
  checkmate::assert_count(x["num"])
  checkmate::assert_count(x["denom"])

  counts <- paste0(x["num"], "/", x["denom"])

  # A zero numerator is shown without a percentage.
  if (x["num"] == 0) {
    return(counts)
  }

  # `%.1f` keeps the trailing zero for whole-number percentages.
  pct <- sprintf("%.1f", round(x["num"] / x["denom"] * 100, 1))
  paste0(counts, " (", pct, "%)")
}

#' Formatting Count and Fraction
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction(x = c(2, 0.6667))
#' format_count_fraction(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction <- function(x, ...) {
  # Remove the label attribute before validating and formatting.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA" before validation.
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # A zero count is printed on its own, without a percentage.
  if (x[1] == 0) {
    return("0")
  }

  paste0(x[1], " (", round(x[2] * 100, 1), "%)")
}

#' Formatting Count and Percentage with Fixed Single Decimal Place
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' Formats a count together with fraction with special consideration when count is `0`.
#'
#' @param x (`integer`)\cr vector of length 2, count and fraction.
#' @param ... required for `rtables` interface.
#'
#' @return A string in the format `count (fraction %)`. If `count` is 0, the format is `0`.
#'
#' @examples
#' format_count_fraction_fixed_dp(x = c(2, 0.6667))
#' format_count_fraction_fixed_dp(x = c(2, 0.5))
#' format_count_fraction_fixed_dp(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_count_fraction_fixed_dp <- function(x, ...) {
  # Remove the label attribute before validating and formatting.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA" before validation.
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # A zero count is printed on its own, without a percentage.
  if (x[1] == 0) {
    return("0")
  }

  # A fraction of exactly 1 is printed as "100%" with no decimal place.
  if (x[2] == 1) {
    return(sprintf("%d (100%%)", x[1]))
  }

  sprintf("%d (%.1f%%)", x[1], x[2] * 100)
}

#' Formatting Count and Fraction with Special Case for Count < 10
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a count together with fraction with special consideration when count is less than 10.
#'
#' @inheritParams format_count_fraction
#'
#' @return A string in the format `count (fraction %)`. If `count` is less than 10, only `count` is printed.
#'
#' @examples
#' format_count_fraction_lt10(x = c(275, 0.9673))
#' format_count_fraction_lt10(x = c(2, 0.6667))
#' format_count_fraction_lt10(x = c(9, 1))
#'
#' @family formatting functions
#' @export
format_count_fraction_lt10 <- function(x, ...) {
  # Remove the label attribute before validating and formatting.
  attr(x, "label") <- NULL

  # Any missing element short-circuits to the literal string "NA" before validation.
  if (any(is.na(x))) {
    return("NA")
  }

  checkmate::assert_vector(x)
  checkmate::assert_integerish(x[1])
  assert_proportion_value(x[2], include_boundaries = TRUE)

  # Counts of 10 or more get the percentage appended; smaller counts are printed alone.
  if (x[1] >= 10) {
    return(paste0(x[1], " (", round(x[2] * 100, 1), "%)"))
  }

  paste0(x[1])
}

#' Formatting: XX as Formatting Function
#'
#' Translate a string where x and dots are interpreted as number place
#' holders, and others as formatting elements.
#'
#' @param str (`string`)\cr template.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' test <- list(c(1.658, 0.5761), c(1e1, 785.6))
#'
#' z <- format_xx("xx (xx.x)")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x - xx.x")
#' sapply(test, z)
#'
#' z <- format_xx("xx.x, incl. xx.x% NE")
#' sapply(test, z)
#'
#' @family formatting functions
#' @export
format_xx <- function(str) {
  # Locate every number placeholder in the template: a run of `x`, optionally
  # followed by a dot and another run of `x`.
  positions <- gregexpr(pattern = "x+\\.x+|x+", text = str, perl = TRUE)
  x_positions <- regmatches(x = str, m = positions)[[1]]

  # One rounding function per placeholder. The number of decimal places is the
  # number of `x` after the dot, or 0 when the placeholder has no dot.
  roundings <- lapply(
    X = x_positions,
    function(placeholder) {
      parts <- strsplit(split = "\\.", x = placeholder)[[1]]
      digits <- if (length(parts) > 1) nchar(parts[2]) else 0
      function(value) round(value, digits = digits)
    }
  )

  # `...` absorbs any additional arguments supplied by the caller, matching the
  # `function(x, ...)` signature of the other formatting functions in this file.
  rtable_format <- function(x, output, ...) {
    # Round each value with the rounding function of its placeholder, then
    # substitute the rounded values into a local copy of the template.
    values <- Map(y = x, fun = roundings, function(y, fun) fun(y))
    regmatches(x = str, m = positions)[[1]] <- values
    str
  }

  rtable_format
}

#' Formatting Numeric Values By Significant Figures
#'
#' Format numeric values to print with a specified number of significant figures.
#'
#' @param sigfig (`integer`)\cr number of significant figures to display.
#' @param format (`character`)\cr the format label (string) to apply when printing the value. Decimal
#'   places in string are ignored in favor of formatting by significant figures. Formats options are:
#'   `"xx"`, `"xx / xx"`, `"(xx, xx)"`, `"xx - xx"`, and `"xx (xx)"`.
#' @param num_fmt (`character`)\cr numeric format modifiers to apply to the value. Defaults to `"fg"` for
#'   standard significant figures formatting - fixed (non-scientific notation) format (`"f"`)
#'   and `sigfig` equal to number of significant figures instead of decimal places (`"g"`). See the
#'   [formatC()] `format` argument for more options.
#'
#' @return An `rtables` formatting function.
#'
#' @examples
#' fmt_3sf <- format_sigfig(3)
#' fmt_3sf(1.658)
#' fmt_3sf(1e1)
#'
#' fmt_5sf <- format_sigfig(5)
#' fmt_5sf(0.57)
#' fmt_5sf(0.000025645)
#'
#' @family formatting functions
#' @export
format_sigfig <- function(sigfig, format = "xx", num_fmt = "fg") {
  checkmate::assert_integerish(sigfig)

  # Decimal-place markers in the label are reduced to plain `xx` before the label is validated.
  format <- gsub("xx\\.|xx\\.x+", "xx", format)
  checkmate::assert_choice(format, c("xx", "xx / xx", "(xx, xx)", "xx - xx", "xx (xx)"))

  function(x, ...) {
    if (!is.numeric(x)) {
      stop("`format_sigfig` cannot be used for non-numeric values. Please choose another format.")
    }

    rounded <- signif(x, digits = sigfig)
    num <- formatC(rounded, digits = sigfig, format = num_fmt, flag = "#")

    # remove trailing "." before handing the strings over to the label-based formatter
    format_value(sub("\\.$", "", num), format)
  }
}

#' Formatting Fraction with Lower Threshold
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formats a fraction when the second element of the input `x` is the fraction. It applies
#' a lower threshold, below which it is just stated that the fraction is smaller than that.
#'
#' @param threshold (`proportion`)\cr lower threshold.
#'
#' @return An `rtables` formatting function that takes numeric input `x` where the second
#'   element is the fraction that is formatted. If the fraction is above or equal to the threshold,
#'   then it is displayed in percentage. If it is positive but below the threshold, it returns,
#'   e.g. "<1" if the threshold is `0.01`. If it is zero, then just "0" is returned.
#'
#' @examples
#' format_fun <- format_fraction_threshold(0.05)
#' format_fun(x = c(20, 0.1))
#' format_fun(x = c(2, 0.01))
#' format_fun(x = c(0, 0))
#'
#' @family formatting functions
#' @export
format_fraction_threshold <- function(threshold) {
  assert_proportion_value(threshold)
  # Label used for positive fractions that fall below the threshold, e.g. "<5" for 0.05.
  string_below_threshold <- paste0("<", round(threshold * 100))

  function(x, ...) {
    # Only the second element of `x` (the fraction) is used.
    assert_proportion_value(x[2], include_boundaries = TRUE)

    # Compare against the user-supplied `threshold`: fractions at or above it are shown
    # as a rounded percentage, zero as "0", and anything in between as "<threshold%".
    ifelse(
      x[2] >= threshold,
      round(x[2] * 100),
      ifelse(
        x[2] == 0,
        "0",
        string_below_threshold
      )
    )
  }
}

#' Formatting Extreme Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' `rtables` formatting functions that handle extreme values.
#'
#' @param digits (`integer`)\cr number of decimal places to display.
#'
#' @details For each input, apply a format to the specified number of `digits`. If the value is
#'    below a threshold, it returns "<0.01" e.g. if the number of `digits` is 2. If the value is
#'    above a threshold, it returns ">999.99" e.g. if the number of `digits` is 2.
#'    If it is zero, then returns "0.00".
#'
#' @family formatting functions
#' @name extreme_format
NULL

#' @describeIn extreme_format Internal helper function to calculate the threshold and create formatted strings
#'  used in Formatting Functions. Returns a list with elements `threshold` and `format_string`.
#'
#' @return
#' * `h_get_format_threshold()` returns a `list` of 2 elements: `threshold`, with `low` and `high` thresholds,
#'   and `format_string`, with thresholds formatted as strings.
#'
#' @examples
#' h_get_format_threshold(2L)
#'
#' @export
h_get_format_threshold <- function(digits = 2L) {
  checkmate::assert_integerish(digits)

  low_threshold <- 1 / (10 ^ digits) # styler: off
  high_threshold <- 1000 - (1 / (10 ^ digits)) # styler: off

  # Render the thresholds in fixed notation with `digits` decimal places. Pasting the
  # numbers directly switches to scientific notation for small values
  # (e.g. "<1e-04" instead of "<0.0001" when `digits` is 4).
  n_dec <- max(digits, 0)
  string_below_threshold <- paste0("<", formatC(low_threshold, digits = n_dec, format = "f"))
  string_above_threshold <- paste0(">", formatC(high_threshold, digits = n_dec, format = "f"))

  list(
    "threshold" = c(low = low_threshold, high = high_threshold),
    "format_string" = c(low = string_below_threshold, high = string_above_threshold)
  )
}

#' @describeIn extreme_format Internal helper function to apply a threshold format to a value.
#'   Creates a formatted string to be used in Formatting Functions.
#'
#' @param x (`number`)\cr value to format.
#'
#' @return
#' * `h_format_threshold()` returns the given value, or if the value is not within the digit threshold the relation
#'   of the given value to the digit threshold, as a formatted string.
#'
#' @examples
#' h_format_threshold(0.001)
#' h_format_threshold(1000)
#'
#' @export
h_format_threshold <- function(x, digits = 2L) {
  # Missing values are handed back untouched, before any validation.
  if (is.na(x)) {
    return(x)
  }

  checkmate::assert_numeric(x, lower = 0)

  l_fmt <- h_get_format_threshold(digits)
  limits <- l_fmt$threshold
  labels <- l_fmt$format_string

  # Positive but below the low threshold.
  if (0 < x && x < limits["low"]) {
    return(unname(labels["low"]))
  }

  # Above the high threshold.
  if (x > limits["high"]) {
    return(unname(labels["high"]))
  }

  # Within range (or exactly zero): fixed number of decimal places.
  unname(sprintf(fmt = paste0("%.", digits, "f"), x))
}

#' Formatting a Single Extreme Value
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Create Formatting Function for a single extreme value.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme value.
#'
#' @examples
#' format_fun <- format_extreme_values(2L)
#' format_fun(x = 0.127)
#' format_fun(x = Inf)
#' format_fun(x = 0)
#' format_fun(x = 0.009)
#'
#' @family formatting functions
#' @export
format_extreme_values <- function(digits = 2L) {
  # The returned formatter validates that `x` is a scalar (`NA` allowed) and
  # delegates to `h_format_threshold()` with the captured `digits`.
  fmt_fun <- function(x, ...) {
    checkmate::assert_scalar(x, na.ok = TRUE)

    h_format_threshold(x = x, digits = digits)
  }

  fmt_fun
}

#' Formatting Extreme Values Part of a Confidence Interval
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting Function for extreme values part of a confidence interval. Values
#' are formatted as e.g. "(xx.xx, xx.xx)" if the number of `digits` is 2.
#'
#' @inheritParams extreme_format
#'
#' @return An `rtables` formatting function that uses threshold `digits` to return a formatted extreme
#'   values confidence interval.
#'
#' @examples
#' format_fun <- format_extreme_values_ci(2L)
#' format_fun(x = c(0.127, Inf))
#' format_fun(x = c(0, 0.009))
#'
#' @family formatting functions
#' @export
format_extreme_values_ci <- function(digits = 2L) {
  # The returned formatter expects a length-2 vector and formats each bound
  # independently with `h_format_threshold()`.
  fmt_fun <- function(x, ...) {
    checkmate::assert_vector(x, len = 2)

    lower <- h_format_threshold(x = x[1], digits = digits)
    upper <- h_format_threshold(x = x[2], digits = digits)

    paste0("(", paste(lower, upper, sep = ", "), ")")
  }

  fmt_fun
}

#' Automatic formats from data significant digits
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Formatting function for the majority of default methods used in [analyze_vars()].
#' For non-derived values (e.g. range), the maximum number of decimal places found in the data is
#' used, while derived values (measures of location and dispersion such as mean and standard
#' deviation) are printed with one more digit. This function can be called internally with
#' "auto" like, for example, `.formats = c("mean" = "auto")`. See details to see how this works
#' with the inner function.
#'
#' @param dt_var (`numeric`) \cr all the data the statistics was created upon. Used only to find
#'   significant digits. In [analyze_vars] this comes from `.df_row` (see
#'   [rtables::additional_fun_params]), and it is the row data after the above row splits. No
#'   column split is considered.
#' @param x_stat (`string`) \cr string indicating the current statistical method used.
#'
#' @return A string that `rtables` prints in a table cell.
#'
#' @details
#' The internal function is needed to work with the `rtables` default structure for
#' format functions, i.e. `function(x, ...)`, where `x` holds the results of the statistical
#' evaluation. It can contain more than one element (e.g. for `.stats = "mean_sd"`).
#'
#' @examples
#' x_todo <- c(0.001, 0.2, 0.0011000, 3, 4)
#' res <- c(mean(x_todo[1:3]), sd(x_todo[1:3]))
#'
#' # x is the result coming into the formatting function -> res!!
#' format_auto(dt_var = x_todo, x_stat = "mean_sd")(x = res)
#' format_auto(x_todo, "range")(x = range(x_todo))
#' no_sc_x <- c(0.0000001, 1)
#' format_auto(no_sc_x, "range")(x = no_sc_x)
#'
#' @family formatting functions
#' @export
format_auto <- function(dt_var, x_stat) {
  function(x = "", ...) {
    checkmate::assert_numeric(x, min.len = 1)
    checkmate::assert_numeric(dt_var, min.len = 1)

    # Defaults - they may be a param in the future
    der_stats <- c(
      "mean", "sd", "se", "median", "geom_mean", "quantiles", "iqr",
      "mean_sd", "mean_se", "mean_se", "mean_ci", "mean_sei", "mean_sdi",
      "median_ci"
    )
    nonder_stats <- c("n", "range", "min", "max")

    # Safety net: a statistic must never be listed as both derived and non-derived.
    stopifnot(length(intersect(der_stats, nonder_stats)) == 0) # nolint
    checkmate::assert_choice(x_stat, c(der_stats, nonder_stats))

    # Largest number of decimal places found in the data; derived statistics get one more.
    n_digits <- max(vapply(dt_var, count_decimalplaces, FUN.VALUE = numeric(1)))
    if (x_stat %in% der_stats) {
      n_digits <- n_digits + 1
    }

    # Render the values and fetch the placeholders of the default format for this statistic.
    str_vals <- formatC(x, digits = n_digits, format = "f")
    def_fmt <- get_formats_from_stats(x_stat)[[x_stat]]
    placeholders <- str_extract(def_fmt, invert = FALSE)[[1]]
    if (length(placeholders) != length(str_vals)) {
      stop(
        "Number of inserted values as result (", length(str_vals),
        ") is not the same as there should be in the default tern formats for ",
        x_stat, " (-> ", def_fmt, " needs ", length(placeholders), " values). ",
        "See tern_default_formats to check all of them."
      )
    }

    # Literal text surrounding the placeholders: always one more piece than there are values.
    inv_str_fmt <- str_extract(def_fmt, invert = TRUE)[[1]]
    stopifnot(length(inv_str_fmt) == length(str_vals) + 1) # nolint

    # Interleave literal pieces and values (piece, value, piece, ..., piece); the padding ""
    # after the last value does not alter the pasted result.
    interleaved <- c(rbind(inv_str_fmt, c(str_vals, "")))
    paste0(interleaved, collapse = "")
  }
}

# Utility function that could be useful in general
str_extract <- function(string, pattern = "xx|xx\\.|xx\\.x+", invert = FALSE) {
  # Match positions for every element of `string`; with `invert = TRUE` the text
  # between the matches is returned instead of the matches themselves.
  match_data <- gregexpr(pattern, string)
  regmatches(string, match_data, invert = invert)
}

# Helper function
count_decimalplaces <- function(dec) {
  # Missing or non-finite values have no decimal places to count. Without this guard the
  # comparison below evaluates to `NA` and `if` stops with an error.
  if (!is.finite(dec)) {
    return(0)
  }

  if (abs(dec - round(dec)) > .Machine$double.eps^0.5) { # For precision
    # Number of characters after the decimal point in the fixed (non-scientific) rendering.
    nchar(strsplit(format(dec, scientific = FALSE, trim = FALSE), ".", fixed = TRUE)[[1]][[2]])
  } else {
    0
  }
}

#' Apply Auto Formatting
#'
#' Checks if any of the listed formats in `.formats` are `"auto"`, and replaces `"auto"` with
#' the correct implementation of `format_auto` for the given statistics, data, and variable.
#'
#' @inheritParams argument_convention
#' @param x_stats (named `list`)\cr a named list of statistics where each element corresponds
#'   to an element in `.formats`, with matching names.
#'
#' @keywords internal
apply_auto_formatting <- function(.formats, x_stats, .df_row, .var) {
  # Flag the entries of `.formats` that are exactly the string "auto".
  auto_flags <- vapply(
    .formats,
    function(fmt) is.character(fmt) && fmt == "auto",
    logical(1)
  )

  # Nothing to resolve: hand the formats back untouched.
  if (!any(auto_flags)) {
    return(.formats)
  }

  # Statistics are picked with the same mask, so `x_stats` has to line up with `.formats`.
  stat_names <- names(x_stats[auto_flags])
  var_values <- .df_row[[.var]] # xxx this can be extended for the WHOLE data or single facets
  .formats[auto_flags] <- lapply(stat_names, format_auto, dt_var = var_values)
  .formats
}

#' Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams prop_diff_strat_nc
#' @inheritParams argument_convention
#' @param method (`string`)\cr the method used for the confidence interval estimation.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_proportion_diff")`
#'   to see available statistics for this function.
#'
#' @seealso [d_proportion_diff()]
#'
#' @name prop_diff
#' @order 1
NULL

#' @describeIn prop_diff Statistics function estimating the difference
#'   in terms of responder proportion.
#'
#' @return
#' * `s_proportion_diff()` returns a named list of elements `diff` and `diff_ci`.
#'
#' @note When performing an unstratified analysis, methods `"cmh"`, `"strat_newcombe"`, and `"strat_newcombecc"` are
#'   not permitted.
#'
#' @examples
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' # CMH example with strata
#' s_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "cmh"
#' )
#'
#' @export
s_proportion_diff <- function(df,
                              .var,
                              .ref_group,
                              .in_ref_col,
                              variables = list(strata = NULL),
                              conf_level = 0.95,
                              method = c(
                                "waldcc", "wald", "cmh",
                                "ha", "newcombe", "newcombecc",
                                "strat_newcombe", "strat_newcombecc"
                              ),
                              weights_method = "cmh") {
  method <- match.arg(method)
  # Stratified methods cannot run without stratification variables.
  if (is.null(variables$strata) && checkmate::test_subset(method, c("cmh", "strat_newcombe", "strat_newcombecc"))) {
    stop(paste(
      "When performing an unstratified analysis, methods 'cmh', 'strat_newcombe', and 'strat_newcombecc' are not",
      "permitted. Please choose a different method."
    ))
  }
  # Empty placeholders: this is what the reference column itself gets.
  y <- list(diff = "", diff_ci = "")

  if (!.in_ref_col) {
    # Stack reference rows first, then the rows of the current (non-reference) column.
    rsp <- c(.ref_group[[.var]], df[[.var]])
    grp <- factor(
      rep(
        c("ref", "Not-ref"),
        c(nrow(.ref_group), nrow(df))
      ),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata)) {
      strata_colnames <- variables$strata
      checkmate::assert_character(strata_colnames, null.ok = FALSE)
      strata_vars <- stats::setNames(as.list(strata_colnames), strata_colnames)

      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)

      # Merging interaction strata for reference group rows data and remaining
      strata <- c(
        interaction(.ref_group[strata_colnames]),
        interaction(df[strata_colnames])
      )
      strata <- as.factor(strata)
    }

    # Weights for the stratified Newcombe methods: `variables$weights_method` still takes
    # precedence when supplied; otherwise the `weights_method` argument is honoured
    # (previously it was always reset to "cmh", so the argument had no effect).
    if (!is.null(variables$weights_method)) {
      weights_method <- variables$weights_method
    }

    y <- switch(method,
      "wald" = prop_diff_wald(rsp, grp, conf_level, correct = FALSE),
      "waldcc" = prop_diff_wald(rsp, grp, conf_level, correct = TRUE),
      "ha" = prop_diff_ha(rsp, grp, conf_level),
      "newcombe" = prop_diff_nc(rsp, grp, conf_level, correct = FALSE),
      "newcombecc" = prop_diff_nc(rsp, grp, conf_level, correct = TRUE),
      "strat_newcombe" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = FALSE
      ),
      "strat_newcombecc" = prop_diff_strat_nc(rsp,
        grp,
        strata,
        weights_method,
        conf_level,
        correct = TRUE
      ),
      "cmh" = prop_diff_cmh(rsp, grp, strata, conf_level)[c("diff", "diff_ci")]
    )

    # Report on the percentage scale.
    y$diff <- y$diff * 100
    y$diff_ci <- y$diff_ci * 100
  }

  attr(y$diff, "label") <- "Difference in Response rate (%)"
  attr(y$diff_ci, "label") <- d_proportion_diff(
    conf_level, method,
    long = FALSE
  )

  y
}

#' @describeIn prop_diff Formatted analysis function which is used as `afun` in `estimate_proportion_diff()`.
#'
#' @return
#' * `a_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' a_proportion_diff(
#'   df = subset(dta, grp == "A"),
#'   .var = "rsp",
#'   .ref_group = subset(dta, grp == "B"),
#'   .in_ref_col = FALSE,
#'   conf_level = 0.90,
#'   method = "ha"
#' )
#'
#' @export
# Wraps `s_proportion_diff()` with default cell formats; the confidence interval row is
# indented one level below the difference row.
a_proportion_diff <- make_afun(
  s_proportion_diff,
  .indent_mods = c(diff = 0L, diff_ci = 1L),
  .formats = c(diff = "xx.x", diff_ci = "(xx.x, xx.x)")
)

#' @describeIn prop_diff Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion_diff()` to the table layout.
#'
#' @examples
#' ## Example data: random responses for two groups with two stratification factors.
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   estimate_proportion_diff(
#'     vars = "rsp",
#'     conf_level = 0.90,
#'     method = "ha"
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
estimate_proportion_diff <- function(lyt,
                                     vars,
                                     variables = list(strata = NULL),
                                     conf_level = 0.95,
                                     method = c(
                                       "waldcc", "wald", "cmh",
                                       "ha", "newcombe", "newcombecc",
                                       "strat_newcombe", "strat_newcombecc"
                                     ),
                                     weights_method = "cmh",
                                     na_str = default_na_str(),
                                     nested = TRUE,
                                     ...,
                                     var_labels = vars,
                                     show_labels = "hidden",
                                     table_names = vars,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  # Arguments handed on to the statistics function for every analysed cell,
  # followed by anything the caller supplied through `...`.
  stat_args <- c(
    list(
      variables = variables,
      conf_level = conf_level,
      method = method,
      weights_method = weights_method
    ),
    list(...)
  )

  # Tailor the default formatted analysis function to the requested
  # statistics, formats, labels and indentation.
  cell_fun <- make_afun(
    a_proportion_diff,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  analyze(
    lyt,
    vars,
    afun = cell_fun,
    extra_args = stat_args,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested
  )
}

#' Check: Proportion Difference Arguments
#'
#' Verifies that the arguments are valid values to be used in the
#' estimation of the difference in responder proportions.
#'
#' @inheritParams prop_diff
#' @inheritParams prop_diff_wald
#'
#' @keywords internal
check_diff_prop_ci <- function(rsp, grp, strata = NULL, conf_level, correct = NULL) {
  # Response: logical vector without missing values.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  # Group: complete two-level factor with one entry per response.
  checkmate::assert_factor(grp, len = length(rsp), any.missing = FALSE, n.levels = 2)
  # Confidence level: single number within [0, 1].
  checkmate::assert_number(conf_level, lower = 0, upper = 1)
  # Continuity correction: single TRUE/FALSE, or NULL when not applicable.
  checkmate::assert_flag(correct, null.ok = TRUE)

  # Strata are optional; when given they must be a factor aligned with the response.
  strata_given <- !is.null(strata)
  if (strata_given) {
    checkmate::assert_factor(strata, len = length(rsp))
  }

  invisible(NULL)
}

#' Description of Method Used for Proportion Comparison
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function that describes the analysis in
#' `s_proportion_diff`.
#'
#' @inheritParams s_proportion_diff
#' @param long (`logical`)\cr Whether a long or a short (default) description is required.
#'
#' @return A `string` describing the analysis.
#'
#' @seealso [prop_diff]
#'
#' @export
d_proportion_diff <- function(conf_level,
                              method,
                              long = FALSE) {
  # Base label, e.g. "95% CI".
  ci_label <- paste0(conf_level * 100, "% CI")

  # The long form states what the interval is for; CMH gives an adjusted difference.
  if (long) {
    target <- if (method == "cmh") {
      "for adjusted difference"
    } else {
      "for difference"
    }
    ci_label <- paste(ci_label, target)
  }

  # Human-readable name of the interval method; unknown methods are an error.
  method_desc <- switch(method,
    "cmh" = "CMH, without correction",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "ha" = "Anderson-Hauck",
    "newcombe" = "Newcombe, without correction",
    "newcombecc" = "Newcombe, with correction",
    "strat_newcombe" = "Stratified Newcombe, without correction",
    "strat_newcombecc" = "Stratified Newcombe, with correction",
    stop(paste(method, "does not have a description"))
  )

  paste0(ci_label, " (", method_desc, ")")
}

#' Helper Functions to Calculate Proportion Difference
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams argument_convention
#' @inheritParams prop_diff
#' @param grp (`factor`)\cr vector assigning observations to one out of two groups
#'   (e.g. reference and treatment group).
#'
#' @return A named `list` of elements `diff` (proportion difference) and `diff_ci`
#'   (proportion difference confidence interval).
#'
#' @seealso [prop_diff()] for implementation of these helper functions.
#'
#' @name h_prop_diff
NULL

#' @describeIn h_prop_diff The Wald interval follows the usual textbook
#'   definition for a single proportion confidence interval using the normal
#'   approximation. It is possible to include a continuity correction for Wald's
#'   interval.
#'
#' @param correct (`logical`)\cr whether to include the continuity correction. For further
#'   information, see [stats::prop.test()].
#'
#' @examples
#' # Wald confidence interval
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
#' grp <- factor(c(rep("A", 10), rep("B", 10)))
#'
#' prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#'
#' @export
prop_diff_wald <- function(rsp,
                           grp,
                           conf_level = 0.95,
                           correct = FALSE) {
  # Method label understood by `desctools_binom()`.
  mthd <- if (isTRUE(correct)) "waldcc" else "wald"

  grp <- as_factor_keep_attributes(grp)
  # Validates `rsp` (logical, no NA), `grp` (two-level factor of matching length),
  # `conf_level` and `correct`; the former duplicate assertions on `rsp`/`grp` repeated
  # exactly these checks and have been dropped.
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, correct = correct
  )

  # Rows: groups (reference first); columns: responders (TRUE), non-responders (FALSE).
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  # x1 and n1 are non-reference groups.
  diff_ci <- desctools_binom(
    x1 = tbl[2, 1], n1 = sum(tbl[2, ]),
    x2 = tbl[1, 1], n2 = sum(tbl[1, ]),
    conf.level = conf_level,
    method = mthd
  )

  list(
    "diff" = unname(diff_ci[, "est"]),
    "diff_ci" = unname(diff_ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Anderson-Hauck confidence interval.
#'
#' @examples
#' # Anderson-Hauck confidence interval
#' ## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
#' rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
#' grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
#'
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#'
#' ## Edge case: Same proportion of response in A and B.
#' rsp <- c(TRUE, FALSE, TRUE, FALSE)
#' grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
#'
#' prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#'
#' @export
prop_diff_ha <- function(rsp,
                         grp,
                         conf_level) {
  grp <- as_factor_keep_attributes(grp)
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level)

  # Rows: groups (reference first); columns: responders (TRUE), non-responders (FALSE).
  counts <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))

  # x1 and n1 are non-reference groups.
  ha_ci <- desctools_binom(
    x1 = counts[2, 1], n1 = sum(counts[2, ]),
    x2 = counts[1, 1], n2 = sum(counts[1, ]),
    conf.level = conf_level,
    method = "ha"
  )

  estimate <- unname(ha_ci[, "est"])
  bounds <- unname(ha_ci[, c("lwr.ci", "upr.ci")])
  list("diff" = estimate, "diff_ci" = bounds)
}

#' @describeIn h_prop_diff `Newcombe` confidence interval. It is based on
#'   the Wilson score confidence interval for a single binomial proportion.
#'
#' @examples
#' # `Newcombe` confidence interval
#'
#' set.seed(1)
#' rsp <- c(
#'   sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
#'   sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
#' )
#' grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
#' table(rsp, grp)
#'
#' prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#'
#' @export
prop_diff_nc <- function(rsp,
                         grp,
                         conf_level,
                         correct = FALSE) {
  # Method label understood by `desctools_binom()`.
  mthd <- if (isTRUE(correct)) "scorecc" else "score"

  grp <- as_factor_keep_attributes(grp)
  # `correct` is validated as well, in line with `prop_diff_wald()`.
  check_diff_prop_ci(rsp = rsp, grp = grp, conf_level = conf_level, correct = correct)

  # Rows: groups (reference first); columns: responders (TRUE), non-responders (FALSE).
  # (The per-group proportions formerly computed here were never used and are gone.)
  tbl <- table(grp, factor(rsp, levels = c(TRUE, FALSE)))
  ci <- desctools_binom(
    # x1 and n1 are non-reference groups.
    x1 = tbl[2, 1], n1 = sum(tbl[2, ]),
    x2 = tbl[1, 1], n2 = sum(tbl[1, ]),
    conf.level = conf_level,
    method = mthd
  )
  list(
    "diff" = unname(ci[, "est"]),
    "diff_ci" = unname(ci[, c("lwr.ci", "upr.ci")])
  )
}

#' @describeIn h_prop_diff Calculates the weighted difference. This is defined as the difference in
#'   response rates between the experimental treatment group and the control treatment group, adjusted
#'   for stratification factors by applying `Cochran-Mantel-Haenszel` (`CMH`) weights. For the `CMH` chi-squared
#'   test, use [stats::mantelhaen.test()].
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#'
#' @examples
#' # Cochran-Mantel-Haenszel confidence interval
#'
#' set.seed(2)
#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
#' grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
#' grp <- factor(grp, levels = c("Placebo", "Treatment"))
#' strata_data <- data.frame(
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_cmh(
#'   rsp = rsp, grp = grp, strata = interaction(strata_data),
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_cmh <- function(rsp,
                          grp,
                          strata,
                          conf_level = 0.95) {
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )

  # `tapply()` returns NA for strata levels without any observation. Those strata are
  # excluded further down, so they are ignored here; without `na.rm` the condition could
  # evaluate to NA and `if ()` would fail.
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  # first dimension: FALSE, TRUE
  # 2nd dimension: CONTROL, TX
  # 3rd dimension: levels of strat
  # rsp as factor rsp to handle edge case of no FALSE (or TRUE) rsp records
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )

  # Per-stratum counts for response row `i` and group column `j`. Stratum names are
  # re-attached explicitly because `[` drops them (and the array shape) when there is
  # only one stratum, which previously made `colSums()` fail.
  strata_lvls <- dimnames(t_tbl)[[3]]
  cell_counts <- function(i, j) {
    stats::setNames(as.numeric(t_tbl[i, j, ]), strata_lvls)
  }
  x1 <- cell_counts(2, 1)
  x2 <- cell_counts(2, 2)
  n1 <- cell_counts(1, 1) + x1
  n2 <- cell_counts(1, 2) + x2
  p1 <- x1 / n1
  p2 <- x2 / n2
  # CMH weights: only strata with observations in both groups contribute.
  use_stratum <- (n1 > 0) & (n2 > 0)
  n1 <- n1[use_stratum]
  n2 <- n2[use_stratum]
  p1 <- p1[use_stratum]
  p2 <- p2[use_stratum]
  wt <- (n1 * n2 / (n1 + n2))
  wt_normalized <- wt / sum(wt)
  est1 <- sum(wt_normalized * p1)
  est2 <- sum(wt_normalized * p2)
  estimate <- c(est1, est2)
  names(estimate) <- levels(grp)
  se1 <- sqrt(sum(wt_normalized^2 * p1 * (1 - p1) / n1))
  se2 <- sqrt(sum(wt_normalized^2 * p2 * (1 - p2) / n2))
  z <- stats::qnorm((1 + conf_level) / 2)
  err1 <- z * se1
  err2 <- z * se2
  ci1 <- c((est1 - err1), (est1 + err1))
  ci2 <- c((est2 - err2), (est2 + err2))
  estimate_ci <- list(ci1, ci2)
  names(estimate_ci) <- levels(grp)
  # Difference is second group level minus first group level.
  diff_est <- est2 - est1
  se_diff <- sqrt(sum(((p1 * (1 - p1) / n1) + (p2 * (1 - p2) / n2)) * wt_normalized^2))
  diff_ci <- c(diff_est - z * se_diff, diff_est + z * se_diff)

  list(
    prop = estimate,
    prop_ci = estimate_ci,
    diff = diff_est,
    diff_ci = diff_ci,
    weights = wt_normalized,
    n1 = n1,
    n2 = n2
  )
}

#' @describeIn h_prop_diff Calculates the stratified `Newcombe` confidence interval and difference in response
#'   rates between the experimental treatment group and the control treatment group, adjusted for stratification
#'   factors. This implementation follows closely the one proposed by \insertCite{Yan2010-jt;textual}{tern}.
#'   Weights can be estimated from the heuristic proposed in [prop_strat_wilson()] or from `CMH`-derived weights
#'   (see [prop_diff_cmh()]).
#'
#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
#' @param weights_method (`string`)\cr weights method. Can be either `"cmh"` or `"wilson_h"`
#'   and directs the way weights are estimated.
#'
#' @references
#' \insertRef{Yan2010-jt}{tern}
#'
#' @examples
#' # Stratified `Newcombe` confidence interval
#'
#' set.seed(2)
#' data_set <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), 100, TRUE),
#'   "f1" = sample(c("a", "b"), 100, TRUE),
#'   "f2" = sample(c("x", "y", "z"), 100, TRUE),
#'   "grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "cmh",
#'   conf_level = 0.90
#' )
#'
#' prop_diff_strat_nc(
#'   rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
#'   weights_method = "wilson_h",
#'   conf_level = 0.90
#' )
#'
#' @export
prop_diff_strat_nc <- function(rsp,
                               grp,
                               strata,
                               weights_method = c("cmh", "wilson_h"),
                               conf_level = 0.95,
                               correct = FALSE) {
  weights_method <- match.arg(weights_method)
  grp <- as_factor_keep_attributes(grp)
  strata <- as_factor_keep_attributes(strata)
  # Also validates `conf_level`, so no separate assertion is needed for it.
  check_diff_prop_ci(
    rsp = rsp, grp = grp, conf_level = conf_level, strata = strata
  )
  checkmate::assert_flag(correct)

  # `tapply()` returns NA for strata levels without any observation; ignore those so the
  # condition cannot become NA (which would make `if ()` fail).
  if (any(tapply(rsp, strata, length) < 5, na.rm = TRUE)) {
    warning("Less than 5 observations in some strata.")
  }

  rsp_by_grp <- split(rsp, f = grp)
  strata_by_grp <- split(strata, f = grp)

  # Finding the weights
  weights <- if (identical(weights_method, "cmh")) {
    prop_diff_cmh(rsp = rsp, grp = grp, strata = strata)$weights
  } else if (identical(weights_method, "wilson_h")) {
    prop_strat_wilson(rsp, strata, conf_level = conf_level, correct = correct)$weights
  }
  # Strata levels without a weight get an explicit zero weight (appended at the end).
  weights[levels(strata)[!levels(strata) %in% names(weights)]] <- 0

  # Calculating lower (`l`) and upper (`u`) confidence bounds per group.
  strat_wilson_by_grp <- Map(
    prop_strat_wilson,
    rsp = rsp_by_grp,
    strata = strata_by_grp,
    weights = list(weights, weights),
    conf_level = conf_level,
    correct = correct
  )

  ci_ref <- strat_wilson_by_grp[[1]]
  ci_trt <- strat_wilson_by_grp[[2]]
  l_ref <- as.numeric(ci_ref$conf_int[1])
  u_ref <- as.numeric(ci_ref$conf_int[2])
  l_trt <- as.numeric(ci_trt$conf_int[1])
  u_trt <- as.numeric(ci_trt$conf_int[2])

  # Estimating the diff and n_ref, n_trt (it allows different weights to be used)
  t_tbl <- table(
    factor(rsp, levels = c("FALSE", "TRUE")),
    grp,
    strata
  )

  # Per-stratum counts for response row `i` and group column `j`. Stratum names are
  # re-attached explicitly because `[` drops them (and the array shape) when there is
  # only one stratum, which previously made `colSums()` fail.
  strata_lvls <- dimnames(t_tbl)[[3]]
  cell_counts <- function(i, j) {
    stats::setNames(as.numeric(t_tbl[i, j, ]), strata_lvls)
  }
  x_ref <- cell_counts(2, 1)
  x_trt <- cell_counts(2, 2)
  n_ref <- cell_counts(1, 1) + x_ref
  n_trt <- cell_counts(1, 2) + x_trt
  # Only strata with observations in both groups enter the estimate.
  use_stratum <- (n_ref > 0) & (n_trt > 0)
  n_ref <- n_ref[use_stratum]
  n_trt <- n_trt[use_stratum]
  p_ref <- x_ref[use_stratum] / n_ref
  p_trt <- x_trt[use_stratum] / n_trt
  # NOTE(review): `weights` is combined with the filtered per-stratum vectors by position,
  # not by name; when strata were dropped above this relies on the zero-weight entries
  # sitting at the end and on recycling - confirm this holds for "wilson_h" weights.
  est1 <- sum(weights * p_ref)
  est2 <- sum(weights * p_trt)
  diff_est <- est2 - est1

  lambda1 <- sum(weights^2 / n_ref)
  lambda2 <- sum(weights^2 / n_trt)
  z <- stats::qnorm((1 + conf_level) / 2)

  lower <- diff_est - z * sqrt(lambda2 * l_trt * (1 - l_trt) + lambda1 * u_ref * (1 - u_ref))
  upper <- diff_est + z * sqrt(lambda1 * l_ref * (1 - l_ref) + lambda2 * u_trt * (1 - u_trt))

  list(
    "diff" = diff_est,
    "diff_ci" = c("lower" = lower, "upper" = upper)
  )
}

#' Occurrence Table Sorting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Functions to score occurrence table subtables and rows which can be used in the
#' sorting of occurrence tables.
#'
#' @name score_occurrences
NULL

#' @describeIn score_occurrences Scoring function which sums the counts across all
#'   columns. It will fail if anything else but counts are used.
#'
#' @inheritParams rtables_access
#'
#' @return
#' * `score_occurrences()` returns the sum of counts across all columns of a table row.
#'
#' @seealso [h_row_first_values()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   analyze_num_patients(
#'     vars = "USUBJID",
#'     .stats = c("unique"),
#'     .labels = c("Total number of patients with at least one event")
#'   ) %>%
#'   split_rows_by("AEBODSYS", child_labels = "visible", nested = FALSE) %>%
#'   summarize_num_patients(
#'     var = "USUBJID",
#'     .stats = c("unique", "nonunique"),
#'     .labels = c(
#'       "Total number of patients with at least one event",
#'       "Total number of events"
#'     )
#'   ) %>%
#'   count_occurrences(vars = "AEDECOD")
#'
#' tbl <- build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl) %>%
#'   prune_table()
#'
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_occurrences)
#'
#' tbl_sorted
#'
#' @export
score_occurrences <- function(table_row) {
  # Score of a row = total of its counts as extracted by `h_row_counts()`.
  sum(h_row_counts(table_row))
}

#' @describeIn score_occurrences Scoring functions can be produced by this constructor to only include
#'   specific columns in the scoring. See [h_row_counts()] for further information.
#'
#' @inheritParams has_count_in_cols
#'
#' @return
#' * `score_occurrences_cols()` returns a function that sums counts across all specified columns
#'   of a table row.
#'
#' @seealso [h_row_counts()]
#'
#' @examples
#' score_cols_a_and_b <- score_occurrences_cols(col_names = c("A: Drug X", "B: Placebo"))
#'
#' # Note that this here just sorts the AEDECOD inside the AEBODSYS. The AEBODSYS are not sorted.
#' # That would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS", "*", "AEDECOD"), scorefun = score_cols_a_and_b)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_cols <- function(...) {
  # The returned scorer forwards the constructor's `...` to `h_row_counts()`
  # on every call and totals the resulting counts.
  function(table_row) {
    sum(h_row_counts(table_row, ...))
  }
}

#' @describeIn score_occurrences Scoring functions produced by this constructor can be used on
#'   subtables: They sum up all specified column counts in the subtable. This is useful when
#'   there is no available content row summing up these counts.
#'
#' @return
#' * `score_occurrences_subtable()` returns a function that sums counts in each subtable
#'   across all specified columns.
#'
#' @examples
#' score_subtable_all <- score_occurrences_subtable(col_names = names(tbl))
#'
#' # Note that this code just sorts the AEBODSYS, not the AEDECOD within AEBODSYS. That
#' # would require a second pass of `sort_at_path`.
#' tbl_sorted <- tbl %>%
#'   sort_at_path(path = c("AEBODSYS"), scorefun = score_subtable_all, decreasing = FALSE)
#'
#' tbl_sorted
#'
#' @export
score_occurrences_subtable <- function(...) {
  # Row-level scorer restricted to the requested columns.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    # Score every leaf row of the subtable, then add the scores up.
    leaf_scores <- vapply(collect_leaves(table_tree), row_scorer, numeric(1))
    sum(leaf_scores)
  }
}

#' @describeIn score_occurrences Produce score function for sorting table by summing the first content row in
#'   specified columns. Note that this is extending [rtables::cont_n_onecol()] and [rtables::cont_n_allcols()].
#'
#' @return
#' * `score_occurrences_cont_cols()` returns a function that sums counts in the first content row in
#'   specified columns.
#'
#' @export
score_occurrences_cont_cols <- function(...) {
  # Row-level scorer restricted to the requested columns.
  row_scorer <- score_occurrences_cols(...)
  function(table_tree) {
    if (inherits(table_tree, "ContentRow")) {
      # Content rows themselves are not scored.
      NA
    } else {
      # Score the first content row of the (sub)table.
      row_scorer(h_content_first_row(table_tree))
    }
  }
}

#' Individual Patient Plots
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Line plot(s) displaying trend in patients' parameter values over time is rendered.
#' Patients' individual baseline values can be added to the plot(s) as reference.
#'
#' @inheritParams argument_convention
#' @param xvar (`string`)\cr time point variable to be plotted on x-axis.
#' @param yvar (`string`)\cr continuous analysis variable to be plotted on y-axis.
#' @param xlab (`string`)\cr plot label for x-axis.
#' @param ylab (`string`)\cr plot label for y-axis.
#' @param id_var (`string`)\cr variable used as patient identifier.
#' @param title (`string`)\cr title for plot.
#' @param subtitle (`string`)\cr subtitle for plot.
#' @param add_baseline_hline (`flag`)\cr adds horizontal line at baseline y-value on
#'   plot when TRUE.
#' @param yvar_baseline (`string`)\cr variable with baseline values only.
#'   Ignored when `add_baseline_hline` is FALSE.
#' @param ggtheme (`theme`)\cr optional graphical theme function as provided
#'   by `ggplot2` to control outlook of plot. Use `ggplot2::theme()` to tweak the display.
#' @param plotting_choices (`character`)\cr specifies options for displaying
#'   plots. Must be one of "all_in_one", "split_by_max_obs", "separate_by_obs".
#' @param max_obs_per_plot (`count`)\cr Number of observations to be plotted on one
#'   plot. Ignored when `plotting_choices` is not "split_by_max_obs".
#' @param caption (`character` scalar)\cr optional caption below the plot.
#' @param col (`character`)\cr lines colors.
#'
#' @seealso Relevant helper function [h_g_ipp()].
#'
#' @name individual_patient_plot
NULL

#' Helper Function To Create Simple Line Plot over Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Function that generates a simple line plot displaying parameter trends over time.
#'
#' @inheritParams argument_convention
#' @inheritParams g_ipp
#'
#' @return A `ggplot` line plot.
#'
#' @seealso [g_ipp()] which uses this function.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' p <- h_g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   id_var = "USUBJID",
#'   ylab = "SGOT/ALT (U/L)",
#'   add_baseline_hline = TRUE
#' )
#' p
#'
#' @export
h_g_ipp <- function(df,
                    xvar,
                    yvar,
                    xlab,
                    ylab,
                    id_var,
                    title = "Individual Patient Plots",
                    subtitle = "",
                    caption = NULL,
                    add_baseline_hline = FALSE,
                    yvar_baseline = "BASE",
                    ggtheme = nestcolor::theme_nest(),
                    col = NULL) {
  # Check the container first so that a wrong `df` is reported as such.
  checkmate::assert_data_frame(df)
  checkmate::assert_string(xvar)
  checkmate::assert_string(yvar)
  checkmate::assert_string(yvar_baseline)
  checkmate::assert_string(id_var)
  checkmate::assert_string(xlab)
  checkmate::assert_string(ylab)
  checkmate::assert_string(title)
  checkmate::assert_string(subtitle)
  checkmate::assert_flag(add_baseline_hline)
  checkmate::assert_character(col, null.ok = TRUE)

  # The baseline column is documented as ignored unless the baseline line is requested,
  # so it is only required in that case.
  required_vars <- c(xvar, yvar, id_var)
  if (add_baseline_hline) {
    required_vars <- c(required_vars, yvar_baseline)
  }
  checkmate::assert_subset(required_vars, colnames(df))

  # One line (with points) per patient, coloured by patient.
  p <- ggplot2::ggplot(
    data = df,
    mapping = ggplot2::aes(
      x = .data[[xvar]],
      y = .data[[yvar]],
      group = .data[[id_var]],
      colour = .data[[id_var]]
    )
  ) +
    ggplot2::geom_line(linewidth = 0.4) +
    ggplot2::geom_point(size = 2) +
    ggplot2::labs(
      x = xlab,
      y = ylab,
      title = title,
      subtitle = subtitle,
      caption = caption
    ) +
    ggtheme

  if (add_baseline_hline) {
    # One row per distinct patient/baseline combination.
    baseline_df <- unique(df[, c(id_var, yvar_baseline)])

    # Lift the patient label slightly above its baseline line (2.5% of the y range).
    label_offset <- 0.025 * diff(range(df[[yvar]], na.rm = TRUE))

    p <- p +
      ggplot2::geom_hline(
        data = baseline_df,
        mapping = ggplot2::aes(
          yintercept = .data[[yvar_baseline]],
          colour = .data[[id_var]]
        ),
        linetype = "dotdash",
        linewidth = 0.4
      ) +
      ggplot2::geom_text(
        data = baseline_df,
        mapping = ggplot2::aes(
          x = 1,
          y = .data[[yvar_baseline]],
          label = .data[[id_var]],
          colour = .data[[id_var]]
        ),
        nudge_y = label_offset,
        vjust = "right",
        size = 2
      )
  }

  # User-supplied line colours apply whether or not the baseline line is drawn
  # (previously they were silently ignored without the baseline line).
  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  p
}

#' @describeIn individual_patient_plot Plotting function for individual patient plots which, depending on user
#'   preference, renders a single graphic or compiles a list of graphics that show trends in individual's parameter
#'   values over time.
#'
#' @return A `ggplot` object or a list of `ggplot` objects.
#'
#' @examples
#' library(dplyr)
#' library(nestcolor)
#'
#' # Select a small sample of data to plot.
#' adlb <- tern_ex_adlb %>%
#'   filter(PARAMCD == "ALT", !(AVISIT %in% c("SCREENING", "BASELINE"))) %>%
#'   slice(1:36)
#'
#' plot_list <- g_ipp(
#'   df = adlb,
#'   xvar = "AVISIT",
#'   yvar = "AVAL",
#'   xlab = "Visit",
#'   ylab = "SGOT/ALT (U/L)",
#'   title = "Individual Patient Plots",
#'   add_baseline_hline = TRUE,
#'   plotting_choices = "split_by_max_obs",
#'   max_obs_per_plot = 5
#' )
#' plot_list
#'
#' @export
g_ipp <- function(df,
                  xvar,
                  yvar,
                  xlab,
                  ylab,
                  id_var = "USUBJID",
                  title = "Individual Patient Plots",
                  subtitle = "",
                  caption = NULL,
                  add_baseline_hline = FALSE,
                  yvar_baseline = "BASE",
                  ggtheme = nestcolor::theme_nest(),
                  plotting_choices = c("all_in_one", "split_by_max_obs", "separate_by_obs"),
                  max_obs_per_plot = 4,
                  col = NULL) {
  checkmate::assert_count(max_obs_per_plot)
  checkmate::assert_subset(plotting_choices, c("all_in_one", "split_by_max_obs", "separate_by_obs"))
  checkmate::assert_character(col, null.ok = TRUE)

  plotting_choices <- match.arg(plotting_choices)

  # Single place where the plotting helper is called, so that every mode
  # forwards exactly the same set of arguments and only the data differs.
  plot_subset <- function(data) {
    h_g_ipp(
      df = data,
      xvar = xvar,
      yvar = yvar,
      xlab = xlab,
      ylab = ylab,
      id_var = id_var,
      title = title,
      subtitle = subtitle,
      caption = caption,
      add_baseline_hline = add_baseline_hline,
      yvar_baseline = yvar_baseline,
      ggtheme = ggtheme,
      col = col
    )
  }

  # One graphic containing every subject.
  if (plotting_choices == "all_in_one") {
    return(plot_subset(df))
  }

  # One graphic per subject, returned as a list named by the values of `id_var`.
  if (plotting_choices == "separate_by_obs") {
    return(lapply(split(df, df[[id_var]]), plot_subset))
  }

  # Remaining mode is "split_by_max_obs": consecutive groups of at most
  # `max_obs_per_plot` subjects per graphic, returned as an unnamed list.
  # A chunk size of zero cannot partition the subjects, so fail early with a
  # clear message instead of an obscure sequence error further down.
  checkmate::assert_count(max_obs_per_plot, positive = TRUE)

  id_vec <- unique(df[[id_var]])
  # `seq_along()` keeps this safe when there are no subjects at all.
  chunk_index <- ceiling(seq_along(id_vec) / max_obs_per_plot)
  id_list <- unname(split(id_vec, chunk_index))

  lapply(
    id_list,
    function(ids) plot_subset(df[df[[id_var]] %in% ids, ])
  )
}

#' Patient Counts with the Most Extreme Post-baseline Toxicity Grade per Direction of Abnormality
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the toxicity grade (`factor`), and additional
#' analysis variables are `id` (`character` or `factor`), `param` (`factor`) and `grade_dir` (`factor`).
#' The pre-processing steps are crucial when using this function.
#' For a certain direction (e.g. high or low) this function counts
#' patients in the denominator as number of patients with at least one valid measurement during treatment,
#' and patients in the numerator as follows:
#'   * `1` to `4`: Numerator is number of patients with worst grades 1-4 respectively;
#'   * `Any`: Numerator is number of patients with at least one abnormality, which means grade is different from 0.
#'
#' Pre-processing is crucial when using this function and can be done automatically using the
#' [h_adlb_abnormal_by_worst_grade()] helper function. See the description of this function for details on the
#' necessary pre-processing steps.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_worst_grade")`
#'   to see available statistics for this function.
#'
#' @seealso [h_adlb_abnormal_by_worst_grade()] which pre-processes `ADLB` data frames to be used in
#'   [count_abnormal_by_worst_grade()].
#'
#' @name abnormal_by_worst_grade
#' @order 1
NULL

#' @describeIn abnormal_by_worst_grade Statistics function which counts patients by worst grade.
#'
#' @return
#' * `s_count_abnormal_by_worst_grade()` returns the single statistic `count_fraction` with grades 1 to 4 and
#'   "Any" results.
#'
#' @keywords internal
s_count_abnormal_by_worst_grade <- function(df, # nolint
                                            .var = "GRADE_ANL",
                                            .spl_context,
                                            variables = list(
                                              id = "USUBJID",
                                              param = "PARAM",
                                              grade_dir = "GRADE_DIR"
                                            )) {
  checkmate::assert_string(.var)
  assert_valid_factor(df[[.var]])
  assert_valid_factor(df[[variables$param]])
  assert_valid_factor(df[[variables$grade_dir]])
  assert_df_with_variables(df, c(a = .var, variables))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  id_col <- variables[["id"]]
  param_col <- variables[["param"]]

  # The layout must have been split by both the parameter and the grade direction variables.
  checkmate::assert_subset(c(param_col, variables[["grade_dir"]]), .spl_context$split)
  param_split <- .spl_context[.spl_context$split == param_col, ]

  # Rows to report: every grade level except "0", plus the "Any" row.
  x_lvls <- c(setdiff(levels(df[[.var]]), "0"), "Any")
  # The result is ordered by the factor levels of the row names.
  result_names <- levels(factor(x_lvls))

  # Denominator: subjects of the parameter-level parent data that fall in the current column.
  # A subject may have records in both directions but is counted only once.
  parent_ids <- param_split$full_parent_df[[1]][[id_col]]
  denom <- length(unique(parent_ids[param_split$cur_col_subset[[1]]]))

  count_for_level <- function(lvl) {
    df_lvl <- if (lvl == "Any") {
      df[df[[.var]] != 0, ]
    } else {
      df[df[[.var]] == lvl, ]
    }
    num <- length(unique(df_lvl[[id_col]]))
    # Report a fraction of zero when nobody is in the denominator.
    fraction <- if (denom == 0) 0 else num / denom
    formatters::with_label(c(count = num, fraction = fraction), lvl)
  }

  list(
    count_fraction = stats::setNames(lapply(result_names, count_for_level), result_names)
  )
}

#' @describeIn abnormal_by_worst_grade Formatted analysis function which is used as `afun`
#'   in `count_abnormal_by_worst_grade()`.
#'
#' @return
#' * `a_count_abnormal_by_worst_grade()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal_by_worst_grade <- make_afun( # nolint
  # Statistics function being wrapped into a formatted analysis function.
  s_count_abnormal_by_worst_grade,
  # The `count_fraction` statistic is rendered with the `format_count_fraction` formatter.
  .formats = c(count_fraction = format_count_fraction)
)

#' @describeIn abnormal_by_worst_grade Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal_by_worst_grade()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal_by_worst_grade()` to the table layout.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#' adlb <- tern_ex_adlb
#'
#' # Data is modified in order to have some parameters with grades only in one direction
#' # and simulate the real data.
#' adlb$ATOXGR[adlb$PARAMCD == "ALT" & adlb$ATOXGR %in% c("1", "2", "3", "4")] <- "-1"
#' adlb$ANRIND[adlb$PARAMCD == "ALT" & adlb$ANRIND == "HIGH"] <- "LOW"
#' adlb$WGRHIFL[adlb$PARAMCD == "ALT"] <- ""
#'
#' adlb$ATOXGR[adlb$PARAMCD == "IGA" & adlb$ATOXGR %in% c("-1", "-2", "-3", "-4")] <- "1"
#' adlb$ANRIND[adlb$PARAMCD == "IGA" & adlb$ANRIND == "LOW"] <- "HIGH"
#' adlb$WGRLOFL[adlb$PARAMCD == "IGA"] <- ""
#'
#' # Pre-processing
#' adlb_f <- adlb %>% h_adlb_abnormal_by_worst_grade()
#'
#' # Map excludes records without abnormal grade since they should not be displayed
#' # in the table.
#' map <- unique(adlb_f[adlb_f$GRADE_DIR != "ZERO", c("PARAM", "GRADE_DIR", "GRADE_ANL")]) %>%
#'   lapply(as.character) %>%
#'   as.data.frame() %>%
#'   arrange(PARAM, desc(GRADE_DIR), GRADE_ANL)
#'
#' basic_table() %>%
#'   split_cols_by("ARMCD") %>%
#'   split_rows_by("PARAM") %>%
#'   split_rows_by("GRADE_DIR", split_fun = trim_levels_to_map(map)) %>%
#'   count_abnormal_by_worst_grade(
#'     var = "GRADE_ANL",
#'     variables = list(id = "USUBJID", param = "PARAM", grade_dir = "GRADE_DIR")
#'   ) %>%
#'   build_table(df = adlb_f)
#'
#' @export
#' @order 2
count_abnormal_by_worst_grade <- function(lyt,
                                          var,
                                          variables = list(
                                            id = "USUBJID",
                                            param = "PARAM",
                                            grade_dir = "GRADE_DIR"
                                          ),
                                          na_str = default_na_str(),
                                          nested = TRUE,
                                          ...,
                                          .stats = NULL,
                                          .formats = NULL,
                                          .labels = NULL,
                                          .indent_mods = NULL) {
  # Arguments handed through to the statistics function: `variables` first, then anything in `...`.
  stat_fun_args <- c(list(variables = variables), list(...))

  # Customize the formatted analysis function with the user's statistic/format/label choices.
  # The `count_fraction` statistic is ungrouped into separate rows.
  customized_afun <- make_afun(
    a_count_abnormal_by_worst_grade,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  analyze(
    lyt = lyt,
    vars = var,
    afun = customized_afun,
    na_str = na_str,
    nested = nested,
    extra_args = stat_fun_args,
    show_labels = "hidden"
  )
}

#' Helper function to prepare `ADLB` for [count_abnormal_by_worst_grade()]
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper function to prepare an `ADLB` data frame to be used as input in
#' [count_abnormal_by_worst_grade()]. The following pre-processing steps are applied:
#'
#' 1. `adlb` is filtered on variable `avisit` to only include post-baseline visits.
#' 2. `adlb` is filtered on variables `worst_flag_low` and `worst_flag_high` so that only
#'    worst grades (in either direction) are included.
#' 3. From the standard lab grade variable `atoxgr`, the following two variables are derived
#'    and added to `adlb`:
#'   * A grade direction variable (e.g. `GRADE_DIR`). The variable takes value `"HIGH"` when
#'     `atoxgr > 0`, `"LOW"` when `atoxgr < 0`, and `"ZERO"` otherwise.
#'   * A toxicity grade variable (e.g. `GRADE_ANL`) where all negative values from `atoxgr` are
#'     replaced by their absolute values.
#' 4. Unused factor levels are dropped from `adlb` via [droplevels()].
#'
#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
#' @param atoxgr (`character`)\cr Analysis toxicity grade variable. This must be a `factor`
#'   variable.
#' @param avisit (`character`)\cr Analysis visit variable.
#' @param worst_flag_low (`character`)\cr Worst low lab grade flag variable. This variable is
#'   set to `"Y"` when indicating records of worst low lab grades.
#' @param worst_flag_high (`character`)\cr Worst high lab grade flag variable. This variable is
#'   set to `"Y"` when indicating records of worst high lab grades.
#'
#' @return `h_adlb_abnormal_by_worst_grade()` returns the `adlb` data frame with two new
#'   variables: `GRADE_DIR` and `GRADE_ANL`.
#'
#' @seealso [abnormal_by_worst_grade]
#'
#' @examples
#' h_adlb_abnormal_by_worst_grade(tern_ex_adlb) %>%
#'   dplyr::select(ATOXGR, GRADE_DIR, GRADE_ANL) %>%
#'   head(10)
#'
#' @export
h_adlb_abnormal_by_worst_grade <- function(adlb,
                                           atoxgr = "ATOXGR",
                                           avisit = "AVISIT",
                                           worst_flag_low = "WGRLOFL",
                                           worst_flag_high = "WGRHIFL") {
  # Steps 1 and 2: keep post-baseline records flagged as worst grade in either direction.
  worst_post_baseline <- dplyr::filter(
    adlb,
    !.data[[avisit]] %in% c("SCREENING", "BASELINE"),
    .data[[worst_flag_low]] == "Y" | .data[[worst_flag_high]] == "Y"
  )

  # Step 3: derive the direction of the abnormality and the unsigned analysis grade.
  with_grades <- dplyr::mutate(
    worst_post_baseline,
    GRADE_DIR = factor(
      dplyr::case_when(
        .data[[atoxgr]] %in% c("-1", "-2", "-3", "-4") ~ "LOW",
        .data[[atoxgr]] == "0" ~ "ZERO",
        .data[[atoxgr]] %in% c("1", "2", "3", "4") ~ "HIGH"
      ),
      levels = c("LOW", "ZERO", "HIGH")
    ),
    GRADE_ANL = forcats::fct_relevel(
      # Negative grades are folded onto their absolute values.
      forcats::fct_recode(.data[[atoxgr]], `1` = "-1", `2` = "-2", `3` = "-3", `4` = "-4"),
      c("0", "1", "2", "3", "4")
    )
  )

  # Step 4: remove factor levels that are no longer used.
  droplevels(with_grades)
}

#' Sort Data by `PK PARAM` Variable
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @param pk_data (`data.frame`)\cr `Pharmacokinetics` dataframe
#' @param key_var (`character`)\cr key variable used to merge pk_data and metadata created by `d_pkparam()`
#'
#' @return A PK `data.frame` sorted by a `PARAM` variable.
#'
#' @examples
#' library(dplyr)
#'
#' adpp <- tern_ex_adpp %>% mutate(PKPARAM = factor(paste0(PARAM, " (", AVALU, ")")))
#' pk_ordered_data <- h_pkparam_sort(adpp)
#'
#' @export
h_pkparam_sort <- function(pk_data, key_var = "PARAMCD") {
  assert_df_with_variables(pk_data, list(key_var = key_var))
  # The merge below is always keyed on `PARAMCD`, so copy the chosen key into that column.
  pk_data$PARAMCD <- pk_data[[key_var]]

  ordered_pk_data <- d_pkparam()

  # Add the ordering metadata from `ordered_pk_data` to `pk_data`. For columns present in both
  # inputs the `pk_data` version keeps its name and the metadata version gets a ".y" suffix.
  # The argument is spelled out in full (`suffixes`) rather than relying on partial matching.
  joined_data <- merge(pk_data, ordered_pk_data, by = "PARAMCD", suffixes = c("", ".y"))

  # Drop the duplicated metadata columns. A logical mask is used on purpose: negating the
  # result of `grep()` would drop *every* column when nothing matches. The dot is escaped so
  # that only a literal ".y" suffix is removed, not any column whose name happens to end in "y".
  is_duplicated_metadata <- grepl("\\.y$", colnames(joined_data))
  joined_data <- joined_data[, !is_duplicated_metadata, drop = FALSE]

  joined_data$TLG_ORDER <- as.numeric(joined_data$TLG_ORDER)

  # Then order PARAM based on this column
  joined_data$PARAM <- factor(joined_data$PARAM,
    levels = unique(joined_data$PARAM[order(joined_data$TLG_ORDER)]),
    ordered = TRUE
  )

  # The display label is ordered the same way.
  joined_data$TLG_DISPLAY <- factor(joined_data$TLG_DISPLAY,
    levels = unique(joined_data$TLG_DISPLAY[order(joined_data$TLG_ORDER)]),
    ordered = TRUE
  )

  joined_data
}

#' Control Function for `CoxPH` Model
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `CoxPH` model, typically used internally to specify
#' details of `CoxPH` model for [s_coxph_pairwise()]. `conf_level` refers to Hazard Ratio estimation.
#'
#' @inheritParams argument_convention
#' @param pval_method (`string`)\cr p-value method for testing hazard ratio = 1.
#'   Default method is `"log-rank"`, can also be set to `"wald"` or `"likelihood"`.
#' @param ties (`string`)\cr specifying the method for tie handling. Default is `"efron"`,
#'   can also be set to `"breslow"` or `"exact"`. See more in [survival::coxph()].
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_coxph <- function(pval_method = c("log-rank", "wald", "likelihood"),
                          ties = c("efron", "breslow", "exact"),
                          conf_level = 0.95) {
  # Resolve the enumerated arguments to a single choice while collecting the settings.
  settings <- list(
    pval_method = match.arg(pval_method),
    ties = match.arg(ties),
    conf_level = conf_level
  )
  # Validate the confidence level before returning the settings.
  assert_proportion_value(conf_level)

  settings
}

#' Control Function for `survfit` Model for Survival Time
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_time()]. `conf_level` refers to survival time estimation.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
#'   see more in [survival::survfit()]. Note option "none" is no longer supported.
#' @param quantiles (`numeric`)\cr of length two to specify the quantiles of survival time.
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_time <- function(conf_level = 0.95,
                              conf_type = c("plain", "log", "log-log"),
                              quantiles = c(0.25, 0.75)) {
  conf_type <- match.arg(conf_type)

  # Two distinct, increasing values within [0, 1] ...
  checkmate::assert_numeric(quantiles, lower = 0, upper = 1, len = 2, unique = TRUE, sorted = TRUE)
  # ... each of which must additionally be a valid proportion value.
  for (quantile_value in quantiles) {
    assert_proportion_value(quantile_value)
  }
  assert_proportion_value(conf_level)

  list(
    conf_level = conf_level,
    conf_type = conf_type,
    quantiles = quantiles
  )
}

#' Control Function for `survfit` Model for Patient's Survival Rate at time point
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for `survfit` model, typically used internally to specify
#' details of `survfit` model for [s_surv_timepoint()]. `conf_level` refers to patient risk estimation at a time point.
#'
#' @inheritParams argument_convention
#' @inheritParams control_surv_time
#'
#' @return A list of components with the same names as the arguments
#'
#' @export
control_surv_timepoint <- function(conf_level = 0.95,
                                   conf_type = c("plain", "log", "log-log")) {
  # Reduce the enumerated argument to a single choice, then validate the confidence level.
  chosen_conf_type <- match.arg(conf_type)
  assert_proportion_value(conf_level)

  list(conf_level = conf_level, conf_type = chosen_conf_type)
}

#' Occurrence Table Pruning
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Family of constructor and condition functions to flexibly prune occurrence tables.
#' The condition functions always return whether the row result is higher than the threshold.
#' Since they are of class [CombinationFunction()] they can be logically combined with other condition
#' functions.
#'
#' @note Since most table specifications are worded positively, we name our constructor and condition
#'   functions positively, too. However, note that the result of [keep_rows()] says what
#'   should be pruned, to conform with the [rtables::prune_table()] interface.
#'
#' @examples
#' \donttest{
#' tab <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("RACE") %>%
#'   split_rows_by("STRATA1") %>%
#'   summarize_row_groups() %>%
#'   analyze_vars("COUNTRY", .stats = "count_fraction") %>%
#'   build_table(DM)
#' }
#'
#' @name prune_occurrences
NULL

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a row condition function. This removes all analysis rows (`TableRow`) that should be
#'   pruned, i.e., don't fulfill the row condition. It removes the sub-tree if there are no
#'   children left.
#'
#' @param row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   analysis rows and flags whether these should be kept in the pruned table.
#'
#' @return
#' * `keep_rows()` returns a pruning function that can be used with [rtables::prune_table()]
#'   to prune an `rtables` table.
#'
#' @examples
#' \donttest{
#' # `keep_rows`
#' is_non_empty <- !CombinationFunction(all_zero_or_na)
#' prune_table(tab, keep_rows(is_non_empty))
#' }
#'
#' @export
keep_rows <- function(row_condition) {
  checkmate::assert_function(row_condition)

  # The returned function answers "should this node be pruned?".
  function(table_tree) {
    if (inherits(table_tree, "TableRow")) {
      # A row is pruned exactly when it does not fulfill the keep-condition.
      !row_condition(table_tree)
    } else {
      # Any other node is pruned only when it has no children left.
      identical(length(tree_children(table_tree)), 0L)
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating pruning functions based on
#'   a condition for the (first) content row in leaf tables. This removes all leaf tables where
#'   the first content row does not fulfill the condition. It does not check individual rows.
#'   It then proceeds recursively by removing the sub tree if there are no children left.
#'
#' @param content_row_condition (`CombinationFunction`)\cr condition function which works on individual
#'   first content rows of leaf tables and flags whether these leaf tables should be kept in the pruned table.
#'
#' @return
#' * `keep_content_rows()` returns a pruning function that checks the condition on the first content
#'   row of leaf tables in the table.
#'
#' @examples
#' # `keep_content_rows`
#' \donttest{
#' more_than_twenty <- has_count_in_cols(atleast = 20L, col_names = names(tab))
#' prune_table(tab, keep_content_rows(more_than_twenty))
#' }
#'
#' @export
keep_content_rows <- function(content_row_condition) {
  checkmate::assert_function(content_row_condition)

  # The returned function answers "should this node be pruned?".
  function(table_tree) {
    # Leaf tables are judged by their first content row only.
    if (is_leaf_table(table_tree)) {
      first_content_row <- h_content_first_row(table_tree)
      return(!content_row_condition(first_content_row))
    }

    if (inherits(table_tree, "DataRow")) {
      # Individual data rows are never pruned here.
      FALSE
    } else {
      # Remaining nodes are pruned only when they have no children left.
      identical(length(tree_children(table_tree)), 0L)
    }
  }
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total counts in the specified columns.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#' @param ... arguments for row or column access, see [`rtables_access`]: either `col_names` (`character`) including
#'   the names of the columns which should be used, or alternatively `col_indices` (`integer`) giving the indices
#'   directly instead.
#'
#' @return
#' * `has_count_in_cols()` returns a condition function that sums the counts in the specified column.
#'
#' @examples
#' \donttest{
#' more_than_one <- has_count_in_cols(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one))
#' }
#'
#' @export
has_count_in_cols <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: the counts summed over the selected columns reach the threshold.
  total_reaches_threshold <- function(table_row) {
    sum(h_row_counts(table_row, ...)) >= atleast
  }

  CombinationFunction(total_reaches_threshold)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any of the counts in
#'   the specified columns satisfying a threshold.
#'
#' @param atleast (`count` or `proportion`)\cr threshold which should be met in order to keep the row.
#'
#' @return
#' * `has_count_in_any_col()` returns a condition function that compares the counts in the
#'   specified columns with the threshold.
#'
#' @examples
#' \donttest{
#' # `has_count_in_any_col`
#' any_more_than_one <- has_count_in_any_col(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(any_more_than_one))
#' }
#'
#' @export
has_count_in_any_col <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: at least one of the selected columns has a count reaching the threshold.
  any_reaches_threshold <- function(table_row) {
    counts <- h_row_counts(table_row, ...)
    any(counts >= atleast)
  }

  CombinationFunction(any_reaches_threshold)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on total fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_cols()` returns a condition function that sums the counts in the
#'   specified column, and computes the fraction by dividing by the total column counts.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_cols`
#' more_than_five_percent <- has_fraction_in_cols(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent))
#' }
#'
#' @export
has_fraction_in_cols <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: summed row counts divided by summed column counts reach the threshold.
  pooled_fraction_reaches_threshold <- function(table_row) {
    numerator <- sum(h_row_counts(table_row, ...))
    denominator <- sum(h_col_counts(table_row, ...))
    (numerator / denominator) >= atleast
  }

  CombinationFunction(pooled_fraction_reaches_threshold)
}

#' @describeIn prune_occurrences Constructor for creating condition functions on any fraction in
#'   the specified columns.
#'
#' @return
#' * `has_fraction_in_any_col()` returns a condition function that looks at the fractions
#'  in the specified columns and checks whether any of them fulfill the threshold.
#'
#' @examples
#' \donttest{
#' # `has_fraction_in_any_col`
#' any_atleast_five_percent <- has_fraction_in_any_col(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(any_atleast_five_percent))
#' }
#'
#' @export
has_fraction_in_any_col <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: at least one of the selected columns has a fraction reaching the threshold.
  any_fraction_reaches_threshold <- function(table_row) {
    fractions <- h_row_fractions(table_row, ...)
    any(fractions >= atleast)
  }

  CombinationFunction(any_fraction_reaches_threshold)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the fractions reported in each specified column.
#'
#' @return
#' * `has_fractions_difference()` returns a condition function that extracts the fractions of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' # `has_fractions_difference`
#' more_than_five_percent_diff <- has_fractions_difference(atleast = 0.05, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_five_percent_diff))
#' }
#'
#' @export
has_fractions_difference <- function(atleast, ...) {
  assert_proportion_value(atleast, include_boundaries = TRUE)

  # Condition: the spread (maximum minus minimum) of the selected fractions reaches the threshold.
  spread_reaches_threshold <- function(table_row) {
    fraction_range <- range(h_row_fractions(table_row, ...))
    diff(fraction_range) >= atleast
  }

  CombinationFunction(spread_reaches_threshold)
}

#' @describeIn prune_occurrences Constructor for creating condition function that checks the difference
#'   between the counts reported in each specified column.
#'
#' @return
#' * `has_counts_difference()` returns a condition function that extracts the counts of each
#'   specified column, and computes the difference of the minimum and maximum.
#'
#' @examples
#' \donttest{
#' more_than_one_diff <- has_counts_difference(atleast = 1L, col_names = names(tab))
#' prune_table(tab, keep_rows(more_than_one_diff))
#' }
#'
#' @export
has_counts_difference <- function(atleast, ...) {
  checkmate::assert_count(atleast)

  # Condition: the spread (maximum minus minimum) of the selected counts reaches the threshold.
  spread_reaches_threshold <- function(table_row) {
    count_range <- range(h_row_counts(table_row, ...))
    diff(count_range) >= atleast
  }

  CombinationFunction(spread_reaches_threshold)
}

#' Custom Split Functions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Collection of useful functions that are expanding on the core list of functions
#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()]
#' for more information on how to make a custom split function. All these functions
#' work with [split_rows_by()] argument `split_fun` to modify the way the split
#' happens. For other split functions, consider consulting [`rtables::split_funcs`].
#'
#' @seealso [rtables::make_split_fun()]
#'
#' @name utils_split_funs
NULL

#' @describeIn utils_split_funs split function to place reference group facet at a specific position
#'  during post-processing stage.
#'
#' @param position (`string` or `integer`)\cr should it be `"first"` or `"last"` or in a specific position?
#'
#' @return
#' * `ref_group_position` returns a utility function that puts the reference group
#'  as first, last or at a certain position and needs to be assigned to `split_fun`.
#'
#' @examples
#' library(dplyr)
#'
#' dat <- data.frame(
#'   x = factor(letters[1:5], levels = letters[5:1]),
#'   y = 1:5
#' )
#'
#' # With rtables layout functions
#' basic_table() %>%
#'   split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>%
#'   analyze("y") %>%
#'   build_table(dat)
#'
#' # With tern layout functions
#' adtte_f <- tern_ex_adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVAL = day2month(AVAL),
#'     is_event = CNSR == 0
#'   )
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' basic_table() %>%
#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>%
#'   add_colcounts() %>%
#'   surv_time(
#'     vars = "AVAL",
#'     var_labels = "Survival Time (Months)",
#'     is_event = "is_event",
#'   ) %>%
#'   build_table(df = adtte_f)
#'
#' @export
ref_group_position <- function(position = "first") {
  # Post-processing step: moves the reference group facet to the requested position.
  move_ref_group <- function(splret, spl, fulldf) {
    if (!"ref_group_value" %in% methods::slotNames(spl)) {
      stop("Reference group is undefined.")
    }

    # Levels of the split variable in the full data define the starting order.
    spl_var <- rtables:::spl_payload(spl)
    fulldf[[spl_var]] <- factor(fulldf[[spl_var]])
    init_lvls <- levels(fulldf[[spl_var]])
    facet_names <- names(splret$values)

    if (!all(facet_names %in% init_lvls)) {
      stop("This split function does not work with combination facets.")
    }

    ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl))
    n_lvls <- length(init_lvls)

    # `pos` is the number of non-reference levels placed before the reference group.
    if (checkmate::test_choice(position, c("first", "last"))) {
      pos <- if (position == "first") 0 else n_lvls
    } else if (checkmate::test_int(position, lower = 1, upper = n_lvls)) {
      pos <- position - 1
    } else {
      stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.")
    }

    # Take the reference level out and re-insert it after `pos` of the remaining levels.
    other_lvls <- init_lvls[-ref_group_pos]
    reord_lvls <- append(other_lvls, init_lvls[ref_group_pos], after = pos)
    ord <- match(reord_lvls, facet_names)

    make_split_result(
      splret$values[ord],
      splret$datasplit[ord],
      splret$labels[ord]
    )
  }

  make_split_fun(post = list(move_ref_group))
}

#' @describeIn utils_split_funs split function to change level order based on an `integer`
#'   vector or a `character` vector that represents the split variable's factor levels.
#'
#' @param order (`character` or `integer`)\cr vector of ordering indexes for the split facets.
#'
#' @return
#' * `level_order` returns a utility function that changes the original levels' order,
#'   depending on input `order` and split levels.
#'
#' @examples
#' # level_order --------
#' # Even if default would bring ref_group first, the original order puts it last
#' basic_table() %>%
#'   split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>%
#'   analyze("Sepal.Length") %>%
#'   build_table(iris)
#'
#' # character vector
#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)])
#' basic_table() %>%
#'   split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>%
#'   analyze("Sepal.Length") %>%
#'   build_table(iris)
#'
#' @export
level_order <- function(order) {
  # Post-processing step: re-arranges the facets of the split according to `order`.
  reorder_facets <- function(splret, spl, fulldf) {
    facet_names <- names(splret$values)
    n_facets <- length(splret$values)

    if (checkmate::test_integerish(order)) {
      # Numeric input is used directly as facet indices.
      checkmate::assert_integerish(order, lower = 1, upper = n_facets)
      ord <- order
    } else {
      # Character input must name every facet and is translated into indices.
      checkmate::assert_character(order, len = n_facets)
      checkmate::assert_set_equal(order, facet_names, ordered = FALSE)
      ord <- match(order, facet_names)
    }

    make_split_result(
      splret$values[ord],
      splret$datasplit[ord],
      splret$labels[ord]
    )
  }

  make_split_fun(post = list(reorder_facets))
}

#' Counting Patients Summing Exposure Across All Patients in Columns
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Counting the number of patients and summing analysis value (i.e exposure values) across all patients
#' when a column table layout is required.
#'
#' @inheritParams argument_convention
#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
#' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will be used as label.
#' @param .stats (`character`)\cr statistics to select for the table. Run
#' `get_stats("analyze_patients_exposure_in_cols")` to see available statistics for this function.
#'
#' @name summarize_patients_exposure_in_cols
#' @order 1
NULL

#' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
#'   of patients and the sum of exposure across all patients.
#'
#' @return
#' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
#'   * `n_patients`: Number of unique patients in `df`.
#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
#'
#' @keywords internal
s_count_patients_sum_exposure <- function(df,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          labelstr = "",
                                          .stats = c("n_patients", "sum_exposure"),
                                          .N_col, # nolint
                                          custom_label = NULL) {
  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
  checkmate::assert_string(id)
  checkmate::assert_string(labelstr)
  checkmate::assert_string(custom_label, null.ok = TRUE)
  checkmate::assert_numeric(df[[ex_var]])
  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))

  # Label precedence: non-empty `labelstr`, then `custom_label`, then the default text.
  row_label <- if (labelstr == "") {
    if (is.null(custom_label)) {
      "Total patients numbers/person time"
    } else {
      custom_label
    }
  } else {
    labelstr
  }

  y <- list()

  # Only the requested statistics are computed; both share the same row label.
  if ("n_patients" %in% .stats) {
    patient_stats <- s_num_patients_content(
      df = df,
      .N_col = .N_col, # nolint
      .var = id,
      labelstr = ""
    )
    y$n_patients <- formatters::with_label(patient_stats$unique, row_label)
  }
  if ("sum_exposure" %in% .stats) {
    total_exposure <- sum(df[[ex_var]])
    y$sum_exposure <- formatters::with_label(total_exposure, row_label)
  }

  y
}

#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
#'
#' @return
#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
#'
#' @examples
#' a_count_patients_sum_exposure(
#'   df = df,
#'   var = "SEX",
#'   .N_col = nrow(df),
#'   .stats = "n_patients"
#' )
#'
#' @export
a_count_patients_sum_exposure <- function(df,
                                          var = NULL,
                                          ex_var = "AVAL",
                                          id = "USUBJID",
                                          add_total_level = FALSE,
                                          custom_label = NULL,
                                          labelstr = "",
                                          .N_col, # nolint
                                          .stats,
                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx")) {
  # Analysis/content function producing one formatted row per level of `var`
  # (plus an optional "Total" row), or a single "Total" row when `var` is `NULL`.
  #
  # `.stats` must name exactly one statistic: it is used as a `[[` index into the
  # statistics list and into `.formats`.
  checkmate::assert_flag(add_total_level)
  checkmate::assert_string(.stats)

  if (!is.null(var)) {
    assert_df_with_variables(df, list(var = var))
    df[[var]] <- as.factor(df[[var]])
  }

  # Computes the single requested statistic for one subset of the data.
  stat_for <- function(data, label) {
    s_count_patients_sum_exposure(
      df = data,
      ex_var = ex_var,
      id = id,
      labelstr = labelstr,
      .N_col = .N_col,
      .stats = .stats,
      custom_label = label
    )[[.stats]]
  }

  y <- list()
  if (is.null(var)) {
    y[[.stats]] <- list(Total = stat_for(df, custom_label))
  } else {
    for (lvl in levels(df[[var]])) {
      # Standard indexing instead of `subset(df, get(var) == lvl)`: the latter evaluates
      # `var` and `lvl` inside the data first, so a column named `var` or `lvl` would
      # silently change the selection. `%in%` never yields `NA`, matching `subset()`'s
      # dropping of rows with a missing level.
      level_rows <- df[[var]] %in% lvl
      y[[.stats]][[lvl]] <- stat_for(df[level_rows, , drop = FALSE], lvl)
    }
    if (add_total_level) {
      y[[.stats]][["Total"]] <- stat_for(df, custom_label)
    }
  }

  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
#'
#' @return
#' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @examples
#' lyt5 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
#'
#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
#' result5
#'
#' lyt6 <- basic_table() %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
#'
#' result6 <- build_table(lyt6, df = df, alt_counts_df = adsl)
#' result6
#'
#' @export
#' @order 3
summarize_patients_exposure_in_cols <- function(lyt, # nolint
                                                var,
                                                ex_var = "AVAL",
                                                id = "USUBJID",
                                                add_total_level = FALSE,
                                                custom_label = NULL,
                                                col_split = TRUE,
                                                na_str = default_na_str(),
                                                ...,
                                                .stats = c("n_patients", "sum_exposure"),
                                                .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                .indent_mods = NULL) {
  # Layout function adding content rows computed by `a_count_patients_sum_exposure()`.
  # NOTE(review): `.indent_mods` is accepted but not referenced anywhere in this body.

  # Arguments handed to the content function (together with anything passed via `...`).
  cfun_args <- c(
    list(
      ex_var = ex_var,
      id = id,
      add_total_level = add_total_level,
      custom_label = custom_label
    ),
    list(...)
  )

  if (col_split) {
    # One column per requested statistic; `var` is repeated so each column refers to it,
    # and `.stats` is attached to the column split as an extra argument.
    n_stat_cols <- length(.stats)
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(var, n_stat_cols),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = a_count_patients_sum_exposure,
    na_str = na_str,
    extra_args = cfun_args
  )
}

#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
#'   function arguments and additional format arguments. This function is a wrapper for
#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
#'
#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
#'   column split has been done already earlier in the layout pipe.
#'
#' @return
#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
#'   columns, to the table layout.
#'
#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows,
#'   `analyze_patients_exposure_in_cols()` generates data rows which will _not_ be repeated on multiple
#'   pages when pagination is used.
#'
#' @examples
#' set.seed(1)
#' df <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 6), rep("ARM B", 6)),
#'   SEX = c(rep("Female", 6), rep("Male", 6)),
#'   AVAL = as.numeric(sample(seq(1, 20), 12)),
#'   stringsAsFactors = TRUE
#' )
#' adsl <- data.frame(
#'   USUBJID = c(paste("id", seq(1, 12), sep = "")),
#'   ARMCD = c(rep("ARM A", 2), rep("ARM B", 2)),
#'   SEX = c(rep("Female", 2), rep("Male", 2)),
#'   stringsAsFactors = TRUE
#' )
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
#' result <- build_table(lyt, df = df, alt_counts_df = adsl)
#' result
#'
#' lyt2 <- basic_table() %>%
#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
#'   summarize_patients_exposure_in_cols(
#'     var = "AVAL", col_split = TRUE,
#'     .stats = "n_patients", custom_label = "some custom label"
#'   ) %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
#' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
#' result2
#'
#' lyt3 <- basic_table() %>%
#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
#' result3
#'
#' # Adding total levels and custom label
#' lyt4 <- basic_table(
#'   show_colcounts = TRUE
#' ) %>%
#'   analyze_patients_exposure_in_cols(
#'     var = "ARMCD",
#'     col_split = TRUE,
#'     add_total_level = TRUE,
#'     custom_label = "TOTAL"
#'   ) %>%
#'   append_topleft(c("", "Sex"))
#'
#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
#' result4
#'
#' @export
#' @order 2
analyze_patients_exposure_in_cols <- function(lyt, # nolint
                                              var = NULL,
                                              ex_var = "AVAL",
                                              id = "USUBJID",
                                              add_total_level = FALSE,
                                              custom_label = NULL,
                                              col_split = TRUE,
                                              na_str = default_na_str(),
                                              .stats = c("n_patients", "sum_exposure"),
                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                              .indent_mods = 0L,
                                              ...) {
  # Layout function adding data rows computed by `a_count_patients_sum_exposure()`.

  # Arguments handed to the analysis function (together with anything passed via `...`).
  afun_args <- c(
    list(
      var = var,
      ex_var = ex_var,
      id = id,
      add_total_level = add_total_level,
      custom_label = custom_label
    ),
    list(...)
  )

  if (col_split) {
    # One column per requested statistic; here the exposure variable is the column variable,
    # and `.stats` is attached to the column split as an extra argument.
    n_stat_cols <- length(.stats)
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = rep(ex_var, n_stat_cols),
      varlabels = .labels[.stats],
      extra_args = list(.stats = .stats)
    )
  }

  out_lyt <- analyze_colvars(
    lyt,
    afun = a_count_patients_sum_exposure,
    indent_mod = .indent_mods,
    na_str = na_str,
    extra_args = afun_args
  )
  out_lyt
}

#' Summarize the Change from Baseline or Absolute Baseline Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` indicates the numerical change from baseline results,
#' and additional required secondary analysis variables are `value` and `baseline_flag`.
#' Depending on the baseline flag, either the absolute baseline values (at baseline)
#' or the change from baseline values (post-baseline) are then summarized.
#'
#' @inheritParams argument_convention
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("analyze_vars_numeric")`
#'   to see available statistics for this function.
#'
#' @name summarize_change
#' @order 1
NULL

#' @describeIn summarize_change Statistics function that summarizes baseline or post-baseline visits.
#'
#' @return
#' * `s_change_from_baseline()` returns the same values returned by [s_summary.numeric()].
#'
#' @note The data in `df` must either all be from baseline visits or all be from post-baseline visits.
#'   Otherwise an error will be thrown.
#'
#' @keywords internal
s_change_from_baseline <- function(df,
                                   .var,
                                   variables,
                                   na.rm = TRUE, # nolint
                                   ...) {
  # Statistics function: summarizes `variables$value` when the rows are flagged as baseline
  # and `.var` (the change variable) otherwise, then delegates to `s_summary()`.
  #
  # Column existence is verified first so that a misspelled column name is reported as a
  # missing variable rather than as a type failure on a `NULL` column.
  assert_df_with_variables(df, c(variables, list(chg = .var)))
  checkmate::assert_numeric(df[[variables$value]])
  checkmate::assert_numeric(df[[.var]])
  checkmate::assert_logical(df[[variables$baseline_flag]])
  # At most one distinct flag value is allowed: the subset must not mix baseline and
  # post-baseline records.
  checkmate::assert_vector(unique(df[[variables$baseline_flag]]), max.len = 1)

  combined <- ifelse(
    df[[variables$baseline_flag]],
    df[[variables$value]],
    df[[.var]]
  )
  # `ifelse()` on a zero-length logical test returns `logical(0)`; convert it so that the
  # summary is still computed on a numeric vector.
  if (is.logical(combined) && identical(length(combined), 0L)) {
    combined <- numeric(0)
  }
  s_summary(combined, na.rm = na.rm, ...)
}

#' @describeIn summarize_change Formatted analysis function which is used as `afun` in `summarize_change()`.
#'
#' @return
#' * `a_change_from_baseline()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_change_from_baseline <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_change_from_baseline,
  # Default format string for each statistic, keyed by statistic name.
  .formats = c(
    n = "xx",
    mean_sd = "xx.xx (xx.xx)",
    mean_se = "xx.xx (xx.xx)",
    median = "xx.xx",
    range = "xx.xx - xx.xx",
    mean_ci = "(xx.xx, xx.xx)",
    median_ci = "(xx.xx, xx.xx)",
    mean_pval = "xx.xx"
  ),
  # Default row labels; only these four statistics get a label set here.
  .labels = c(
    mean_sd = "Mean (SD)",
    mean_se = "Mean (SE)",
    median = "Median",
    range = "Min - Max"
  )
)

#' @describeIn summarize_change Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `summarize_change()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_change_from_baseline()` to the table layout.
#'
#' @note To be used after a split on visits in the layout, such that each data subset only contains
#'   either baseline or post-baseline data.
#'
#' @examples
#' library(dplyr)
#'
#' ## Fabricate dataset
#' dta_test <- data.frame(
#'   USUBJID = rep(1:6, each = 3),
#'   AVISIT = rep(paste0("V", 1:3), 6),
#'   ARM = rep(LETTERS[1:3], rep(6, 3)),
#'   AVAL = c(9:1, rep(NA, 9))
#' ) %>%
#'   mutate(ABLFLL = AVISIT == "V1") %>%
#'   group_by(USUBJID) %>%
#'   mutate(
#'     BLVAL = AVAL[ABLFLL],
#'     CHG = AVAL - BLVAL
#'   ) %>%
#'   ungroup()
#'
#' results <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_rows_by("AVISIT") %>%
#'   summarize_change("CHG", variables = list(value = "AVAL", baseline_flag = "ABLFLL")) %>%
#'   build_table(dta_test)
#'
#' results
#'
#' @export
#' @order 2
summarize_change <- function(lyt,
                             vars,
                             variables,
                             na_str = default_na_str(),
                             nested = TRUE,
                             ...,
                             table_names = vars,
                             .stats = c("n", "mean_sd", "median", "range"),
                             .formats = NULL,
                             .labels = NULL,
                             .indent_mods = NULL) {
  # Layout function: analyzes `vars` with a customized copy of `a_change_from_baseline`.

  # Customize statistics selection, formats, labels and indentation of the analysis function.
  change_afun <- make_afun(
    a_change_from_baseline,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  # `variables` plus anything passed through `...` is forwarded to the analysis function.
  afun_args <- c(list(variables = variables), list(...))

  analyze(
    lyt = lyt,
    vars = vars,
    afun = change_afun,
    na_str = na_str,
    nested = nested,
    extra_args = afun_args,
    table_names = table_names
  )
}

#' Patient Counts with Abnormal Range Values
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Primary analysis variable `.var` indicates the abnormal range result (`character` or `factor`)
#' and additional analysis variables are `id` (`character` or `factor`) and `baseline` (`character` or
#' `factor`). For each direction specified in `abnormal` (e.g. high or low) count patients in the
#' numerator and denominator as follows:
#'   * `num` : The number of patients with this abnormality recorded while on treatment.
#'   * `denom`: The number of patients with at least one post-baseline assessment.
#'
#' @inheritParams argument_convention
#' @param abnormal (named `list`)\cr list identifying the abnormal range level(s) in `var`. Defaults to
#'   `list(Low = "LOW", High = "HIGH")` but you can also group different levels into the named list,
#'   for example, `abnormal = list(Low = c("LOW", "LOW LOW"), High = c("HIGH", "HIGH HIGH"))`.
#' @param exclude_base_abn (`flag`)\cr whether to exclude subjects with baseline abnormality
#'   from numerator and denominator.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal")`
#'   to see available statistics for this function.
#'
#' @note
#' * `count_abnormal()` only works with a single variable containing multiple abnormal levels.
#' * `df` should be filtered to include only post-baseline records.
#' * the denominator includes patients that might have other abnormal levels at baseline,
#'   and patients with missing baseline. Patients with these abnormalities at
#'   baseline can be optionally excluded from numerator and denominator.
#'
#' @name abnormal
#' @include formatting_functions.R
#' @order 1
NULL

#' @describeIn abnormal Statistics function which counts patients with abnormal range values
#'   for a single `abnormal` level.
#'
#' @return
#' * `s_count_abnormal()` returns the statistic `fraction` which is a vector with `num` and `denom` counts of patients.
#'
#' @keywords internal
s_count_abnormal <- function(df,
                             .var,
                             abnormal = list(Low = "LOW", High = "HIGH"),
                             variables = list(id = "USUBJID", baseline = "BNRIND"),
                             exclude_base_abn = FALSE) {
  # Statistics function: for each abnormality direction in `abnormal`, counts the unique
  # patients with that abnormality (`num`) among the eligible patients (`denom`).
  # Returns `list(fraction = <named list of labelled c(num, denom) vectors>)`.
  checkmate::assert_list(abnormal, types = "character", names = "named", len = 2, any.missing = FALSE)
  checkmate::assert_true(any(unlist(abnormal) %in% levels(df[[.var]])))
  checkmate::assert_factor(df[[.var]])
  checkmate::assert_flag(exclude_base_abn)
  assert_df_with_variables(df, c(range = .var, variables))
  checkmate::assert_multi_class(df[[variables$baseline]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  # Number of distinct patient identifiers among the selected rows.
  n_unique_ids <- function(rows) {
    length(unique(df[rows, variables$id, drop = TRUE]))
  }

  # Labelled `c(num, denom)` for one direction `direction` with abnormal levels `lvls`.
  fraction_for <- function(direction, lvls) {
    # Denominator rows: every row, unless patients with the same abnormality at baseline
    # have to be excluded.
    in_denom <- if (exclude_base_abn) {
      !(df[[variables$baseline]] %in% lvls)
    } else {
      TRUE
    }
    n_denom <- n_unique_ids(in_denom)

    # Numerator rows: rows showing the abnormality that also belong to the denominator.
    in_num <- (df[[.var]] %in% lvls) & in_denom
    n_num <- n_unique_ids(in_num)

    formatters::with_label(c(num = n_num, denom = n_denom), direction)
  }

  # Keep, per direction, only the abnormal levels that exist as factor levels of `.var`,
  # and drop directions left without any level.
  present_lvls <- lapply(abnormal, intersect, levels(df[[.var]]))
  present_lvls <- present_lvls[lengths(present_lvls) > 0]

  directions <- names(present_lvls)
  fractions <- lapply(directions, function(nm) fraction_for(nm, present_lvls[[nm]]))
  names(fractions) <- directions

  list(fraction = fractions)
}

#' @describeIn abnormal Formatted analysis function which is used as `afun` in `count_abnormal()`.
#'
#' @return
#' * `a_count_abnormal()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_abnormal <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_abnormal,
  # The `fraction` statistic is rendered with the `format_fraction` formatter.
  .formats = c(fraction = format_fraction)
)

#' @describeIn abnormal Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_abnormal()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_abnormal()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' df <- data.frame(
#'   USUBJID = as.character(c(1, 1, 2, 2)),
#'   ANRIND = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BNRIND = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df <- df %>%
#'   filter(ONTRTFL == "Y")
#'
#' # Layout creating function.
#' basic_table() %>%
#'   count_abnormal(var = "ANRIND", abnormal = list(high = "HIGH", low = "LOW")) %>%
#'   build_table(df)
#'
#' # Passing of statistics function and formatting arguments.
#' df2 <- data.frame(
#'   ID = as.character(c(1, 1, 2, 2)),
#'   RANGE = factor(c("NORMAL", "LOW", "HIGH", "HIGH")),
#'   BL_RANGE = factor(c("NORMAL", "NORMAL", "HIGH", "HIGH")),
#'   ONTRTFL = c("", "Y", "", "Y"),
#'   stringsAsFactors = FALSE
#' )
#'
#' # Select only post-baseline records.
#' df2 <- df2 %>%
#'   filter(ONTRTFL == "Y")
#'
#' basic_table() %>%
#'   count_abnormal(
#'     var = "RANGE",
#'     abnormal = list(low = "LOW", high = "HIGH"),
#'     variables = list(id = "ID", baseline = "BL_RANGE")
#'   ) %>%
#'   build_table(df2)
#'
#' @export
#' @order 2
count_abnormal <- function(lyt,
                           var,
                           abnormal = list(Low = "LOW", High = "HIGH"),
                           variables = list(id = "USUBJID", baseline = "BNRIND"),
                           exclude_base_abn = FALSE,
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           table_names = var,
                           .stats = NULL,
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = NULL) {
  # Layout function: analyzes the single variable `var` with a customized copy of
  # `a_count_abnormal`, hiding the variable label row.

  # Arguments forwarded to the analysis function (together with anything passed via `...`).
  afun_args <- c(
    list(
      abnormal = abnormal,
      variables = variables,
      exclude_base_abn = exclude_base_abn
    ),
    list(...)
  )

  # `fraction` is listed in `.ungroup_stats`; its value is a list with one element per
  # abnormality direction.
  abnormal_afun <- make_afun(
    a_count_abnormal,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "fraction"
  )

  # Exactly one analysis variable is supported.
  checkmate::assert_string(var)

  analyze(
    lyt = lyt,
    vars = var,
    afun = abnormal_afun,
    na_str = na_str,
    nested = nested,
    table_names = table_names,
    extra_args = afun_args,
    show_labels = "hidden"
  )
}

#' Count the Number of Patients with a Particular Event
#'
#' @description `r lifecycle::badge("stable")`
#'
#' The primary analysis variable `.var` denotes the unique patient identifier.
#'
#' @inheritParams argument_convention
#' @param filters (`character`)\cr a character vector specifying the column names and flag variables
#'   to be used for counting the number of unique identifiers satisfying such conditions.
#'   Multiple column names and flags are accepted in this format
#'   `c("column_name1" = "flag1", "column_name2" = "flag2")`.
#'   Note that only equality is being accepted as condition.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_patients_with_event")`
#'   to see available statistics for this function.
#'
#' @seealso [count_patients_with_flags]
#'
#' @name count_patients_with_event
#' @order 1
NULL

#' @describeIn count_patients_with_event Statistics function which counts the number of patients for which
#'   the defined event has occurred.
#'
#' @inheritParams analyze_variables
#' @param .var (`character`)\cr name of the column that contains the unique identifier.
#'
#' @return
#' * `s_count_patients_with_event()` returns the count and fraction of unique identifiers with the defined event.
#'
#' @examples
#' # `s_count_patients_with_event()`
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y")
#' )
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL")
#' )
#'
#' s_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'   denom = "N_col",
#'   .N_col = 456
#' )
#'
#' @export
s_count_patients_with_event <- function(df,
                                        .var,
                                        filters,
                                        .N_col, # nolint
                                        .N_row, # nolint
                                        denom = c("n", "N_row", "N_col")) {
  # Statistics function: counts the unique identifiers in `.var` that have at least one
  # row satisfying every `column == value` condition given in `filters`.
  filter_cols <- names(filters)
  checkmate::assert_subset(filter_cols, colnames(df))

  # Row positions matching each single condition; `which()` leaves out rows where the
  # comparison is `NA`.
  rows_per_filter <- Map(
    function(col, flag) which(df[[col]] == flag),
    filter_cols,
    filters
  )
  # Rows satisfying all conditions at once.
  matching_rows <- Reduce(intersect, rows_per_filter)

  all_ids <- as.character(unique(df[[.var]]))
  event_ids <- as.character(unique(df[matching_rows, ][[.var]]))

  counted <- s_count_values(
    all_ids,
    event_ids,
    denom = denom,
    .N_col = .N_col,
    .N_row = .N_row
  )
  counted
}

#' @describeIn count_patients_with_event Formatted analysis function which is used as `afun`
#'   in `count_patients_with_event()`.
#'
#' @return
#' * `a_count_patients_with_event()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @examples
#' # `a_count_patients_with_event()`
#'
#' a_count_patients_with_event(
#'   tern_ex_adae,
#'   .var = "SUBJID",
#'   filters = c("TRTEMFL" = "Y"),
#'   .N_col = 100,
#'   .N_row = 100
#' )
#'
#' @export
a_count_patients_with_event <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_patients_with_event,
  # The `count_fraction` statistic is rendered with `format_count_fraction_fixed_dp`.
  .formats = c(count_fraction = format_count_fraction_fixed_dp)
)

#' @describeIn count_patients_with_event Layout-creating function which can take statistics function
#'   arguments and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_patients_with_event()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_patients_with_event()` to the table layout.
#'
#' @examples
#' # `count_patients_with_event()`
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_values(
#'     "STUDYID",
#'     values = "AB12345",
#'     .stats = "count",
#'     .labels = c(count = "Total AEs")
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with at least one adverse event"),
#'     table_names = "tbl_all"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL"),
#'     .labels = c(count_fraction = "Total number of patients with fatal AEs"),
#'     table_names = "tbl_fatal"
#'   ) %>%
#'   count_patients_with_event(
#'     "SUBJID",
#'     filters = c("TRTEMFL" = "Y", "AEOUT" = "FATAL", "AEREL" = "Y"),
#'     .labels = c(count_fraction = "Total number of patients with related fatal AEs"),
#'     .indent_mods = c(count_fraction = 2L),
#'     table_names = "tbl_rel_fatal"
#'   )
#'
#' build_table(lyt, tern_ex_adae, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_patients_with_event <- function(lyt,
                                      vars,
                                      filters,
                                      riskdiff = FALSE,
                                      na_str = default_na_str(),
                                      nested = TRUE,
                                      ...,
                                      table_names = vars,
                                      .stats = "count_fraction",
                                      .formats = NULL,
                                      .labels = NULL,
                                      .indent_mods = NULL) {
  # Layout function: analyzes `vars` with a customized copy of
  # `a_count_patients_with_event`. When `riskdiff` is `TRUE`, `afun_riskdiff` (not defined
  # in this function) is used instead and receives the analysis function and its
  # arguments through `extra_args`.
  checkmate::assert_flag(riskdiff)

  # Arguments for the statistics function (together with anything passed via `...`).
  s_args <- list(filters = filters, ...)

  afun <- make_afun(
    a_count_patients_with_event,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods
  )

  extra_args <- if (isFALSE(riskdiff)) {
    s_args
  } else {
    list(
      afun = list("s_count_patients_with_event" = afun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  # Plain `if`/`else` instead of `ifelse()`: `ifelse()` is meant for vectors and can only
  # hand back a function through its special case for attribute-free scalars; it fails as
  # soon as the function carries any attribute other than a source reference.
  analysis_fun <- if (isFALSE(riskdiff)) afun else afun_riskdiff

  # The variable label row is shown only when several variables are analyzed.
  label_visibility <- if (length(vars) > 1) "visible" else "hidden"

  analyze(
    lyt,
    vars,
    afun = analysis_fun,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = label_visibility,
    table_names = table_names
  )
}

#' Counting Missed Doses
#'
#' @description `r lifecycle::badge("stable")`
#'
#' These are specific functions to count patients with missed doses. The difference to [count_cumulative()] is
#' mainly the special labels.
#'
#' @inheritParams s_count_cumulative
#' @inheritParams argument_convention
#' @param thresholds (vector of `count`)\cr number of missed doses the patients at least had.
#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("count_missed_doses")`
#'   to see available statistics for this function.
#'
#' @seealso Relevant description function [d_count_missed_doses()].
#'
#' @name count_missed_doses
#' @order 1
NULL

#' @describeIn count_missed_doses Statistics function to count non-missing values.
#'
#' @return
#' * `s_count_nonmissing()` returns the statistic `n` which is the count of non-missing values in `x`.
#'
#' @keywords internal
s_count_nonmissing <- function(x) {
  # Statistic `n`: the result of `n_available()` applied to `x`.
  n_obs <- n_available(x)
  list(n = n_obs)
}

#' Description Function that Calculates Labels for [s_count_missed_doses()].
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @inheritParams s_count_missed_doses
#'
#' @return [d_count_missed_doses()] returns a `character` vector with one label per threshold.
#'
#' @seealso [s_count_missed_doses()]
#'
#' @export
d_count_missed_doses <- function(thresholds) {
  # Builds one label per threshold, e.g. "At least 1 missed dose" or
  # "At least 5 missed doses" (plural "s" whenever the threshold exceeds 1).
  #
  # `paste0()` treats zero-length arguments as "", which would turn empty input into the
  # single bogus label "At least  missed dose"; return no labels in that case.
  if (length(thresholds) == 0L) {
    return(character(0))
  }
  paste0("At least ", thresholds, " missed dose", ifelse(thresholds > 1, "s", ""))
}

#' @describeIn count_missed_doses Statistics function to count patients with missed doses.
#'
#' @return
#' * `s_count_missed_doses()` returns the statistics `n` and `count_fraction` with one element for each threshold.
#'
#' @keywords internal
s_count_missed_doses <- function(x,
                                 thresholds,
                                 .N_col) { # nolint
  # Statistics function: cumulative counts from `s_count_cumulative()` relabelled with the
  # missed-dose wording, preceded by the non-missing count `n`.
  cumulative <- s_count_cumulative(
    x = x,
    thresholds = thresholds,
    lower_tail = FALSE,
    include_eq = TRUE,
    .N_col = .N_col
  )

  # Replace the label of each `count_fraction` element by the corresponding
  # "At least ... missed dose(s)" text.
  dose_labels <- d_count_missed_doses(thresholds)
  for (pos in seq_along(cumulative$count_fraction)) {
    relabelled <- formatters::with_label(cumulative$count_fraction[[pos]], label = dose_labels[pos])
    cumulative$count_fraction[[pos]] <- relabelled
  }

  c(s_count_nonmissing(x), cumulative)
}

#' @describeIn count_missed_doses Formatted analysis function which is used as `afun`
#'   in `count_missed_doses()`.
#'
#' @return
#' * `a_count_missed_doses()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_count_missed_doses <- make_afun(
  # Statistics function being wrapped into a formatted analysis function.
  s_count_missed_doses,
  # `n` uses a plain format string; `count_fraction` uses the `format_count_fraction` formatter.
  .formats = c(n = "xx", count_fraction = format_count_fraction)
)

#' @describeIn count_missed_doses Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `count_missed_doses()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_count_missed_doses()` to the table layout.
#'
#' @examples
#' library(dplyr)
#'
#' anl <- tern_ex_adsl %>%
#'   distinct(STUDYID, USUBJID, ARM) %>%
#'   mutate(
#'     PARAMCD = "TNDOSMIS",
#'     PARAM = "Total number of missed doses during study",
#'     AVAL = sample(0:20, size = nrow(tern_ex_adsl), replace = TRUE),
#'     AVALC = ""
#'   )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   add_colcounts() %>%
#'   count_missed_doses("AVAL", thresholds = c(1, 5, 10, 15), var_labels = "Missed Doses") %>%
#'   build_table(anl, alt_counts_df = tern_ex_adsl)
#'
#' @export
#' @order 2
count_missed_doses <- function(lyt,
                               vars,
                               thresholds,
                               var_labels = vars,
                               show_labels = "visible",
                               na_str = default_na_str(),
                               nested = TRUE,
                               ...,
                               table_names = vars,
                               .stats = NULL,
                               .formats = NULL,
                               .labels = NULL,
                               .indent_mods = NULL) {
  # Layout function: analyzes `vars` with a customized copy of `a_count_missed_doses`.

  # `count_fraction` is listed in `.ungroup_stats`; `s_count_missed_doses()` labels each of
  # its elements separately, one per threshold.
  missed_doses_afun <- make_afun(
    a_count_missed_doses,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # `thresholds` plus anything passed through `...` is forwarded to the analysis function.
  afun_args <- c(list(thresholds = thresholds), list(...))

  analyze(
    lyt = lyt,
    vars = vars,
    afun = missed_doses_afun,
    var_labels = var_labels,
    table_names = table_names,
    show_labels = show_labels,
    na_str = na_str,
    nested = nested,
    extra_args = afun_args
  )
}

#' Helper Functions for Tabulating Biomarker Effects on Survival by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Helper functions which are documented here separately to not confuse the user
#' when reading about the user-facing functions.
#'
#' @inheritParams survival_biomarkers_subgroups
#' @inheritParams argument_convention
#' @inheritParams fit_coxreg_multivar
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adtte <- tern_ex_adtte
#'
#' # Save variable labels before data processing steps.
#' adtte_labels <- formatters::var_labels(adtte, fill = FALSE)
#'
#' adtte_f <- adtte %>%
#'   filter(PARAMCD == "OS") %>%
#'   mutate(
#'     AVALU = as.character(AVALU),
#'     is_event = CNSR == 0
#'   )
#' labels <- c("AVALU" = adtte_labels[["AVALU"]], "is_event" = "Event Flag")
#' formatters::var_labels(adtte_f)[names(labels)] <- labels
#'
#' @name h_survival_biomarkers_subgroups
NULL

#' @describeIn h_survival_biomarkers_subgroups helps with converting the "survival" function variable list
#'   to the "Cox regression" variable list. The reason is that currently there is an inconsistency between the variable
#'   names accepted by `extract_survival_subgroups()` and `fit_coxreg_multivar()`.
#'
#' @param biomarker (`string`)\cr the name of the biomarker variable.
#'
#' @return
#' * `h_surv_to_coxreg_variables()` returns a named `list` of elements `time`, `event`, `arm`,
#'   `covariates`, and `strata`.
#'
#' @examples
#' # This is how the variable list is converted internally.
#' h_surv_to_coxreg_variables(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "EVNT",
#'     covariates = c("A", "B"),
#'     strata = "D"
#'   ),
#'   biomarker = "AGE"
#' )
#'
#' @export
h_surv_to_coxreg_variables <- function(variables, biomarker) {
  # Input checks: `variables` must be a list whose `tte` and `is_event` elements
  # are single strings, and `biomarker` must be a single string too.
  checkmate::assert_list(variables)
  checkmate::assert_string(variables$tte)
  checkmate::assert_string(variables$is_event)
  checkmate::assert_string(biomarker)

  # Values in the order of the Cox regression element names below; the
  # biomarker takes the `arm` slot. `NULL` entries are kept as list elements.
  coxreg_values <- list(
    variables$tte,
    variables$is_event,
    biomarker,
    variables$covariates,
    variables$strata
  )
  stats::setNames(
    coxreg_values,
    c("time", "event", "arm", "covariates", "strata")
  )
}

#' @describeIn h_survival_biomarkers_subgroups prepares estimates for number of events, patients and median survival
#'   times, as well as hazard ratio estimates, confidence intervals and p-values, for multiple biomarkers
#'   in a given single data set.
#'   `variables` corresponds to names of variables found in `data`, passed as a named list and requires elements
#'   `tte`, `is_event`, `biomarkers` (vector of continuous biomarker variables) and optionally `subgroups` and `strata`.
#'
#' @return
#' * `h_coxreg_mult_cont_df()` returns a `data.frame` containing estimates and statistics for the selected biomarkers.
#'
#' @examples
#' # For a single population, estimate separately the effects
#' # of two biomarkers.
#' df <- h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "SEX",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f
#' )
#' df
#'
#' # If the data set is empty, still the corresponding rows with missings are returned.
#' h_coxreg_mult_cont_df(
#'   variables = list(
#'     tte = "AVAL",
#'     is_event = "is_event",
#'     biomarkers = c("BMRKR1", "AGE"),
#'     covariates = "REGION1",
#'     strata = c("STRATA1", "STRATA2")
#'   ),
#'   data = adtte_f[NULL, ]
#' )
#'
#' @export
h_coxreg_mult_cont_df <- function(variables,
                                  data,
                                  control = control_coxreg()) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")
  checkmate::assert_character(variables$biomarkers, min.len = 1, any.missing = FALSE)
  conf_level <- control[["conf_level"]]
  pval_label <- paste0(
    # The regex capitalizes the first letter of the string / sentence.
    "p-value (", gsub("(^[a-z])", "\\U\\1", trimws(control[["pval_method"]]), perl = TRUE), ")"
  )
  # If there is any data, run model, otherwise return empty results.
  if (nrow(data) > 0) {
    # Fit one model per biomarker; each fit contributes exactly one result row.
    l_result <- lapply(variables$biomarkers, function(bm) {
      coxreg_list <- fit_coxreg_multivar(
        variables = h_surv_to_coxreg_variables(variables, bm),
        data = data,
        control = control
      )
      result <- do.call(
        h_coxreg_multivar_extract,
        c(list(var = bm), coxreg_list[c("mod", "data", "control")])
      )
      # Derive the median survival time from the response stored in the fitted
      # model (its `time` and `status` columns are used below), so it reflects
      # the observations that entered the fit.
      data_fit <- as.data.frame(as.matrix(coxreg_list$mod$y))
      data_fit$status <- as.logical(data_fit$status)
      median_time <- s_surv_time(
        df = data_fit,
        .var = "time",
        is_event = "status"
      )$median
      data.frame(
        # Dummy column needed downstream to create a nested header.
        biomarker = bm,
        biomarker_label = formatters::var_labels(data[bm], fill = TRUE),
        n_tot = coxreg_list$mod$n,
        n_tot_events = coxreg_list$mod$nevent,
        median = as.numeric(median_time),
        # Only the first row of the extracted estimates is reported.
        result[1L, c("hr", "lcl", "ucl")],
        conf_level = conf_level,
        pval = result[1L, "pval"],
        pval_label = pval_label,
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, args = c(l_result, make.row.names = FALSE))
  } else {
    # No observations: return one row per biomarker with zero counts and
    # missing estimates, keeping the same columns as the non-empty case.
    data.frame(
      biomarker = variables$biomarkers,
      biomarker_label = formatters::var_labels(data[variables$biomarkers], fill = TRUE),
      n_tot = 0L,
      n_tot_events = 0L,
      median = NA,
      hr = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      pval = NA,
      pval_label = pval_label,
      row.names = seq_along(variables$biomarkers),
      stringsAsFactors = FALSE
    )
  }
}

#' @describeIn h_survival_biomarkers_subgroups prepares a single sub-table given a `df` containing
#'   the results for a single biomarker.
#'
#' @param df (`data.frame`)\cr results for a single biomarker, as part of what is
#'   returned by [extract_survival_biomarkers()] (it needs a couple of columns which are
#'   added by that high-level function relative to what is returned by [h_coxreg_mult_cont_df()],
#'   see the example).
#'
#' @return
#' * `h_tab_surv_one_biomarker()` returns an `rtables` table object with the given statistics arranged in columns.
#'
#' @examples
#' # Starting from above `df`, zoom in on one biomarker and add required columns.
#' df1 <- df[1, ]
#' df1$subgroup <- "All patients"
#' df1$row_type <- "content"
#' df1$var <- "ALL"
#' df1$var_label <- "All patients"
#' h_tab_surv_one_biomarker(
#'   df1,
#'   vars = c("n_tot", "n_tot_events", "median", "hr", "ci", "pval"),
#'   time_unit = "days"
#' )
#'
#' @export
h_tab_surv_one_biomarker <- function(df,
                                     vars,
                                     time_unit,
                                     na_str = default_na_str(),
                                     .indent_mods = 0L,
                                     ...) {
  # Build the full set of formatted analysis functions, then keep only the
  # ones matching the requested columns (in the order given by `vars`).
  all_afuns <- a_survival_subgroups(na_str = na_str)
  selected_afuns <- all_afuns[vars]

  # Column variables and labels; the confidence level and the p-value label
  # are read from the first row of `df`.
  column_info <- d_survival_subgroups_colvars(
    vars,
    conf_level = df$conf_level[1],
    method = df$pval_label[1],
    time_unit = time_unit
  )

  # Delegate the actual table construction to the generic one-biomarker helper.
  h_tab_one_biomarker(
    df = df,
    afuns = selected_afuns,
    colvars = column_info,
    na_str = na_str,
    .indent_mods = .indent_mods,
    ...
  )
}

#' Control Function for Logistic Regression Model Fitting
#'
#' @description `r lifecycle::badge("stable")`
#'
#' This is an auxiliary function for controlling arguments for logistic regression models.
#' `conf_level` refers to the confidence level used for the Odds Ratio CIs.
#'
#' @inheritParams argument_convention
#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
#'   This will be used when fitting the logistic regression model on the left hand side of the formula.
#'   Note that the evaluated expression should result in either a logical vector or a factor with 2
#'   levels. By default this is just `"response"` such that the original response variable is used
#'   and not modified further.
#'
#' @return A list of components with the same names as the arguments.
#'
#' @examples
#' # Standard options.
#' control_logistic()
#'
#' # Modify confidence level.
#' control_logistic(conf_level = 0.9)
#'
#' # Use a different response definition.
#' control_logistic(response_definition = "I(response %in% c('CR', 'PR'))")
#'
#' @export
control_logistic <- function(response_definition = "response",
                             conf_level = 0.95) {
  # Check the type first, so that non-string input (e.g. `NULL`, `NA` or a longer
  # vector) fails with a clear message rather than an opaque "Must be TRUE" from
  # the pattern check below.
  checkmate::assert_string(response_definition)
  # The definition has to mention the literal text `response`; `fixed = TRUE`
  # makes explicit that this is a plain substring match, not a regular expression.
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))
  assert_proportion_value(conf_level)
  list(
    response_definition = response_definition,
    conf_level = conf_level
  )
}

#' Generate PK reference dataset
#'
#' @description `r lifecycle::badge("stable")`
#'
#' @return `data.frame` of PK parameters
#'
#' @examples
#' pk_reference_dataset <- d_pkparam()
#'
#' @export
d_pkparam <- function() {
  pk_dataset <- as.data.frame(matrix(
    c(
      "TMAX", "Time of CMAX", "Tmax", "Plasma/Blood/Serum", "1",
      "CMAX", "Max Conc", "Cmax", "Plasma/Blood/Serum", "2",
      "CMAXD", "Max Conc Norm by Dose", "Cmax/D", "Plasma/Blood/Serum", "3",
      "AUCIFO", "AUC Infinity Obs", "AUCinf obs", "Plasma/Blood/Serum", "4",
      "AUCIFP", "AUC Infinity Pred", "AUCinf pred", "Plasma/Blood/Serum", "5",
      "AUCIFOD", "AUC Infinity Obs Norm by Dose", "AUCinf/D obs", "Plasma/Blood/Serum", "6",
      "AUCIFD", "AUC Infinity Pred Norm by Dose", "AUCinf/D pred", "Plasma/Blood/Serum", "7",
      "AUCPEO", "AUC %Extrapolation Obs", "AUCinf extrap obs", "Plasma/Blood/Serum", "8",
      "AUCPEP", "AUC %Extrapolation Pred", "AUCinf extrap pred", "Plasma/Blood/Serum", "9",
      "AUCINT", "AUC from T1 to T2", "AUCupper-lower ", "Plasma/Blood/Serum", "10",
      "AUCTAU", "AUC Over Dosing Interval", "AUCtau", "Plasma/Blood/Serum", "11",
      "AUCLST", "AUC to Last Nonzero Conc", "AUClast", "Plasma/Blood/Serum", "12",
      "AUCALL", "AUC All", "AUCall", "Plasma/Blood/Serum", "13",
      "AUMCIFO", "AUMC Infinity Obs", "AUMCinf obs", "Plasma/Blood/Serum", "14",
      "AUMCIFP", "AUMC Infinity Pred", "AUMCinf pred", "Plasma/Blood/Serum", "15",
      "AUMCPEO", "AUMC % Extrapolation Obs", "AUMC extrap obs", "Plasma/Blood/Serum", "16",
      "AUMCPEP", "AUMC % Extrapolation Pred", "AUMC extrap pred", "Plasma/Blood/Serum", "17",
      "AUMCTAU", "AUMC Over Dosing Interval", "AUMCtau", "Plasma/Blood/Serum", "18",
      "AUMCLST", "AUMC to Last Nonzero Conc", "AUMClast", "Plasma/Blood/Serum", "19",
      "AURCIFO", "AURC Infinity Obs", "AURCinf obs", "Plasma/Blood/Serum", "20",
      "AURCIFP", "AURC Infinity Pred", "AURCinf pred", "Plasma/Blood/Serum", "21",
      "AURCPEO", "AURC % Extrapolation Obs", "AURC extrap obs", "Plasma/Blood/Serum", "22",
      "AURCPEP", "AURC % Extrapolation Pred", "AURC extrap pred", "Plasma/Blood/Serum", "23",
      "AURCLST", "AURC Dosing to Last Conc", "AURClast", "Plasma/Blood/Serum", "24",
      "AURCALL", "AURC All", "AURCall", "Plasma/Blood/Serum", "25",
      "TLST", "Time of Last Nonzero Conc", "Tlast", "Plasma/Blood/Serum", "26",
      "CO", "Initial Conc", "CO", "Plasma/Blood/Serum", "27",
      "C0", "Initial Conc", "C0", "Plasma/Blood/Serum", "28",
      "CAVG", "Average Conc", "Cavg", "Plasma/Blood/Serum", "29",
      "CLST", "Last Nonzero Conc", "Clast", "Plasma/Blood/Serum", "30",
      "CMIN", "Min Conc", "Cmin", "Plasma/Blood/Serum", "31",
      "LAMZHL", "Half-Life Lambda z", "t1/2", "Plasma/Blood/Serum", "32",
      "CLFO", "Total CL Obs by F", "CL/F obs", "Plasma/Blood/Serum", "33",
      "CLFP", "Total CL Pred by F", "CL/F pred", "Plasma/Blood/Serum", "34",
      "CLO", "Total CL Obs", "CL obs", "Plasma/Blood/Serum", "35",
      "CLP", "Total CL Pred", "CL pred", "Plasma/Blood/Serum", "36",
      "CLSS", "Total CL Steady State Pred", "CLss", "Plasma/Blood/Serum", "37",
      "CLSSF", "Total CL Steady State Pred by F", "CLss/F", "Plasma/Blood/Serum", "38",
      "VZFO", "Vz Obs by F", "Vz/F obs", "Plasma/Blood/Serum", "39",
      "VZFP", "Vz Pred by F", "Vz/F pred", "Plasma/Blood/Serum", "40",
      "VZO", "Vz Obs", "Vz obs", "Plasma/Blood/Serum", "41",
      "VZP", "Vz Pred", "Vz pred", "Plasma/Blood/Serum", "42",
      "VSSO", "Vol Dist Steady State Obs", "Vss obs", "Plasma/Blood/Serum", "43",
      "VSSP", "Vol Dist Steady State Pred", "Vss pred", "Plasma/Blood/Serum", "44",
      "LAMZ", "Lambda z", "Lambda z", "Plasma/Blood/Serum", "45",
      "LAMZLL", "Lambda z Lower Limit", "Lambda z lower", "Plasma/Blood/Serum", "46",
      "LAMZUL", "Lambda z Upper Limit", "Lambda z upper", "Plasma/Blood/Serum", "47",
      "LAMZNPT", "Number of Points for Lambda z", "No points Lambda z", "Plasma/Blood/Serum", "48",
      "MRTIFO", "MRT Infinity Obs", "MRTinf obs", "Plasma/Blood/Serum", "49",
      "MRTIFP", "MRT Infinity Pred", "MRTinf pred", "Plasma/Blood/Serum", "50",
      "MRTLST", "MRT to Last Nonzero Conc", "MRTlast", "Plasma/Blood/Serum", "51",
      "R2", "R Squared", "Rsq", "Plasma/Blood/Serum", "52",
      "R2ADJ", "R Squared Adjusted", "Rsq adjusted", "Plasma/Blood/Serum", "53",
      "TLAG", "Time Until First Nonzero Conc", "TIag", "Plasma/Blood/Serum", "54",
      "TMIN", "Time of CMIN Observation", "Tmin", "Plasma/Blood/Serum", "55",
      "ACCI", "Accumulation Index", "Accumulation Index", "Plasma/Blood/Serum/Urine", "56",
      "FLUCP", "Fluctuation%", "Fluctuation", "Plasma/Blood/Serum", "57",
      "CORRXY", "Correlation Between TimeX and Log ConcY", "Corr xy", "Plasma/Blood/Serum", "58",
      "RCAMINT", "Amt Rec from T1 to T2", "Ae", "Urine", "59",
      "RCPCINT", "Pct Rec from T1 to T2", "Fe", "Urine", "60",
      "VOLPK", "Sum of Urine Vol", "Urine volume", "Urine", "61",
      "RENALCL", "Renal CL", "CLR", "Plasma/Blood/Serum/Urine", "62",
      "ERTMAX", "Time of Max Excretion Rate", "Tmax Rate", "Urine", "63",
      "RMAX", "Time of Maximum Response", "Rmax", "Matrix of PD", "64",
      "RMIN", "Time of Minimum Response", "Rmin", "Matrix of PD", "65",
      "ERMAX", "Max Excretion Rate", "Max excretion rate", "Urine", "66",
      "MIDPTLST", "Midpoint of Collection Interval", "Midpoint last", "Urine", "67",
      "ERLST", "Last Meas Excretion Rate", "Rate last", "Urine", "68",
      "TON", "Time to Onset", "Tonset", "Matrix of PD", "69",
      "TOFF", "Time to Offset", "Toffset", "Matrix of PD", "70",
      "TBBLP", "Time Below Baseline %", "Time %Below Baseline", "Matrix of PD", "71",
      "TBTP", "Time Below Threshold %", "Time %Below Threshold", "Matrix of PD", "72",
      "TABL", "Time Above Baseline", "Time Above Baseline", "Matrix of PD", "73",
      "TAT", "Time Above Threshold", "Time Above Threshold", "Matrix of PD", "74",
      "TBT", "Time Below Threshold", "Time Below Threshold", "Matrix of PD", "75",
      "TBLT", "Time Between Baseline and Threshold", "Time Between Baseline Threshold", "Matrix of PD", "76",
      "BLRSP", "Baseline Response", "Baseline", "Matrix of PD", "77",
      "TSHDRSP", "Response Threshold", "Threshold", "Matrix of PD", "78",
      "AUCABL", "AUC Above Baseline", "AUC above baseline", "Matrix of PD", "79",
      "AUCAT", "AUC Above Threshold", "AUC above threshold", "Matrix of PD", "80",
      "AUCBBL", "AUC Below Baseline", "AUC below baseline", "Matrix of PD", "81",
      "AUCBT", "AUC Below Threshold", "AUC below threshold", "Matrix of PD", "82",
      "AUCBLDIF", "Diff AUC Above Base and AUC Below Base", "AUC diff baseline", "Matrix of PD", "83",
      "AUCTDIF", "Diff AUC Above Thr and AUC Below Thr", "AUCnet threshold", "Matrix of PD", "84",
      "TDIFF", "Diff Time to Offset and Time to Onset", "Diff toffset-tonset", "Matrix of PD", "85",
      "AUCPBEO", "AUC %Back Extrapolation Obs", "AUC%Back extrap obs", "Plasma/Blood/Serum", "86",
      "AUCPBEP", "AUC %Back Extrapolation Pred", "AUC%Back extrap pred", "Plasma/Blood/Serum", "87",
      "TSLP1L", "Lower Time Limit Slope 1st", "Slope1 lower", "Matrix of PD", "88",
      "TSLP1U", "Upper Time Limit Slope 1st Segment", "Slope1 upper", "Matrix of PD", "89",
      "TSLP2L", "Lower Time Limit Slope 2nd Segment", "Slope2 lower", "Matrix of PD", "90",
      "TSLP2U", "Upper Time Limit Slope 2nd Segment", "Slope2 upper", "Matrix of PD", "91",
      "SLP1", "Slope, 1st Segment", "Slope1", "Matrix of PD", "92",
      "SLP2", "Slope, 2nd Segment", "Slope2", "Matrix of PD", "93",
      "SLP1PT", "Number of Points for Slope 1st Segment", "No points slope1", "Matrix of PD", "94",
      "SLP2PT", "Number of Points for Slope 2nd Segment", "No points slope2", "Matrix of PD", "95",
      "R2ADJS1", "R-Squared Adjusted Slope, 1st Segment", "Rsq adjusted slope1", "Matrix of PD", "96",
      "R2ADJS2", "R-Squared Adjusted Slope, 2nd Segment", "Rsq adjusted slope2", "Matrix of PD", "97",
      "R2SLP1", "R Squared, Slope, 1st Segment", "Rsq slope1", "Matrix of PD", "98",
      "R2SLP2", "R Squared, Slope, 2nd Segment", "Rsq slope2", "Matrix of PD", "99",
      "CORRXYS1", "Corr Btw TimeX and Log ConcY, Slope 1st", "Corr xy slope1", "Plasma/Blood/Serum", "100",
      "CORRXYS2", "Corr Btw TimeX and Log ConcY, Slope 1st Slope 2nd", "Corr xy slope2", "Plasma/Blood/Serum", "101",
      "AILAMZ", "Accumulation Index using Lambda z", "AILAMZ", "Plasma/Blood/Serum", "102",
      "ARAUC", "Accumulation Ratio AUCTAU", "ARAUC", "Plasma/Blood/Serum", "103",
      "ARAUCD", "Accum Ratio AUCTAU norm by dose", "ARAUCD", "Plasma/Blood/Serum", "104",
      "ARAUCIFO", "Accum Ratio AUC Infinity Obs", "ARAUCIFO", "Plasma/Blood/Serum", "105",
      "ARAUCIFP", "Accum Ratio AUC Infinity Pred", "ARAUCIFP", "Plasma/Blood/Serum", "106",
      "ARAUCIND", "Accum Ratio AUC T1 to T2 norm by dose", "ARAUCIND_T1_T2_UNIT", "Plasma/Blood/Serum", "107",
      "ARAUCINT", "Accumulation Ratio AUC from T1 to T2", "ARAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "108",
      "ARAUCIOD", "Accum Ratio AUCIFO Norm by Dose", "ARAUCIOD", "Plasma/Blood/Serum", "109",
      "ARAUCIPD", "Accum Ratio AUCIFP Norm by Dose", "ARAUCIPD", "Plasma/Blood/Serum", "110",
      "ARAUCLST", "Accum Ratio AUC to Last Nonzero Conc", "ARAUCLST", "Plasma/Blood/Serum", "111",
      "ARCMAX", "Accumulation Ratio Cmax", "ARCMAX", "Plasma/Blood/Serum", "112",
      "ARCMAXD", "Accum Ratio Cmax norm by dose", "ARCMAXD", "Plasma/Blood/Serum", "113",
      "ARCMIN", "Accumulation Ratio Cmin", "ARCMIN", "Plasma/Blood/Serum", "114",
      "ARCMIND", "Accum Ratio Cmin norm by dose", "ARCMIND", "Plasma/Blood/Serum", "115",
      "ARCTROUD", "Accum Ratio Ctrough norm by dose", "ARCTROUD", "Plasma/Blood/Serum", "116",
      "ARCTROUG", "Accumulation Ratio Ctrough", "ARCTROUG", "Plasma/Blood/Serum", "117",
      "AUCALLB", "AUC All Norm by BMI", "AUCall_B", "Plasma/Blood/Serum", "118",
      "AUCALLD", "AUC All Norm by Dose", "AUCall_D", "Plasma/Blood/Serum", "119",
      "AUCALLS", "AUC All Norm by SA", "AUCall_S", "Plasma/Blood/Serum", "120",
      "AUCALLW", "AUC All Norm by WT", "AUCall_W", "Plasma/Blood/Serum", "121",
      "AUCIFOB", "AUC Infinity Obs Norm by BMI", "AUCINF_obs_B", "Plasma/Blood/Serum", "122",
      "AUCIFOLN", "AUC Infinity Obs LN Transformed", "AUCIFOLN", "Plasma/Blood/Serum", "123",
      "AUCIFOS", "AUC Infinity Obs Norm by SA", "AUCINF_obs_S", "Plasma/Blood/Serum", "124",
      "AUCIFOUB", "AUC Infinity Obs, Unbound Drug", "AUCIFOUB", "Plasma/Blood/Serum", "125",
      "AUCIFOW", "AUC Infinity Obs Norm by WT", "AUCINF_obs_W", "Plasma/Blood/Serum", "126",
      "AUCIFPB", "AUC Infinity Pred Norm by BMI", "AUCINF_pred_B", "Plasma/Blood/Serum", "127",
      "AUCIFPD", "AUC Infinity Pred Norm by Dose", "AUCINF_pred_D", "Plasma/Blood/Serum", "128",
      "AUCIFPS", "AUC Infinity Pred Norm by SA", "AUCINF_pred_S", "Plasma/Blood/Serum", "129",
      "AUCIFPUB", "AUC Infinity Pred, Unbound Drug", "AUCIFPUB", "Plasma/Blood/Serum", "130",
      "AUCIFPW", "AUC Infinity Pred Norm by WT", "AUCINF_pred_W", "Plasma/Blood/Serum", "131",
      "AUCINTB", "AUC from T1 to T2 Norm by BMI", "AUC_B_T1_T2_UNIT", "Plasma/Blood/Serum", "132",
      "AUCINTD", "AUC from T1 to T2 Norm by Dose", "AUC_D_T1_T2_UNIT", "Plasma/Blood/Serum", "133",
      "AUCINTS", "AUC from T1 to T2 Norm by SA", "AUC_S_T1_T2_UNIT", "Plasma/Blood/Serum", "134",
      "AUCINTW", "AUC from T1 to T2 Norm by WT", "AUC_W_T1_T2_UNIT", "Plasma/Blood/Serum", "135",
      "AUCLSTB", "AUC to Last Nonzero Conc Norm by BMI", "AUClast_B", "Plasma/Blood/Serum", "136",
      "AUCLSTD", "AUC to Last Nonzero Conc Norm by Dose", "AUClast_D", "Plasma/Blood/Serum", "137",
      "AUCLSTLN", "AUC to Last Nonzero Conc LN Transformed", "AUCLSTLN", "Plasma/Blood/Serum", "138",
      "AUCLSTS", "AUC to Last Nonzero Conc Norm by SA", "AUClast_S", "Plasma/Blood/Serum", "139",
      "AUCLSTUB", "AUC to Last Nonzero Conc, Unbound Drug", "AUCLSTUB", "Plasma/Blood/Serum", "140",
      "AUCLSTW", "AUC to Last Nonzero Conc Norm by WT", "AUClast_W", "Plasma/Blood/Serum", "141",
      "AUCTAUB", "AUC Over Dosing Interval Norm by BMI", "AUC_TAU_B", "Plasma/Blood/Serum", "142",
      "AUCTAUD", "AUC Over Dosing Interval Norm by Dose", "AUC_TAU_D", "Plasma/Blood/Serum", "143",
      "AUCTAUS", "AUC Over Dosing Interval Norm by SA", "AUC_TAU_S", "Plasma/Blood/Serum", "144",
      "AUCTAUW", "AUC Over Dosing Interval Norm by WT", "AUC_TAU_W", "Plasma/Blood/Serum", "145",
      "AUMCIFOB", "AUMC Infinity Obs Norm by BMI", "AUMCINF_obs_B", "Plasma/Blood/Serum", "146",
      "AUMCIFOD", "AUMC Infinity Obs Norm by Dose", "AUMCINF_obs_D", "Plasma/Blood/Serum", "147",
      "AUMCIFOS", "AUMC Infinity Obs Norm by SA", "AUMCINF_obs_S", "Plasma/Blood/Serum", "148",
      "AUMCIFOW", "AUMC Infinity Obs Norm by WT", "AUMCINF_obs_W", "Plasma/Blood/Serum", "149",
      "AUMCIFPB", "AUMC Infinity Pred Norm by BMI", "AUMCINF_pred_B", "Plasma/Blood/Serum", "150",
      "AUMCIFPD", "AUMC Infinity Pred Norm by Dose", "AUMCINF_pred_D", "Plasma/Blood/Serum", "151",
      "AUMCIFPS", "AUMC Infinity Pred Norm by SA", "AUMCINF_pred_S", "Plasma/Blood/Serum", "152",
      "AUMCIFPW", "AUMC Infinity Pred Norm by WT", "AUMCINF_pred_W", "Plasma/Blood/Serum", "153",
      "AUMCLSTB", "AUMC to Last Nonzero Conc Norm by BMI", "AUMClast_B", "Plasma/Blood/Serum", "154",
      "AUMCLSTD", "AUMC to Last Nonzero Conc Norm by Dose", "AUMClast_D", "Plasma/Blood/Serum", "155",
      "AUMCLSTS", "AUMC to Last Nonzero Conc Norm by SA", "AUMClast_S", "Plasma/Blood/Serum", "156",
      "AUMCLSTW", "AUMC to Last Nonzero Conc Norm by WT", "AUMClast_W", "Plasma/Blood/Serum", "157",
      "AUMCTAUB", "AUMC Over Dosing Interval Norm by BMI", "AUMCTAUB", "Plasma/Blood/Serum", "158",
      "AUMCTAUD", "AUMC Over Dosing Interval Norm by Dose", "AUMCTAUD", "Plasma/Blood/Serum", "159",
      "AUMCTAUS", "AUMC Over Dosing Interval Norm by SA", "AUMCTAUS", "Plasma/Blood/Serum", "160",
      "AUMCTAUW", "AUMC Over Dosing Interval Norm by WT", "AUMCTAUW", "Plasma/Blood/Serum", "161",
      "AURCALLB", "AURC All Norm by BMI", "AURCALLB", "Plasma/Blood/Serum", "162",
      "AURCALLD", "AURC All Norm by Dose", "AURCALLD", "Plasma/Blood/Serum", "163",
      "AURCALLS", "AURC All Norm by SA", "AURCALLS", "Plasma/Blood/Serum", "164",
      "AURCALLW", "AURC All Norm by WT", "AURCALLW", "Plasma/Blood/Serum", "165",
      "AURCIFOB", "AURC Infinity Obs Norm by BMI", "AURCIFOB", "Plasma/Blood/Serum", "166",
      "AURCIFOD", "AURC Infinity Obs Norm by Dose", "AURCIFOD", "Plasma/Blood/Serum", "167",
      "AURCIFOS", "AURC Infinity Obs Norm by SA", "AURCIFOS", "Plasma/Blood/Serum", "168",
      "AURCIFOW", "AURC Infinity Obs Norm by WT", "AURCIFOW", "Plasma/Blood/Serum", "169",
      "AURCIFPB", "AURC Infinity Pred Norm by BMI", "AURCIFPB", "Plasma/Blood/Serum", "170",
      "AURCIFPD", "AURC Infinity Pred Norm by Dose", "AURCIFPD", "Plasma/Blood/Serum", "171",
      "AURCIFPS", "AURC Infinity Pred Norm by SA", "AURCIFPS", "Plasma/Blood/Serum", "172",
      "AURCIFPW", "AURC Infinity Pred Norm by WT", "AURCIFPW", "Plasma/Blood/Serum", "173",
      "AURCINT", "AURC from T1 to T2", "AURCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "174",
      "AURCINTB", "AURC from T1 to T2 Norm by BMI", "AURCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "175",
      "AURCINTD", "AURC from T1 to T2 Norm by Dose", "AURCINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "176",
      "AURCINTS", "AURC from T1 to T2 Norm by SA", "AURCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "177",
      "AURCINTW", "AURC from T1 to T2 Norm by WT", "AURCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "178",
      "AURCLSTB", "AURC to Last Nonzero Rate Norm by BMI", "AURCLSTB", "Plasma/Blood/Serum", "179",
      "AURCLSTD", "AURC to Last Nonzero Rate Norm by Dose", "AURCLSTD", "Plasma/Blood/Serum", "180",
      "AURCLSTS", "AURC to Last Nonzero Rate Norm by SA", "AURCLSTS", "Plasma/Blood/Serum", "181",
      "AURCLSTW", "AURC to Last Nonzero Rate Norm by WT", "AURCLSTW", "Plasma/Blood/Serum", "182",
      "C0B", "Initial Conc Norm by BMI", "C0B", "Plasma/Blood/Serum", "183",
      "C0D", "Initial Conc Norm by Dose", "C0D", "Plasma/Blood/Serum", "184",
      "C0S", "Initial Conc Norm by SA", "C0S", "Plasma/Blood/Serum", "185",
      "C0W", "Initial Conc Norm by WT", "C0W", "Plasma/Blood/Serum", "186",
      "CAVGB", "Average Conc Norm by BMI", "CAVGB", "Plasma/Blood/Serum", "187",
      "CAVGD", "Average Conc Norm by Dose", "CAVGD", "Plasma/Blood/Serum", "188",
      "CAVGINT", "Average Conc from T1 to T2", "CAVGINT_T1_T2_UNIT", "Plasma/Blood/Serum", "189",
      "CAVGINTB", "Average Conc from T1 to T2 Norm by BMI", "CAVGINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "190",
      "CAVGINTD", "Average Conc from T1 to T2 Norm by Dose", "CAVGINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "191",
      "CAVGINTS", "Average Conc from T1 to T2 Norm by SA", "CAVGINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "192",
      "CAVGINTW", "Average Conc from T1 to T2 Norm by WT", "CAVGINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "193",
      "CAVGS", "Average Conc Norm by SA", "CAVGS", "Plasma/Blood/Serum", "194",
      "CAVGW", "Average Conc Norm by WT", "CAVGW", "Plasma/Blood/Serum", "195",
      "CHTMAX", "Concentration at Half Tmax", "CHTMAX", "Plasma/Blood/Serum", "196",
      "CLFOB", "Total CL Obs by F Norm by BMI", "CLFOB", "Plasma/Blood/Serum", "197",
      "CLFOD", "Total CL Obs by F Norm by Dose", "CLFOD", "Plasma/Blood/Serum", "198",
      "CLFOS", "Total CL Obs by F Norm by SA", "CLFOS", "Plasma/Blood/Serum", "199",
      "CLFOW", "Total CL Obs by F Norm by WT", "CLFOW", "Plasma/Blood/Serum", "200",
      "CLFPB", "Total CL Pred by F Norm by BMI", "CLFPB", "Plasma/Blood/Serum", "201",
      "CLFPD", "Total CL Pred by F Norm by Dose", "CLFPD", "Plasma/Blood/Serum", "202",
      "CLFPS", "Total CL Pred by F Norm by SA", "CLFPS", "Plasma/Blood/Serum", "203",
      "CLFPW", "Total CL Pred by F Norm by WT", "CLFPW", "Plasma/Blood/Serum", "204",
      "CLFTAU", "Total CL by F for Dose Int", "CLFTAU", "Plasma/Blood/Serum", "205",
      "CLFTAUB", "Total CL by F for Dose Int Norm by BMI", "CLFTAUB", "Plasma/Blood/Serum", "206",
      "CLFTAUD", "Total CL by F for Dose Int Norm by Dose", "CLFTAUD", "Plasma/Blood/Serum", "207",
      "CLFTAUS", "Total CL by F for Dose Int Norm by SA", "CLFTAUS", "Plasma/Blood/Serum", "208",
      "CLFTAUW", "Total CL by F for Dose Int Norm by WT", "CLFTAUW", "Plasma/Blood/Serum", "209",
      "CLFUB", "Apparent CL for Unbound Drug", "CLFUB", "Plasma/Blood/Serum", "210",
      "CLOB", "Total CL Obs Norm by BMI", "CLOB", "Plasma/Blood/Serum", "211",
      "CLOD", "Total CL Obs Norm by Dose", "CLOD", "Plasma/Blood/Serum", "212",
      "CLOS", "Total CL Obs Norm by SA", "CLOS", "Plasma/Blood/Serum", "213",
      "CLOUB", "Total CL Obs for Unbound Drug", "CLOUB", "Plasma/Blood/Serum", "214",
      "CLOW", "Total CL Obs Norm by WT", "CLOW", "Plasma/Blood/Serum", "215",
      "CLPB", "Total CL Pred Norm by BMI", "CLPB", "Plasma/Blood/Serum", "216",
      "CLPD", "Total CL Pred Norm by Dose", "CLPD", "Plasma/Blood/Serum", "217",
      "CLPS", "Total CL Pred Norm by SA", "CLPS", "Plasma/Blood/Serum", "218",
      "CLPUB", "Total CL Pred for Unbound Drug", "CLPUB", "Plasma/Blood/Serum", "219",
      "CLPW", "Total CL Pred Norm by WT", "CLPW", "Plasma/Blood/Serum", "220",
      "CLRPCLEV", "Renal CL as Pct CL EV", "CLRPCLEV", "Urine", "221",
      "CLRPCLIV", "Renal CL as Pct CL IV", "CLRPCLIV", "Urine", "222",
      "CLSTB", "Last Nonzero Conc Norm by BMI", "CLSTB", "Plasma/Blood/Serum", "223",
      "CLSTD", "Last Nonzero Conc Norm by Dose", "CLSTD", "Plasma/Blood/Serum", "224",
      "CLSTS", "Last Nonzero Conc Norm by SA", "CLSTS", "Plasma/Blood/Serum", "225",
      "CLSTW", "Last Nonzero Conc Norm by WT", "CLSTW", "Plasma/Blood/Serum", "226",
      "CLTAU", "Total CL for Dose Int", "CLTAU", "Plasma/Blood/Serum", "227",
      "CLTAUB", "Total CL for Dose Int Norm by BMI", "CLTAUB", "Plasma/Blood/Serum", "228",
      "CLTAUD", "Total CL for Dose Int Norm by Dose", "CLTAUD", "Plasma/Blood/Serum", "229",
      "CLTAUS", "Total CL for Dose Int Norm by SA", "CLTAUS", "Plasma/Blood/Serum", "230",
      "CLTAUW", "Total CL for Dose Int Norm by WT", "CLTAUW", "Plasma/Blood/Serum", "231",
      "CMAXB", "Max Conc Norm by BMI", "CMAX_B", "Plasma/Blood/Serum", "232",
      "CMAXLN", "Max Conc LN Transformed", "CMAXLN", "Plasma/Blood/Serum", "233",
      "CMAXS", "Max Conc Norm by SA", "CMAXS", "Plasma/Blood/Serum", "234",
      "CMAXUB", "Max Conc, Unbound Drug", "CMAXUB", "Plasma/Blood/Serum", "235",
      "CMAXW", "Max Conc Norm by WT", "CMAXW", "Plasma/Blood/Serum", "236",
      "CMINB", "Min Conc Norm by BMI", "CMINB", "Plasma/Blood/Serum", "237",
      "CMIND", "Min Conc Norm by Dose", "CMIND", "Plasma/Blood/Serum", "238",
      "CMINS", "Min Conc Norm by SA", "CMINS", "Plasma/Blood/Serum", "239",
      "CMINW", "Min Conc Norm by WT", "CMINW", "Plasma/Blood/Serum", "240",
      "CONC", "Concentration", "CONC", "Plasma/Blood/Serum", "241",
      "CONCB", "Conc by BMI", "CONCB", "Plasma/Blood/Serum", "242",
      "CONCD", "Conc by Dose", "CONCD", "Plasma/Blood/Serum", "243",
      "CONCS", "Conc by SA", "CONCS", "Plasma/Blood/Serum", "244",
      "CONCW", "Conc by WT", "CONCW", "Plasma/Blood/Serum", "245",
      "CTROUGH", "Conc Trough", "CTROUGH", "Plasma/Blood/Serum", "246",
      "CTROUGHB", "Conc Trough by BMI", "CTROUGHB", "Plasma/Blood/Serum", "247",
      "CTROUGHD", "Conc Trough by Dose", "CTROUGHD", "Plasma/Blood/Serum", "248",
      "CTROUGHS", "Conc Trough by SA", "CTROUGHS", "Plasma/Blood/Serum", "249",
      "CTROUGHW", "Conc Trough by WT", "CTROUGHW", "Plasma/Blood/Serum", "250",
      "EFFHL", "Effective Half-Life", "EFFHL", "Plasma/Blood/Serum", "251",
      "ERINT", "Excret Rate from T1 to T2", "ERINT_T1_T2_UNIT", "Plasma/Blood/Serum", "252",
      "ERINTB", "Excret Rate from T1 to T2 Norm by BMI", "ERINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "253",
      "ERINTD", "Excret Rate from T1 to T2 Norm by Dose", "ERINTD_T1_T2_UNIT", "Plasma/Blood/Serum", "254",
      "ERINTS", "Excret Rate from T1 to T2 Norm by SA", "ERINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "255",
      "ERINTW", "Excret Rate from T1 to T2 Norm by WT", "ERINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "256",
      "ERLSTB", "Last Meas Excretion Rate Norm by BMI", "ERLSTB", "Plasma/Blood/Serum", "257",
      "ERLSTD", "Last Meas Excretion Rate Norm by Dose", "ERLSTD", "Plasma/Blood/Serum", "258",
      "ERLSTS", "Last Meas Excretion Rate Norm by SA", "ERLSTS", "Plasma/Blood/Serum", "259",
      "ERLSTW", "Last Meas Excretion Rate Norm by WT", "ERLSTW", "Plasma/Blood/Serum", "260",
      "ERMAXB", "Max Excretion Rate Norm by BMI", "ERMAXB", "Plasma/Blood/Serum", "261",
      "ERMAXD", "Max Excretion Rate Norm by Dose", "ERMAXD", "Plasma/Blood/Serum", "262",
      "ERMAXS", "Max Excretion Rate Norm by SA", "ERMAXS", "Plasma/Blood/Serum", "263",
      "ERMAXW", "Max Excretion Rate Norm by WT", "ERMAXW", "Plasma/Blood/Serum", "264",
      "ERTLST", "Midpoint of Interval of Last Nonzero ER", "ERTLST", "Plasma/Blood/Serum", "265",
      "FABS", "Absolute Bioavailability", "FABS", "Plasma/Blood/Serum", "266",
      "FB", "Fraction Bound", "FB", "Plasma/Blood/Serum", "267",
      "FREL", "Relative Bioavailability", "FREL", "Plasma/Blood/Serum", "268",
      "FREXINT", "Fract Excr from T1 to T2", "FREXINT_T1_T2_UNIT", "Plasma/Blood/Serum", "269",
      "FU", "Fraction Unbound", "FU", "Plasma/Blood/Serum", "270",
      "HDCL", "Hemodialysis Clearance", "HDCL", "Plasma/Blood/Serum", "271",
      "HDER", "Hemodialysis Extraction Ratio", "HDER", "Plasma/Blood/Serum", "272",
      "HTMAX", "Half Tmax", "HTMAX", "Plasma/Blood/Serum", "273",
      "LAMZLTAU", "Lambda z Lower Limit TAU", "LAMZLTAU", "Plasma/Blood/Serum", "274",
      "LAMZNTAU", "Number of Points for Lambda z TAU", "LAMZNTAU", "Plasma/Blood/Serum", "275",
      "LAMZSPN", "Lambda z Span", "LAMZSPN", "Plasma/Blood/Serum", "276",
      "LAMZTAU", "Lambda z TAU", "LAMZTAU", "Plasma/Blood/Serum", "277",
      "LAMZUTAU", "Lambda z Upper Limit TAU", "LAMZUTAU", "Plasma/Blood/Serum", "278",
      "MAT", "Mean Absorption Time", "MAT", "Plasma/Blood/Serum", "279",
      "MRAUCIFO", "Metabolite Ratio for AUC Infinity Obs", "MRAUCIFO", "Plasma/Blood/Serum", "280",
      "MRAUCIFP", "Metabolite Ratio for AUC Infinity Pred", "MRAUCIFP", "Plasma/Blood/Serum", "281",
      "MRAUCINT", "Metabolite Ratio AUC from T1 to T2", "MRAUCINT_T1_T2_UNIT", "Plasma/Blood/Serum", "282",
      "MRAUCLST", "Metabolite Ratio AUC Last Nonzero Conc", "MRAUCLST", "Plasma/Blood/Serum", "283",
      "MRAUCTAU", "Metabolite Ratio for AUC Dosing Interval", "MRAUCTAU", "Plasma/Blood/Serum", "284",
      "MRCMAX", "Metabolite Ratio for Max Conc", "MRCMAX", "Plasma/Blood/Serum", "285",
      "MRTEVIFO", "MRT Extravasc Infinity Obs", "MRTEVIFO", "Plasma/Blood/Serum", "286",
      "MRTEVIFP", "MRT Extravasc Infinity Pred", "MRTEVIFP", "Plasma/Blood/Serum", "287",
      "MRTEVLST", "MRT Extravasc to Last Nonzero Conc", "MRTEVLST", "Plasma/Blood/Serum", "288",
      "MRTIVIFO", "MRT Intravasc Infinity Obs", "MRTIVIFO", "Plasma/Blood/Serum", "289",
      "MRTIVIFP", "MRT Intravasc Infinity Pred", "MRTIVIFP", "Plasma/Blood/Serum", "290",
      "MRTIVLST", "MRT Intravasc to Last Nonzero Conc", "MRTIVLST", "Plasma/Blood/Serum", "291",
      "NRENALCL", "Nonrenal CL", "NRENALCL", "Urine", "292",
      "NRENLCLB", "Nonrenal CL Norm by BMI", "NRENLCLB", "Urine", "293",
      "NRENLCLD", "Nonrenal CL Norm by Dose", "NRENLCLD", "Urine", "294",
      "NRENLCLS", "Nonrenal CL Norm by SA", "NRENLCLS", "Urine", "295",
      "NRENLCLW", "Nonrenal CL Norm by WT", "NRENLCLW", "Urine", "296",
      "PTROUGHR", "Peak Trough Ratio", "PTROUGHR", "Plasma/Blood/Serum", "297",
      "RAAUC", "Ratio AUC", "RAAUC", "Plasma/Blood/Serum", "298",
      "RAAUCIFO", "Ratio AUC Infinity Obs", "RAAUCIFO", "Plasma/Blood/Serum", "299",
      "RAAUCIFP", "Ratio AUC Infinity Pred", "RAAUCIFP", "Plasma/Blood/Serum", "300",
      "RACMAX", "Ratio CMAX", "RACMAX", "Plasma/Blood/Serum", "301",
      "RAMAXMIN", "Ratio of CMAX to CMIN", "RAMAXMIN", "Plasma/Blood/Serum", "302",
      "RCAMIFO", "Amt Rec Infinity Obs", "RCAMIFO", "Plasma/Blood/Serum", "303",
      "RCAMIFOB", "Amt Rec Infinity Obs Norm by BMI", "RCAMIFOB", "Plasma/Blood/Serum", "304",
      "RCAMIFOS", "Amt Rec Infinity Obs Norm by SA", "RCAMIFOS", "Plasma/Blood/Serum", "305",
      "RCAMIFOW", "Amt Rec Infinity Obs Norm by WT", "RCAMIFOW", "Plasma/Blood/Serum", "306",
      "RCAMIFP", "Amt Rec Infinity Pred", "RCAMIFP", "Plasma/Blood/Serum", "307",
      "RCAMIFPB", "Amt Rec Infinity Pred Norm by BMI", "RCAMIFPB", "Plasma/Blood/Serum", "308",
      "RCAMIFPS", "Amt Rec Infinity Pred Norm by SA", "RCAMIFPS", "Plasma/Blood/Serum", "309",
      "RCAMIFPW", "Amt Rec Infinity Pred Norm by WT", "RCAMIFPW", "Plasma/Blood/Serum", "310",
      "RCAMINTB", "Amt Rec from T1 to T2 Norm by BMI", "RCAMINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "311",
      "RCAMINTS", "Amt Rec from T1 to T2 Norm by SA", "RCAMINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "312",
      "RCAMINTW", "Amt Rec from T1 to T2 Norm by WT", "RCAMINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "313",
      "RCAMTAU", "Amt Rec Over Dosing Interval", "RCAMTAU", "Plasma/Blood/Serum", "314",
      "RCAMTAUB", "Amt Rec Over Dosing Interval Norm by BMI", "RCAMTAUB", "Plasma/Blood/Serum", "315",
      "RCAMTAUS", "Amt Rec Over Dosing Interval Norm by SA", "RCAMTAUS", "Plasma/Blood/Serum", "316",
      "RCAMTAUW", "Amt Rec Over Dosing Interval Norm by WT", "RCAMTAUW", "Plasma/Blood/Serum", "317",
      "RCPCIFO", "Pct Rec Infinity Obs", "RCPCIFO", "Plasma/Blood/Serum", "318",
      "RCPCIFOB", "Pct Rec Infinity Obs Norm by BMI", "RCPCIFOB", "Plasma/Blood/Serum", "319",
      "RCPCIFOS", "Pct Rec Infinity Obs Norm by SA", "RCPCIFOS", "Plasma/Blood/Serum", "320",
      "RCPCIFOW", "Pct Rec Infinity Obs Norm by WT", "RCPCIFOW", "Plasma/Blood/Serum", "321",
      "RCPCIFP", "Pct Rec Infinity Pred", "RCPCIFP", "Plasma/Blood/Serum", "322",
      "RCPCIFPB", "Pct Rec Infinity Pred Norm by BMI", "RCPCIFPB", "Plasma/Blood/Serum", "323",
      "RCPCIFPS", "Pct Rec Infinity Pred Norm by SA", "RCPCIFPS", "Plasma/Blood/Serum", "324",
      "RCPCIFPW", "Pct Rec Infinity Pred Norm by WT", "RCPCIFPW", "Plasma/Blood/Serum", "325",
      "RCPCINTB", "Pct Rec from T1 to T2 Norm by BMI", "RCPCINTB_T1_T2_UNIT", "Plasma/Blood/Serum", "326",
      "RCPCINTS", "Pct Rec from T1 to T2 Norm by SA", "RCPCINTS_T1_T2_UNIT", "Plasma/Blood/Serum", "327",
      "RCPCINTW", "Pct Rec from T1 to T2 Norm by WT", "RCPCINTW_T1_T2_UNIT", "Plasma/Blood/Serum", "328",
      "RCPCLST", "Pct Rec to Last Nonzero Conc", "RCPCLST", "Plasma/Blood/Serum", "329",
      "RCPCTAU", "Pct Rec Over Dosing Interval", "RCPCTAU", "Plasma/Blood/Serum", "330",
      "RCPCTAUB", "Pct Rec Over Dosing Interval Norm by BMI", "RCPCTAUB", "Plasma/Blood/Serum", "331",
      "RCPCTAUS", "Pct Rec Over Dosing Interval Norm by SA", "RCPCTAUS", "Plasma/Blood/Serum", "332",
      "RCPCTAUW", "Pct Rec Over Dosing Interval Norm by WT", "RCPCTAUW", "Plasma/Blood/Serum", "333",
      "RENALCLB", "Renal CL Norm by BMI", "RENALCLB", "Urine", "334",
      "RENALCLD", "Renal CL Norm by Dose", "RENALCLD", "Urine", "335",
      "RENALCLS", "Renal CL Norm by SA", "RENALCLS", "Urine", "336",
      "RENALCLW", "Renal CL Norm by WT", "RENALCLW", "Urine", "337",
      "RENCLTAU", "Renal CL for Dose Int", "RENCLTAU", "Urine", "338",
      "RNCLINT", "Renal CL from T1 to T2", "RNCLINT_T1_T2_UNIT", "Urine", "339",
      "RNCLINTB", "Renal CL from T1 to T2 Norm by BMI", "RNCLINTB_T1_T2_UNIT", "Urine", "340",
      "RNCLINTD", "Renal CL from T1 to T2 Norm by Dose", "RNCLINTD_T1_T2_UNIT", "Urine", "341",
      "RNCLINTS", "Renal CL from T1 to T2 Norm by SA", "RNCLINTS_T1_T2_UNIT", "Urine", "342",
      "RNCLINTW", "Renal CL from T1 to T2 Norm by WT", "RNCLINTW_T1_T2_UNIT", "Urine", "343",
      "RNCLTAUB", "Renal CL for Dose Int Norm by BMI", "RNCLTAUB", "Urine", "344",
      "RNCLTAUD", "Renal CL for Dose Int Norm by Dose", "RNCLTAUD", "Urine", "345",
      "RNCLTAUS", "Renal CL for Dose Int Norm by SA", "RNCLTAUS", "Urine", "346",
      "RNCLTAUW", "Renal CL for Dose Int Norm by WT", "RNCLTAUW", "Urine", "347",
      "RNCLUB", "Renal CL for Unbound Drug", "RNCLUB", "Urine", "348",
      "SRAUC", "Stationarity Ratio AUC", "SRAUC", "Plasma/Blood/Serum", "349",
      "SWING", "Swing", "SWING", "Plasma/Blood/Serum", "350",
      "TAUHL", "Half-Life TAU", "TAUHL", "Plasma/Blood/Serum", "351",
      "TBBL", "Time Below Baseline", "Time_Below_B", "Plasma/Blood/Serum", "352",
      "TROUGHPR", "Trough Peak Ratio", "TROUGHPR", "Plasma/Blood/Serum", "353",
      "V0", "Vol Dist Initial", "V0", "Plasma/Blood/Serum", "354",
      "V0B", "Vol Dist Initial Norm by BMI", "V0B", "Plasma/Blood/Serum", "355",
      "V0D", "Vol Dist Initial Norm by Dose", "V0D", "Plasma/Blood/Serum", "356",
      "V0S", "Vol Dist Initial Norm by SA", "V0S", "Plasma/Blood/Serum", "357",
      "V0W", "Vol Dist Initial Norm by WT", "V0W", "Plasma/Blood/Serum", "358",
      "VSSOB", "Vol Dist Steady State Obs Norm by BMI", "VSSOB", "Plasma/Blood/Serum", "359",
      "VSSOBD", "Vol Dist Steady State Obs by B", "VSSOBD", "Plasma/Blood/Serum", "360",
      "VSSOD", "Vol Dist Steady State Obs Norm by Dose", "VSSOD", "Plasma/Blood/Serum", "361",
      "VSSOF", "Vol Dist Steady State Obs by F", "VSSOF", "Plasma/Blood/Serum", "362",
      "VSSOS", "Vol Dist Steady State Obs Norm by SA", "VSSOS", "Plasma/Blood/Serum", "363",
      "VSSOUB", "Vol Dist Steady State Obs by UB", "VSSOUB", "Plasma/Blood/Serum", "364",
      "VSSOW", "Vol Dist Steady State Obs Norm by WT", "VSSOW", "Plasma/Blood/Serum", "365",
      "VSSPB", "Vol Dist Steady State Pred Norm by BMI", "VSSPB", "Plasma/Blood/Serum", "366",
      "VSSPBD", "Vol Dist Steady State Pred by B", "VSSPBD", "Plasma/Blood/Serum", "367",
      "VSSPD", "Vol Dist Steady State Pred Norm by Dose", "VSSPD", "Plasma/Blood/Serum", "368",
      "VSSPF", "Vol Dist Steady State Pred by F", "VSSPF", "Plasma/Blood/Serum", "369",
      "VSSPS", "Vol Dist Steady State Pred Norm by SA", "VSSPS", "Plasma/Blood/Serum", "370",
      "VSSPUB", "Vol Dist Steady State Pred by UB", "VSSPUB", "Plasma/Blood/Serum", "371",
      "VSSPW", "Vol Dist Steady State Pred Norm by WT", "VSSPW", "Plasma/Blood/Serum", "372",
      "VZ", "Vol Z", "Vz", "Plasma/Blood/Serum", "373",
      "VZF", "Vol Z by F", "Vz_F", "Plasma/Blood/Serum", "374",
      "VZFOB", "Vz Obs by F Norm by BMI", "VZFOB", "Plasma/Blood/Serum", "375",
      "VZFOD", "Vz Obs by F Norm by Dose", "VZFOD", "Plasma/Blood/Serum", "376",
      "VZFOS", "Vz Obs by F Norm by SA", "VZFOS", "Plasma/Blood/Serum", "377",
      "VZFOUB", "Vz Obs by F for UB", "VZFOUB", "Plasma/Blood/Serum", "378",
      "VZFOW", "Vz Obs by F Norm by WT", "VZFOW", "Plasma/Blood/Serum", "379",
      "VZFPB", "Vz Pred by F Norm by BMI", "VZFPB", "Plasma/Blood/Serum", "380",
      "VZFPD", "Vz Pred by F Norm by Dose", "VZFPD", "Plasma/Blood/Serum", "381",
      "VZFPS", "Vz Pred by F Norm by SA", "VZFPS", "Plasma/Blood/Serum", "382",
      "VZFPUB", "Vz Pred by F for UB", "VZFPUB", "Plasma/Blood/Serum", "383",
      "VZFPW", "Vz Pred by F Norm by WT", "VZFPW", "Plasma/Blood/Serum", "384",
      "VZFTAU", "Vz for Dose Int by F", "VZFTAU", "Plasma/Blood/Serum", "385",
      "VZFTAUB", "Vz for Dose Int by F Norm by BMI", "VZFTAUB", "Plasma/Blood/Serum", "386",
      "VZFTAUD", "Vz for Dose Int by F Norm by Dose", "VZFTAUD", "Plasma/Blood/Serum", "387",
      "VZFTAUS", "Vz for Dose Int by F Norm by SA", "VZFTAUS", "Plasma/Blood/Serum", "388",
      "VZFTAUW", "Vz for Dose Int by F Norm by WT", "VZFTAUW", "Plasma/Blood/Serum", "389",
      "VZOB", "Vz Obs Norm by BMI", "VZOB", "Plasma/Blood/Serum", "390",
      "VZOD", "Vz Obs Norm by Dose", "VZOD", "Plasma/Blood/Serum", "391",
      "VZOS", "Vz Obs Norm by SA", "VZOS", "Plasma/Blood/Serum", "392",
      "VZOUB", "Vz Obs for UB", "VZOUB", "Plasma/Blood/Serum", "393",
      "VZOW", "Vz Obs Norm by WT", "VZOW", "Plasma/Blood/Serum", "394",
      "VZPB", "Vz Pred Norm by BMI", "VZPB", "Plasma/Blood/Serum", "395",
      "VZPD", "Vz Pred Norm by Dose", "VZPD", "Plasma/Blood/Serum", "396",
      "VZPS", "Vz Pred Norm by SA", "VZPS", "Plasma/Blood/Serum", "397",
      "VZPUB", "Vz Pred for UB", "VZPUB", "Plasma/Blood/Serum", "398"
    ),
    ncol = 5,
    byrow = TRUE
  ))
  colnames(pk_dataset) <- c("PARAMCD", "PARAM", "TLG_DISPLAY", "MATRIX", "TLG_ORDER")
  pk_dataset
}

#' Control function for incidence rate
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Auxiliary function that validates and bundles the settings for the incidence rate
#' estimation. It is used internally to specify details in `s_incidence_rate()`.
#'
#' @inheritParams argument_convention
#' @param conf_type (`string`)\cr confidence interval type; one of `normal` (default),
#'   `normal_log`, `exact`, or `byar`.
#' @param input_time_unit (`string`)\cr time unit of the input data; one of `day`, `week`,
#'   `month`, or `year` (default).
#' @param num_pt_year (`numeric`)\cr number of patient-years to use when calculating adverse event rates.
#' @param time_unit_input `r lifecycle::badge("deprecated")` Please use the `input_time_unit` argument instead.
#' @param time_unit_output `r lifecycle::badge("deprecated")` Please use the `num_pt_year` argument instead.
#'
#' @return A list with the components `conf_level`, `conf_type`, `input_time_unit` and `num_pt_year`.
#'
#' @seealso [incidence_rate]
#'
#' @examples
#' control_incidence_rate(0.9, "exact", "month", 100)
#'
#' @export
control_incidence_rate <- function(conf_level = 0.95,
                                   conf_type = c("normal", "normal_log", "exact", "byar"),
                                   input_time_unit = c("year", "day", "week", "month"),
                                   num_pt_year = 100,
                                   time_unit_input = lifecycle::deprecated(),
                                   time_unit_output = lifecycle::deprecated()) {
  # A supplied deprecated argument triggers a warning and overrides its replacement.
  if (lifecycle::is_present(time_unit_input)) {
    lifecycle::deprecate_warn(
      when = "0.8.3",
      what = "control_incidence_rate(time_unit_input)",
      with = "control_incidence_rate(input_time_unit)"
    )
    input_time_unit <- time_unit_input
  }
  if (lifecycle::is_present(time_unit_output)) {
    lifecycle::deprecate_warn(
      when = "0.8.3",
      what = "control_incidence_rate(time_unit_output)",
      with = "control_incidence_rate(num_pt_year)"
    )
    num_pt_year <- time_unit_output
  }

  # Validation happens after the deprecated values were mapped, so they are checked as well.
  conf_type <- match.arg(conf_type)
  input_time_unit <- match.arg(input_time_unit)
  checkmate::assert_number(num_pt_year)
  assert_proportion_value(conf_level)

  control <- list(conf_level = conf_level, conf_type = conf_type)
  control$input_time_unit <- input_time_unit
  control$num_pt_year <- num_pt_year
  control
}

1		#' Difference Test for Two Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Various tests were implemented to test the difference between two proportions.
6		#'
7		#' @inheritParams argument_convention
8		#' @param method (`string`)\cr one of `chisq`, `cmh`, `fisher`, or `schouten`; specifies the test used
9		#' to calculate the p-value.
10		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("test_proportion_diff")`
11		#' to see available statistics for this function.
12		#'
13		#' @seealso [h_prop_diff_test]
14		#'
15		#' @name prop_diff_test
16		#' @order 1
17		NULL
18
#' @describeIn prop_diff_test Statistics function which tests the difference between two proportions.
#'
#' @return
#' * `s_test_proportion_diff()` returns a named `list` with a single item `pval` with an attribute `label`
#'   describing the method used. The p-value tests the null hypothesis that proportions in two groups are the same.
#'
#' @keywords internal
s_test_proportion_diff <- function(df,
                                   .var,
                                   .ref_group,
                                   .in_ref_col,
                                   variables = list(strata = NULL),
                                   method = c("chisq", "schouten", "fisher", "cmh")) {
  method <- match.arg(method)
  # Placeholder: stays an empty string in the reference column, where no test is computed.
  y <- list(pval = "")

  if (!.in_ref_col) {
    assert_df_with_variables(df, list(rsp = .var))
    assert_df_with_variables(.ref_group, list(rsp = .var))
    # Reference group observations come first, followed by the current column's observations.
    rsp <- factor(
      c(.ref_group[[.var]], df[[.var]]),
      levels = c("TRUE", "FALSE")
    )
    grp <- factor(
      rep(c("ref", "Not-ref"), c(nrow(.ref_group), nrow(df))),
      levels = c("ref", "Not-ref")
    )

    if (!is.null(variables$strata) || method == "cmh") {
      strata <- variables$strata
      # The CMH test cannot be run without strata variables.
      checkmate::assert_false(is.null(strata))
      strata_vars <- stats::setNames(as.list(strata), strata)
      assert_df_with_variables(df, strata_vars)
      assert_df_with_variables(.ref_group, strata_vars)
      # Merge the strata of both groups on their labels. `c()` on factors returns the bare integer
      # codes before R 4.1.0, which mixes up strata whenever the two groups have different level sets.
      # Using the union of the levels reproduces what `c()` yields for factors from R 4.1.0 onwards.
      strata_ref <- interaction(.ref_group[strata])
      strata_cmp <- interaction(df[strata])
      strata <- factor(
        c(as.character(strata_ref), as.character(strata_cmp)),
        levels = union(levels(strata_ref), levels(strata_cmp))
      )
    }

    # Only the CMH test uses the strata as a third table dimension.
    tbl <- switch(method,
      cmh = table(grp, rsp, strata),
      table(grp, rsp)
    )

    y$pval <- switch(method,
      chisq = prop_chisq(tbl),
      cmh = prop_cmh(tbl),
      fisher = prop_fisher(tbl),
      schouten = prop_schouten(tbl)
    )
  }

  y$pval <- formatters::with_label(y$pval, d_test_proportion_diff(method))
  y
}
72
#' Description of the Difference Test Between Two Proportions
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Auxiliary function that builds the label describing the test used in `s_test_proportion_diff`.
#'
#' @inheritParams s_test_proportion_diff
#'
#' @return `string` describing the test from which the p-value is derived.
#'
#' @export
d_test_proportion_diff <- function(method) {
  checkmate::assert_string(method)

  # Human-readable test name for every supported method.
  test_names <- c(
    schouten = "Chi-Squared Test with Schouten Correction",
    chisq = "Chi-Squared Test",
    cmh = "Cochran-Mantel-Haenszel Test",
    fisher = "Fisher's Exact Test"
  )
  if (!method %in% names(test_names)) {
    stop(paste(method, "does not have a description"))
  }

  paste0("p-value (", test_names[[method]], ")")
}
95
#' @describeIn prop_diff_test Formatted analysis function which is used as `afun` in `test_proportion_diff()`.
#'
#' @return
#' * `a_test_proportion_diff()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @keywords internal
a_test_proportion_diff <- make_afun(
  s_test_proportion_diff,
  # The format label contains a plain `|`; a backslash before it is not a valid escape in an R string.
  .formats = c(pval = "x.xxxx | (<0.0001)"),
  .indent_mods = c(pval = 1L)
)
107
#' @describeIn prop_diff_test Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `test_proportion_diff()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_test_proportion_diff()` to the table layout.
#'
#' @examples
#' dta <- data.frame(
#'   rsp = sample(c(TRUE, FALSE), 100, TRUE),
#'   grp = factor(rep(c("A", "B"), each = 50)),
#'   strat = factor(rep(c("V", "W", "X", "Y", "Z"), each = 20))
#' )
#'
#' # With `rtables` pipelines.
#' l <- basic_table() %>%
#'   split_cols_by(var = "grp", ref_group = "B") %>%
#'   test_proportion_diff(
#'     vars = "rsp",
#'     method = "cmh", variables = list(strata = "strat")
#'   )
#'
#' build_table(l, df = dta)
#'
#' @export
#' @order 2
test_proportion_diff <- function(lyt,
                                 vars,
                                 variables = list(strata = NULL),
                                 method = c("chisq", "schouten", "fisher", "cmh"),
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 ...,
                                 var_labels = vars,
                                 show_labels = "hidden",
                                 table_names = vars,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = NULL,
                                 .indent_mods = NULL) {
  analyze(
    lyt,
    vars,
    # Analysis function customised with the statistics and formatting requested by the caller.
    afun = make_afun(
      a_test_proportion_diff,
      .stats = .stats,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    ),
    var_labels = var_labels,
    na_str = na_str,
    nested = nested,
    # Arguments handed on to the analysis function, including anything passed via `...`.
    extra_args = list(variables = variables, method = method, ...),
    show_labels = show_labels,
    table_names = table_names
  )
}
170
171		#' Helper Functions to Test Proportion Differences
172		#'
173		#' Helper functions to implement various tests on the difference between two proportions.
174		#'
175		#' @param tbl (`matrix`)\cr matrix with two groups in rows and the binary response (`TRUE`/`FALSE`) in columns.
176		#'
177		#' @return A p-value.
178		#'
179		#' @seealso [prop_diff_test()] for implementation of these helper functions.
180		#'
181		#' @name h_prop_diff_test
182		NULL
183
#' @describeIn h_prop_diff_test performs Chi-Squared test. Internally calls [stats::prop.test()].
#'
#' @keywords internal
prop_chisq <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Put the `TRUE` column first so that it is the one counted as successes.
  counts <- tbl[, c("TRUE", "FALSE")]
  # If one response category was never observed, a p-value of 1 is returned without running the test.
  has_empty_column <- any(colSums(counts) == 0)

  if (has_empty_column) {
    1
  } else {
    stats::prop.test(counts, correct = FALSE)$p.value
  }
}
195
#' @describeIn h_prop_diff_test performs stratified Cochran-Mantel-Haenszel test. Internally calls
#'   [stats::mantelhaen.test()]. Note that strata with less than two observations are automatically discarded.
#'
#' @param ary (`array`, 3 dimensions)\cr array with two groups in rows, the binary response
#'   (`TRUE`/`FALSE`) in columns, and the strata in the third dimension.
#'
#' @keywords internal
prop_cmh <- function(ary) {
  checkmate::assert_array(ary)
  checkmate::assert_integer(c(ncol(ary), nrow(ary)), lower = 2, upper = 2)
  checkmate::assert_integer(length(dim(ary)), lower = 3, upper = 3)

  # Total number of observations in each stratum (third dimension).
  n_per_stratum <- apply(ary, MARGIN = 3, sum)
  if (any(n_per_stratum < 5)) {
    warning("<5 data points in some strata. CMH test may be incorrect.")
    # Strata with at most one observation are removed; this only happens once the warning was raised.
    ary <- ary[, , n_per_stratum > 1]
  }

  cmh_result <- stats::mantelhaen.test(ary, correct = FALSE)
  cmh_result$p.value
}
215
#' @describeIn h_prop_diff_test performs the Chi-Squared test with Schouten correction.
#'
#' @seealso Schouten correction is based upon \insertCite{Schouten1980-kd;textual}{tern}.
#'
#' @keywords internal
prop_schouten <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)
  tbl <- tbl[, c("TRUE", "FALSE")]
  # If one response category was never observed, a p-value of 1 is returned without running the test.
  if (any(colSums(tbl) == 0)) {
    return(1)
  }

  # Contingency tables hold integer counts. The product of the four margins below exceeds
  # `.Machine$integer.max` already for a few hundred observations, which turns the denominator
  # (and therefore the p-value) into `NA`. Switching to double precision avoids the overflow.
  storage.mode(tbl) <- "double"

  n <- sum(tbl)
  n1 <- sum(tbl[1, ])
  n2 <- sum(tbl[2, ])

  ad <- diag(tbl) # Main diagonal cells.
  bc <- diag(apply(tbl, 2, rev)) # Anti-diagonal cells.
  ac <- tbl[, 1] # `TRUE` column.
  bd <- tbl[, 2] # `FALSE` column.

  t_schouten <- (n - 1) *
    (abs(prod(ad) - prod(bc)) - 0.5 * min(n1, n2))^2 /
    (n1 * n2 * sum(ac) * sum(bd))

  # Ask for the upper tail directly: `1 - pchisq()` rounds very small p-values to exactly 0.
  stats::pchisq(t_schouten, df = 1, lower.tail = FALSE)
}
243
#' @describeIn h_prop_diff_test performs the Fisher's exact test. Internally calls [stats::fisher.test()].
#'
#' @keywords internal
prop_fisher <- function(tbl) {
  checkmate::assert_integer(c(ncol(tbl), nrow(tbl)), lower = 2, upper = 2)

  # Columns are put in the order `TRUE`, `FALSE` before the test is run.
  fisher_result <- stats::fisher.test(tbl[, c("TRUE", "FALSE")])
  fisher_result$p.value
}

1		#' Estimation of Proportions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Estimate the proportion of responders within a studied population.
6		#'
7		#' @inheritParams prop_strat_wilson
8		#' @inheritParams argument_convention
9		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("estimate_proportion")`
10		#' to see available statistics for this function.
11		#' @param method (`string`)\cr the method used to construct the confidence interval
12		#' for proportion of successful outcomes; one of `waldcc`, `wald`, `clopper-pearson`,
13		#' `wilson`, `wilsonc`, `strat_wilson`, `strat_wilsonc`, `agresti-coull` or `jeffreys`.
14		#' @param long (`flag`)\cr whether a long description is required.
15		#'
16		#' @seealso [h_proportions]
17		#'
18		#' @name estimate_proportions
19		#' @order 1
20		NULL
21
#' @describeIn estimate_proportions Statistics function estimating a
#'   proportion along with its confidence interval.
#'
#' @param df (`logical` or `data.frame`)\cr if only a logical vector is used,
#'   it indicates whether each subject is a responder or not. `TRUE` represents
#'   a successful outcome. If a `data.frame` is provided, also the `strata` variable
#'   names must be provided in `variables` as a list element with the strata strings.
#'   In the case of `data.frame`, the logical vector of responses must be indicated as a
#'   variable name in `.var`.
#'
#' @return
#' * `s_proportion()` returns statistics `n_prop` (`n` and proportion) and `prop_ci` (proportion CI) for a
#'   given variable.
#'
#' @examples
#' # Case with only logical vector.
#' rsp_v <- c(1, 0, 1, 0, 1, 1, 0, 0)
#' s_proportion(rsp_v)
#'
#' # Example for Stratified Wilson CI
#' nex <- 100 # Number of example rows
#' dta <- data.frame(
#'   "rsp" = sample(c(TRUE, FALSE), nex, TRUE),
#'   "grp" = sample(c("A", "B"), nex, TRUE),
#'   "f1" = sample(c("a1", "a2"), nex, TRUE),
#'   "f2" = sample(c("x", "y", "z"), nex, TRUE),
#'   stringsAsFactors = TRUE
#' )
#'
#' s_proportion(
#'   df = dta,
#'   .var = "rsp",
#'   variables = list(strata = c("f1", "f2")),
#'   conf_level = 0.90,
#'   method = "strat_wilson"
#' )
#'
#' @export
s_proportion <- function(df,
                         .var,
                         conf_level = 0.95,
                         method = c(
                           "waldcc", "wald", "clopper-pearson",
                           "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                           "agresti-coull", "jeffreys"
                         ),
                         weights = NULL,
                         max_iterations = 50,
                         variables = list(strata = NULL),
                         long = FALSE) {
  method <- match.arg(method)
  checkmate::assert_flag(long)
  assert_proportion_value(conf_level)

  has_strata <- !is.null(variables$strata)
  is_strat_method <- method %in% c("strat_wilson", "strat_wilsonc")

  # Stratified methods cannot run without strata variables.
  if (!has_strata && is_strat_method) {
    stop("To use stratified methods you need to specify the strata variables.")
  }

  if (has_strata) {
    # Checks for strata; further checks are pushed down to prop_strat_wilson.
    if (missing(df)) stop("When doing stratified analysis a data.frame with specific columns is needed.")
    strata_colnames <- variables$strata
    checkmate::assert_character(strata_colnames, null.ok = FALSE)
    assert_df_with_variables(df, stats::setNames(as.list(strata_colnames), strata_colnames))

    # One factor level per combination of the strata variables.
    strata <- interaction(df[strata_colnames])
  }

  # The response is either the input vector itself or the `.var` column of the data frame.
  rsp <- if (checkmate::test_atomic_vector(df)) {
    as.logical(df)
  } else {
    as.logical(df[[.var]])
  }
  n <- sum(rsp)
  p_hat <- mean(rsp)

  prop_ci <- switch(method,
    "clopper-pearson" = prop_clopper_pearson(rsp, conf_level),
    "wilson" = ,
    "wilsonc" = prop_wilson(rsp, conf_level, correct = method == "wilsonc"),
    "strat_wilson" = ,
    "strat_wilsonc" = prop_strat_wilson(
      rsp,
      strata,
      weights,
      conf_level,
      max_iterations,
      correct = method == "strat_wilsonc"
    )$conf_int,
    "wald" = prop_wald(rsp, conf_level),
    "waldcc" = prop_wald(rsp, conf_level, correct = TRUE),
    "agresti-coull" = prop_agresti_coull(rsp, conf_level),
    "jeffreys" = prop_jeffreys(rsp, conf_level)
  )

  # The confidence interval is reported on the percentage scale.
  ci_label <- d_proportion(conf_level, method, long = long)
  list(
    "n_prop" = formatters::with_label(c(n, p_hat), "Responders"),
    "prop_ci" = formatters::with_label(x = 100 * prop_ci, label = ci_label)
  )
}
130
#' @describeIn estimate_proportions Formatted analysis function which is used as `afun`
#'   in `estimate_proportion()`.
#'
#' @return
#' * `a_proportion()` returns the corresponding list with formatted [rtables::CellValue()].
#'
#' @export
a_proportion <- make_afun(
  s_proportion,
  # Default cell formats for the two statistics.
  .formats = c(
    n_prop = "xx (xx.x%)",
    prop_ci = "(xx.x, xx.x)"
  )
)
142
#' @describeIn estimate_proportions Layout-creating function which can take statistics function arguments
#'   and additional format arguments. This function is a wrapper for [rtables::analyze()].
#'
#' @return
#' * `estimate_proportion()` returns a layout object suitable for passing to further layouting functions,
#'   or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
#'   the statistics from `s_proportion()` to the table layout.
#'
#' @examples
#' dta_test <- data.frame(
#'   USUBJID = paste0("S", 1:12),
#'   ARM = rep(LETTERS[1:3], each = 4),
#'   AVAL = c(A = c(1, 1, 1, 1), B = c(0, 0, 1, 1), C = c(0, 0, 0, 0))
#' )
#'
#' basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   estimate_proportion(vars = "AVAL") %>%
#'   build_table(df = dta_test)
#'
#' @export
#' @order 2
estimate_proportion <- function(lyt,
                                vars,
                                conf_level = 0.95,
                                method = c(
                                  "waldcc", "wald", "clopper-pearson",
                                  "wilson", "wilsonc", "strat_wilson", "strat_wilsonc",
                                  "agresti-coull", "jeffreys"
                                ),
                                weights = NULL,
                                max_iterations = 50,
                                variables = list(strata = NULL),
                                long = FALSE,
                                na_str = default_na_str(),
                                nested = TRUE,
                                ...,
                                show_labels = "hidden",
                                table_names = vars,
                                .stats = NULL,
                                .formats = NULL,
                                .labels = NULL,
                                .indent_mods = NULL) {
  analyze(
    lyt,
    vars,
    # Analysis function customised with the statistics and formatting requested by the caller.
    afun = make_afun(
      a_proportion,
      .stats = .stats,
      .formats = .formats,
      .labels = .labels,
      .indent_mods = .indent_mods
    ),
    na_str = na_str,
    nested = nested,
    # Arguments handed on to the analysis function, including anything passed via `...`.
    extra_args = list(
      conf_level = conf_level,
      method = method,
      weights = weights,
      max_iterations = max_iterations,
      variables = variables,
      long = long,
      ...
    ),
    show_labels = show_labels,
    table_names = table_names
  )
}
209
210		#' Helper Functions for Calculating Proportion Confidence Intervals
211		#'
212		#' @description `r lifecycle::badge("stable")`
213		#'
214		#' Functions to calculate different proportion confidence intervals for use in [estimate_proportion()].
215		#'
216		#' @inheritParams argument_convention
217		#' @inheritParams estimate_proportions
218		#'
219		#' @return Confidence interval of a proportion.
220		#'
221		#' @seealso [estimate_proportions], descriptive function [d_proportion()],
222		#' and helper functions [strata_normal_quantile()] and [update_weights_strat_wilson()].
223		#'
224		#' @name h_proportions
225		NULL
226
#' @describeIn h_proportions Calculates the Wilson interval by calling [stats::prop.test()].
#'   Also referred to as Wilson score interval.
#'
#' @examples
#' rsp <- c(
#'   TRUE, TRUE, TRUE, TRUE, TRUE,
#'   FALSE, FALSE, FALSE, FALSE, FALSE
#' )
#' prop_wilson(rsp, conf_level = 0.9)
#'
#' @export
prop_wilson <- function(rsp, conf_level, correct = FALSE) {
  n_responders <- sum(rsp)
  n_total <- length(rsp)

  wilson_ci <- stats::prop.test(
    x = n_responders,
    n = n_total,
    conf.level = conf_level,
    correct = correct
  )$conf.int

  # `as.numeric()` strips the `conf.level` attribute so that a plain vector is returned.
  as.numeric(wilson_ci)
}
248
249		#' @describeIn h_proportions Calculates the stratified Wilson confidence
250		#' interval for unequal proportions as described in \insertCite{Yan2010-jt;textual}{tern}
251		#'
252		#' @param strata (`factor`)\cr variable with one level per stratum and same length as `rsp`.
253		#' @param weights (`numeric` or `NULL`)\cr weights for each level of the strata. If `NULL`, they are
254		#' estimated using the iterative algorithm proposed in \insertCite{Yan2010-jt;textual}{tern} that
255		#' minimizes the weighted squared length of the confidence interval.
256		#' @param max_iterations (`count`)\cr maximum number of iterations for the iterative procedure used
257		#' to find estimates of optimal weights.
258		#' @param correct (`flag`)\cr include the continuity correction. For further information, see for example
259		#' [stats::prop.test()].
260		#'
261		#' @references
262		#' \insertRef{Yan2010-jt}{tern}
263		#'
264		#' @examples
265		#' # Stratified Wilson confidence interval with unequal probabilities
266		#'
267		#' set.seed(1)
268		#' rsp <- sample(c(TRUE, FALSE), 100, TRUE)
269		#' strata_data <- data.frame(
270		#' "f1" = sample(c("a", "b"), 100, TRUE),
271		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
272		#' stringsAsFactors = TRUE
273		#' )
274		#' strata <- interaction(strata_data)
275		#' n_strata <- ncol(table(rsp, strata)) # Number of strata
276		#'
277		#' prop_strat_wilson(
278		#' rsp = rsp, strata = strata,
279		#' conf_level = 0.90
280		#' )
281		#'
282		#' # Not automatic setting of weights
283		#' prop_strat_wilson(
284		#' rsp = rsp, strata = strata,
285		#' weights = rep(1 / n_strata, n_strata),
286		#' conf_level = 0.90
287		#' )
288		#'
289		#' @export
prop_strat_wilson <- function(rsp,
                              strata,
                              weights = NULL,
                              conf_level = 0.95,
                              max_iterations = NULL,
                              correct = FALSE) {
  # Input validation.
  checkmate::assert_logical(rsp, any.missing = FALSE)
  checkmate::assert_factor(strata, len = length(rsp))
  assert_proportion_value(conf_level)

  tbl <- table(rsp, strata)
  n_strata <- length(unique(strata))

  # Without user-supplied weights, start from equal weights; they are refined
  # iteratively further down.
  do_iter <- is.null(weights)
  if (do_iter) {
    weights <- rep(1 / n_strata, n_strata)
    if (is.null(max_iterations)) {
      max_iterations <- 10
    }
    checkmate::assert_int(max_iterations, na.ok = FALSE, null.ok = FALSE, lower = 1)
  }
  checkmate::assert_numeric(weights, lower = 0, upper = 1, any.missing = FALSE, len = n_strata)
  sum_weights <- checkmate::assert_int(sum(weights))
  if (as.integer(sum_weights + 0.5) != 1L) {
    stop("Sum of weights must be 1L.")
  }

  # Responder counts and sizes per stratum; strata without observations are dropped.
  xs <- tbl["TRUE", ]
  ns <- colSums(tbl)
  use_stratum <- (ns > 0)
  ns <- ns[use_stratum]
  xs <- xs[use_stratum]

  # Per-stratum proportion estimates and their variances.
  ests <- xs / ns
  vars <- ests * (1 - ests) / ns

  strata_qnorm <- strata_normal_quantile(vars, weights, conf_level)

  # Weights are re-estimated only when they were not set by the caller.
  weights_new <- weights
  if (do_iter) {
    weights_fit <- update_weights_strat_wilson(
      vars, strata_qnorm, weights, ns, max_iterations, conf_level
    )
    weights_new <- weights_fit$weights
  }

  # Confidence level applied within each stratum.
  strata_conf_level <- 2 * stats::pnorm(strata_qnorm) - 1

  # Classic Wilson interval per stratum.
  wilson_ci <- function(x, n) {
    suppressWarnings(
      stats::prop.test(x, n, correct = correct, conf.level = strata_conf_level)$conf.int
    )
  }
  ci_by_strata <- Map(wilson_ci, x = xs, n = ns)
  lower_by_strata <- vapply(ci_by_strata, function(ci) ci[1L], numeric(1))
  upper_by_strata <- vapply(ci_by_strata, function(ci) ci[2L], numeric(1))

  # Weighted combination of the per-stratum bounds.
  result <- list(
    conf_int = c(
      lower = sum(weights_new * lower_by_strata),
      upper = sum(weights_new * upper_by_strata)
    )
  )

  # The estimated weights are reported only when they were derived here.
  if (do_iter) {
    result <- c(result, list(weights = weights_new))
  }
  result
}
369
370		#' @describeIn h_proportions Calculates the Clopper-Pearson interval by calling [stats::binom.test()].
371		#' Also referred to as the `exact` method.
372		#'
373		#' @examples
374		#' prop_clopper_pearson(rsp, conf_level = .95)
375		#'
376		#' @export
prop_clopper_pearson <- function(rsp,
                                 conf_level) {
  # Exact (Clopper-Pearson) interval as reported by the binomial test.
  exact_ci <- stats::binom.test(
    x = sum(rsp),
    n = length(rsp),
    conf.level = conf_level
  )$conf.int

  # Return the bounds without the `conf.level` attribute.
  as.numeric(exact_ci)
}
386
387		#' @describeIn h_proportions Calculates the Wald interval by following the usual textbook definition
388		#' for a single proportion confidence interval using the normal approximation.
389		#'
390		#' @param correct (`flag`)\cr apply continuity correction.
391		#'
392		#' @examples
393		#' prop_wald(rsp, conf_level = 0.95)
394		#' prop_wald(rsp, conf_level = 0.95, correct = TRUE)
395		#'
396		#' @export
prop_wald <- function(rsp, conf_level, correct = FALSE) {
  n <- length(rsp)
  p_hat <- mean(rsp)
  z <- stats::qnorm((1 + conf_level) / 2)

  # Optional continuity correction of 1 / (2n) added to the half-width.
  continuity <- if (correct) 1 / (2 * n) else 0
  half_width <- z * sqrt(p_hat * (1 - p_hat)) / sqrt(n) + continuity

  # Bounds are clipped to the [0, 1] range of a proportion.
  c(
    max(0, p_hat - half_width),
    min(1, p_hat + half_width)
  )
}
410
411		#' @describeIn h_proportions Calculates the `Agresti-Coull` interval (created by `Alan Agresti` and `Brent Coull`) by
412		#' (for 95% CI) adding two successes and two failures to the data and then using the Wald formula to construct a CI.
413		#'
414		#' @examples
415		#' prop_agresti_coull(rsp, conf_level = 0.95)
416		#'
417		#' @export
prop_agresti_coull <- function(rsp, conf_level) {
  z <- stats::qnorm((1 + conf_level) / 2)

  # Pseudo-observations: z^2 / 2 successes and z^2 / 2 failures are added.
  x_sum_tilde <- sum(rsp) + z^2 / 2
  n_tilde <- length(rsp) + z^2
  p_tilde <- x_sum_tilde / n_tilde

  # Wald-type half-width on the adjusted data.
  err <- z * sqrt(p_tilde * (1 - p_tilde)) / sqrt(n_tilde)

  # Bounds are clipped to [0, 1].
  c(
    max(0, p_tilde - err),
    min(1, p_tilde + err)
  )
}
436
437		#' @describeIn h_proportions Calculates the Jeffreys interval, an equal-tailed interval based on the
438		#' non-informative Jeffreys prior for a binomial proportion.
439		#'
440		#' @examples
441		#' prop_jeffreys(rsp, conf_level = 0.95)
442		#'
443		#' @export
prop_jeffreys <- function(rsp,
                          conf_level) {
  n <- length(rsp)
  x_sum <- sum(rsp)

  # Beta(x + 0.5, n - x + 0.5) shape parameters and the tail mass on each side.
  shape_success <- x_sum + 0.5
  shape_failure <- n - x_sum + 0.5
  alpha <- 1 - conf_level

  # The lower bound is fixed at 0 when there are no responders ...
  l_ci <- ifelse(
    x_sum == 0,
    0,
    stats::qbeta(alpha / 2, shape_success, shape_failure)
  )
  # ... and the upper bound at 1 when everyone responded.
  u_ci <- ifelse(
    x_sum == n,
    1,
    stats::qbeta(1 - alpha / 2, shape_success, shape_failure)
  )

  c(l_ci, u_ci)
}
464
465		#' Description of the Proportion Summary
466		#'
467		#' @description `r lifecycle::badge("stable")`
468		#'
469		#' This is a helper function that describes the analysis in [s_proportion()].
470		#'
471		#' @inheritParams s_proportion
472		#' @param long (`flag`)\cr whether a long or a short (default) description is required.
473		#'
474		#' @return String describing the analysis.
475		#'
476		#' @export
d_proportion <- function(conf_level,
                         method,
                         long = FALSE) {
  # Leading part of the label, e.g. "95% CI", optionally in its long form.
  short_label <- paste0(conf_level * 100, "% CI")
  label <- if (long) paste(short_label, "for Response Rates") else short_label

  # Display name of the interval method; unknown methods raise an error.
  method_part <- switch(method,
    "clopper-pearson" = "Clopper-Pearson",
    "waldcc" = "Wald, with correction",
    "wald" = "Wald, without correction",
    "wilson" = "Wilson, without correction",
    "strat_wilson" = "Stratified Wilson, without correction",
    "wilsonc" = "Wilson, with correction",
    "strat_wilsonc" = "Stratified Wilson, with correction",
    "agresti-coull" = "Agresti-Coull",
    "jeffreys" = "Jeffreys",
    stop(paste(method, "does not have a description"))
  )

  paste0(label, " (", method_part, ")")
}
499
500		#' Helper Function for the Estimation of Stratified Quantiles
501		#'
502		#' @description `r lifecycle::badge("stable")`
503		#'
504		#' This function wraps the estimation of stratified percentiles when we assume
505		#' the approximation for large numbers. This is necessary only when the
506		#' proportions of the strata are unequal.
507		#'
508		#' @inheritParams argument_convention
509		#' @inheritParams prop_strat_wilson
510		#'
511		#' @return Stratified quantile.
512		#'
513		#' @seealso [prop_strat_wilson()]
514		#'
515		#' @examples
516		#' strata_data <- table(data.frame(
517		#' "f1" = sample(c(TRUE, FALSE), 100, TRUE),
518		#' "f2" = sample(c("x", "y", "z"), 100, TRUE),
519		#' stringsAsFactors = TRUE
520		#' ))
521		#' ns <- colSums(strata_data)
522		#' ests <- strata_data["TRUE", ] / ns
523		#' vars <- ests * (1 - ests) / ns
524		#' weights <- rep(1 / length(ns), length(ns))
525		#'
526		#' strata_normal_quantile(vars, weights, 0.95)
527		#'
528		#' @export
strata_normal_quantile <- function(vars, weights, conf_level) {
  # Weighted variance contribution of each stratum.
  weighted_vars <- weights^2 * vars
  z <- stats::qnorm((1 + conf_level) / 2)

  # Normal quantile rescaled by the ratio of the pooled to the summed
  # per-stratum standard deviations.
  sqrt(sum(weighted_vars)) / sum(sqrt(weighted_vars)) * z
}
534
535		#' Helper Function for the Estimation of Weights for `prop_strat_wilson`
536		#'
537		#' @description `r lifecycle::badge("stable")`
538		#'
539		#' This function wraps the iteration procedure that allows you to estimate
540		#' the weights for each stratum. The weights are chosen to minimize the
541		#' weighted squared length of the confidence interval.
542		#'
543		#' @inheritParams prop_strat_wilson
544		#' @param vars (`numeric`)\cr normalized proportions for each strata.
545		#' @param strata_qnorm (`numeric`)\cr initial estimation with identical weights of the quantiles.
546		#' @param initial_weights (`numeric`)\cr initial weights used to calculate `strata_qnorm`. This can
547		#' be optimized in the future if we need to estimate better initial weights.
548		#' @param n_per_strata (`numeric`)\cr number of elements in each strata.
549		#' @param max_iterations (`count`)\cr maximum number of iterations to be tried. Convergence is always checked.
550		#' @param tol (`number`)\cr tolerance threshold for convergence.
551		#'
552		#' @return A `list` of 3 elements: `n_it`, `weights`, and `diff_v`.
553		#'
554		#' @seealso For references and details see [prop_strat_wilson()].
555		#'
556		#' @examples
557		#' vs <- c(0.011, 0.013, 0.012, 0.014, 0.017, 0.018)
558		#' sq <- 0.674
559		#' ws <- rep(1 / length(vs), length(vs))
560		#' ns <- c(22, 18, 17, 17, 14, 12)
561		#'
562		#' update_weights_strat_wilson(vs, sq, ws, ns, 100, 0.95, 0.001)
563		#'
564		#' @export
update_weights_strat_wilson <- function(vars,
                                        strata_qnorm,
                                        initial_weights,
                                        n_per_strata,
                                        max_iterations = 50,
                                        conf_level = 0.95,
                                        tol = 0.001) {
  it <- 0
  diff_v <- NULL
  # Convergence is tracked explicitly. Comparing `it` with `max_iterations`
  # after the loop cannot tell "converged on the last allowed iteration"
  # apart from "ran out of iterations".
  converged <- FALSE
  # Fall back to the initial weights if the loop body never runs.
  weights_new <- initial_weights

  while (it < max_iterations) {
    it <- it + 1

    # Candidate weights from the current stratified quantile, then normalized to sum to 1.
    weights_new_t <- (1 + strata_qnorm^2 / n_per_strata)^2
    weights_new_b <- (vars + strata_qnorm^2 / (4 * n_per_strata^2))
    weights_new <- weights_new_t / weights_new_b
    weights_new <- weights_new / sum(weights_new)

    # Quantile implied by the updated weights, used in the next iteration.
    strata_qnorm <- strata_normal_quantile(vars, weights_new, conf_level)

    # Total absolute change of the weights in this iteration.
    diff_v <- c(diff_v, sum(abs(weights_new - initial_weights)))
    if (diff_v[length(diff_v)] < tol) {
      converged <- TRUE
      break
    }
    initial_weights <- weights_new
  }

  # Warn only when the tolerance was never reached.
  if (!converged) {
    warning("The heuristic to find weights did not converge with max_iterations = ", max_iterations)
  }

  list(
    "n_it" = it,
    "weights" = weights_new,
    "diff_v" = diff_v
  )
}

1		#' `rtables` Access Helper Functions
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' These are a couple of functions that help with accessing the data in `rtables` objects.
6		#' Currently these work for occurrence tables, which are defined as having a count as the first
7		#' element and a fraction as the second element in each cell.
8		#'
9		#' @seealso [prune_occurrences] for usage of these functions.
10		#'
11		#' @name rtables_access
12		NULL
13
14		#' @describeIn rtables_access Helper function to extract the first values from each content
15		#' cell and from specified columns in a `TableRow`. Defaults to all columns.
16		#'
17		#' @param table_row (`TableRow`)\cr an analysis row in an occurrence table.
18		#' @param col_names (`character`)\cr the names of the columns to extract from.
19		#' @param col_indices (`integer`)\cr the indices of the columns to extract from. If `col_names` are provided,
20		#' then these are inferred from the names of `table_row`. Note that this currently only works well with a single
21		#' column split.
22		#'
23		#' @return
24		#' * `h_row_first_values()` returns a `vector` of numeric values.
25		#'
26		#' @examples
27		#' tbl <- basic_table() %>%
28		#' split_cols_by("ARM") %>%
29		#' split_rows_by("RACE") %>%
30		#' analyze("AGE", function(x) {
31		#' list(
32		#' "mean (sd)" = rcell(c(mean(x), sd(x)), format = "xx.x (xx.x)"),
33		#' "n" = length(x),
34		#' "frac" = rcell(c(0.1, 0.1), format = "xx (xx)")
35		#' )
36		#' }) %>%
37		#' build_table(tern_ex_adsl) %>%
38		#' prune_table()
39		#' tree_row_elem <- collect_leaves(tbl[2, ])[[1]]
40		#' result <- max(h_row_first_values(tree_row_elem))
41		#' result
42		#'
43		#' @export
h_row_first_values <- function(table_row,
                               col_names = NULL,
                               col_indices = NULL) {
  # Resolve the requested columns to indices and check that they exist in the row.
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  checkmate::assert_integerish(col_indices)
  checkmate::assert_subset(col_indices, seq_len(ncol(table_row)))

  # Cell contents of the selected columns.
  selected_cells <- row_values(table_row)[col_indices]

  # First element of a cell; cells holding NULL are reported as NA.
  first_or_na <- function(cell) {
    if (is.null(cell)) NA_real_ else cell[1L]
  }

  vapply(selected_cells, first_or_na, FUN.VALUE = numeric(1))
}
63
64		#' @describeIn rtables_access Helper function that extracts row values and checks if they are
65		#' convertible to integers (`integerish` values).
66		#'
67		#' @return
68		#' * `h_row_counts()` returns a `vector` of numeric values.
69		#'
70		#' @examples
71		#' # Row counts (integer values)
72		#' # h_row_counts(tree_row_elem) # Fails because there are no integers
73		#' # Using values with integers
74		#' tree_row_elem <- collect_leaves(tbl[3, ])[[1]]
75		#' result <- h_row_counts(tree_row_elem)
76		#' # result
77		#'
78		#' @export
h_row_counts <- function(table_row,
                         col_names = NULL,
                         col_indices = NULL) {
  # First value of each selected cell; these must be whole numbers to be counts.
  counts <- h_row_first_values(
    table_row,
    col_names = col_names,
    col_indices = col_indices
  )
  checkmate::assert_integerish(counts)

  counts
}
86
87		#' @describeIn rtables_access helper function to extract fractions from specified columns in a `TableRow`.
88		#' More specifically it extracts the second values from each content cell and checks it is a fraction.
89		#'
90		#' @return
91		#' * `h_row_fractions()` returns a `vector` of proportions.
92		#'
93		#' @examples
94		#' # Row fractions
95		#' tree_row_elem <- collect_leaves(tbl[4, ])[[1]]
96		#' h_row_fractions(tree_row_elem)
97		#'
98		#' @export
h_row_fractions <- function(table_row,
                            col_names = NULL,
                            col_indices = NULL) {
  col_indices <- check_names_indices(table_row, col_names, col_indices)
  row_vals <- row_values(table_row)[col_indices]

  # Second element of every selected cell. `vapply()` keeps the result a
  # numeric vector for any input; `sapply()` returns `list()` for an empty
  # selection, which then fails the numeric assertion below.
  fractions <- vapply(row_vals, "[", numeric(1), 2L)

  # Fractions must lie in [0, 1].
  checkmate::assert_numeric(fractions, lower = 0, upper = 1)
  fractions
}
108
109		#' @describeIn rtables_access Helper function to extract column counts from specified columns in a table.
110		#'
111		#' @param table (`VTableNodeInfo`)\cr an occurrence table or row.
112		#'
113		#' @return
114		#' * `h_col_counts()` returns a `vector` of column counts.
115		#'
116		#' @export
h_col_counts <- function(table,
                         col_names = NULL,
                         col_indices = NULL) {
  col_indices <- check_names_indices(table, col_names, col_indices)

  # Column counts of the selected columns, labelled with the requested names.
  # With `col_names = NULL` the result carries no names.
  counts <- col_counts(table)[col_indices]
  names(counts) <- col_names
  counts
}
124
125		#' @describeIn rtables_access Helper function to get first row of content table of current table.
126		#'
127		#' @return
128		#' * `h_content_first_row()` returns a row from an `rtables` table.
129		#'
130		#' @export
h_content_first_row <- function(table) {
  # First child of the table's content table.
  content_rows <- tree_children(content_table(table))
  content_rows[[1]]
}
135
136		#' @describeIn rtables_access Helper function which says whether current table is a leaf in the tree.
137		#'
138		#' @return
139		#' * `is_leaf_table()` returns a `logical` value indicating whether current table is a leaf.
140		#'
141		#' @keywords internal
is_leaf_table <- function(table) {
  # A leaf table is one whose children are all of class `ElementaryTable`.
  child_classes <- sapply(tree_children(table), class)
  identical(unique(child_classes), "ElementaryTable")
}
147
148		#' @describeIn rtables_access Internal helper function that tests standard inputs for column indices.
149		#'
150		#' @return
151		#' * `check_names_indices` returns column indices.
152		#'
153		#' @keywords internal
check_names_indices <- function(table_row,
                                col_names = NULL,
                                col_indices = NULL) {
  if (!is.null(col_names)) {
    # Names and indices are mutually exclusive ways to select columns.
    if (!is.null(col_indices)) {
      stop(
        "Inserted both col_names and col_indices when selecting row values. ",
        "Please choose one."
      )
    }
    col_indices <- h_col_indices(table_row, col_names)
  }

  # Default to all columns; objects without a column dimension fall back to their length.
  if (is.null(col_indices)) {
    ll <- ncol(table_row)
    if (is.null(ll)) {
      ll <- length(table_row)
    }
    col_indices <- seq_len(ll)
  }

  col_indices
}

1		#' Control Function for Subgroup Treatment Effect Pattern (STEP) Calculations
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is an auxiliary function for controlling arguments for STEP calculations.
6		#'
7		#' @param biomarker (`numeric` or `NULL`)\cr optional provision of the numeric biomarker variable, which
8		#' could be used to infer `bandwidth`, see below.
9		#' @param use_percentile (`flag`)\cr if `TRUE`, the running windows are created according to
10		#' quantiles rather than actual values, i.e. the bandwidth refers to the percentage of data
11		#' covered in each window. Suggest `TRUE` if the biomarker variable is not uniformly
12		#' distributed.
13		#' @param bandwidth (`number` or `NULL`)\cr indicating the bandwidth of each window.
14		#' Depending on the argument `use_percentile`, it can be either the length of actual-value
15		#' windows on the real biomarker scale, or percentage windows.
16		#' If `use_percentile = TRUE`, it should be a number between 0 and 1.
17		#' If `NULL`, treat the bandwidth to be infinity, which means only one global model will be fitted.
18		#' By default, `0.25` is used for percentage windows and one quarter of the range of the `biomarker`
19		#' variable for actual-value windows.
20		#' @param degree (`count`)\cr the degree of polynomial function of the biomarker as an interaction term
21		#' with the treatment arm fitted at each window. If 0 (default), then the biomarker variable
22		#' is not included in the model fitted in each biomarker window.
23		#' @param num_points (`count`)\cr the number of points at which the hazard ratios are estimated. The
24		#' smallest number is 2.
25		#'
26		#' @return A list of components with the same names as the arguments, except `biomarker` which is
27		#' just used to calculate the `bandwidth` in case that actual biomarker windows are requested.
28		#'
29		#' @examples
30		#' # Provide biomarker values and request actual values to be used,
31		#' # so that bandwidth is chosen from range.
32		#' control_step(biomarker = 1:10, use_percentile = FALSE)
33		#'
34		#' # Use a global model with quadratic biomarker interaction term.
35		#' control_step(bandwidth = NULL, degree = 2)
36		#'
37		#' # Reduce number of points to be used.
38		#' control_step(num_points = 10)
39		#'
40		#' @export
control_step <- function(biomarker = NULL,
                         use_percentile = TRUE,
                         bandwidth,
                         degree = 0L,
                         num_points = 39L) {
  checkmate::assert_numeric(biomarker, null.ok = TRUE)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_int(num_points, lower = 2)
  checkmate::assert_count(degree)

  if (missing(bandwidth)) {
    # Infer bandwidth: a quarter of the data for percentile windows, a quarter
    # of the biomarker range for actual-value windows, or `NULL` (one global
    # model) when no biomarker values were supplied.
    bandwidth <- if (use_percentile) {
      0.25
    } else if (!is.null(biomarker)) {
      diff(range(biomarker, na.rm = TRUE)) / 4
    } else {
      NULL
    }
  } else {
    # Check bandwidth; an explicit `NULL` is allowed and left untouched.
    if (!is.null(bandwidth)) {
      if (use_percentile) {
        assert_proportion_value(bandwidth)
      } else {
        # Must be a single number: a plain scalar check would let strings or
        # logicals through, since e.g. `"a" > 0` evaluates to `TRUE`.
        checkmate::assert_number(bandwidth)
        checkmate::assert_true(bandwidth > 0)
      }
    }
  }

  list(
    use_percentile = use_percentile,
    bandwidth = bandwidth,
    degree = as.integer(degree),
    num_points = as.integer(num_points)
  )
}

1		#' Confidence Interval for Mean
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Convenient function for calculating the mean confidence interval. It calculates the arithmetic as well as the
6		#' geometric mean. It can be used as a `ggplot` helper function for plotting.
7		#'
8		#' @inheritParams argument_convention
9		#' @param n_min (`number`)\cr a minimum number of non-missing `x` to estimate the confidence interval for mean.
10		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
11		#' @param geom_mean (`logical`)\cr `TRUE` when the geometric mean should be calculated.
12		#'
13		#' @return A named `vector` of values `mean_ci_lwr` and `mean_ci_upr`.
14		#'
15		#' @examples
16		#' stat_mean_ci(sample(10), gg_helper = FALSE)
17		#'
18		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
19		#' ggplot2::geom_point()
20		#'
21		#' p + ggplot2::stat_summary(
22		#' fun.data = stat_mean_ci,
23		#' geom = "errorbar"
24		#' )
25		#'
26		#' p + ggplot2::stat_summary(
27		#' fun.data = stat_mean_ci,
28		#' fun.args = list(conf_level = 0.5),
29		#' geom = "errorbar"
30		#' )
31		#'
32		#' p + ggplot2::stat_summary(
33		#' fun.data = stat_mean_ci,
34		#' fun.args = list(conf_level = 0.5, geom_mean = TRUE),
35		#' geom = "errorbar"
36		#' )
37		#'
38		#' @export
stat_mean_ci <- function(x,
                         conf_level = 0.95,
                         na.rm = TRUE, # nolint
                         n_min = 2,
                         gg_helper = TRUE,
                         geom_mean = FALSE) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  if (geom_mean) {
    # The geometric mean is undefined if any observed value is zero or negative.
    observed <- x[!is.na(x)]
    if (any(observed <= 0)) {
      m <- NA_real_
    } else {
      # Work on the log scale; results are back-transformed further down.
      x <- log(x)
      m <- mean(x)
    }
  } else {
    m <- mean(x)
  }

  if (n < n_min || is.na(m)) {
    # Too few observations, or no usable mean: no interval.
    ci <- c(mean_ci_lwr = NA_real_, mean_ci_upr = NA_real_)
  } else {
    # t-based half-width of the interval.
    hci <- stats::qt((1 + conf_level) / 2, df = n - 1) * stats::sd(x) / sqrt(n)
    ci <- c(mean_ci_lwr = m - hci, mean_ci_upr = m + hci)
    if (geom_mean) {
      ci <- exp(ci)
    }
  }

  if (gg_helper) {
    # Layout with columns `y`, `ymin` and `ymax`, for use as a ggplot2 summary function.
    m <- ifelse(is.na(m), NA_real_, m)
    ci <- data.frame(y = ifelse(geom_mean, exp(m), m), ymin = ci[[1]], ymax = ci[[2]])
  }

  ci
}
79
80		#' Confidence Interval for Median
81		#'
82		#' @description `r lifecycle::badge("stable")`
83		#'
84		#' Convenient function for calculating the median confidence interval. It can be used as a `ggplot` helper
85		#' function for plotting.
86		#'
87		#' @inheritParams argument_convention
88		#' @param gg_helper (`logical`)\cr `TRUE` when output should be aligned for the use with `ggplot`.
89		#'
90		#' @details The function was adapted from `DescTools/versions/0.99.35/source`
91		#'
92		#' @return A named `vector` of values `median_ci_lwr` and `median_ci_upr`.
93		#'
94		#' @examples
95		#' stat_median_ci(sample(10), gg_helper = FALSE)
96		#'
97		#' p <- ggplot2::ggplot(mtcars, ggplot2::aes(cyl, mpg)) +
98		#' ggplot2::geom_point()
99		#' p + ggplot2::stat_summary(
100		#' fun.data = stat_median_ci,
101		#' geom = "errorbar"
102		#' )
103		#'
104		#' @export
stat_median_ci <- function(x,
                           conf_level = 0.95,
                           na.rm = TRUE, # nolint
                           gg_helper = TRUE) {
  x <- unname(x)
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  med <- stats::median(x)

  # Rank of the order statistic used as the lower bound.
  k <- stats::qbinom(p = (1 - conf_level) / 2, size = n, prob = 0.5, lower.tail = TRUE)

  # Default: no interval. This covers k == 0 (for small samples, e.g. n <= 5,
  # the interval can fall outside the observed range) and a missing median.
  ci <- c(median_ci_lwr = NA_real_, median_ci_upr = NA_real_)
  empir_conf_level <- NA_real_

  if (!(k == 0 || is.na(med))) {
    x_sort <- sort(x)
    ci <- c(median_ci_lwr = x_sort[k], median_ci_upr = x_sort[n - k + 1])
    # Coverage actually achieved by the chosen order statistics.
    empir_conf_level <- 1 - 2 * stats::pbinom(k - 1, size = n, prob = 0.5)
  }

  if (gg_helper) {
    # Layout with columns `y`, `ymin` and `ymax`, for use as a ggplot2 summary function.
    ci <- data.frame(y = med, ymin = ci[[1]], ymax = ci[[2]])
  }

  attr(ci, "conf_level") <- empir_conf_level
  ci
}
136
137		#' p-Value of the Mean
138		#'
139		#' @description `r lifecycle::badge("stable")`
140		#'
141		#' Convenient function for calculating the two-sided p-value of the mean.
142		#'
143		#' @inheritParams argument_convention
144		#' @param n_min (`numeric`)\cr a minimum number of non-missing `x` to estimate the p-value of the mean.
145		#' @param test_mean (`numeric`)\cr mean value to test under the null hypothesis.
146		#'
147		#' @return A p-value.
148		#'
149		#' @examples
150		#' stat_mean_pval(sample(10))
151		#'
152		#' stat_mean_pval(rnorm(10), test_mean = 0.5)
153		#'
154		#' @export
stat_mean_pval <- function(x,
                           na.rm = TRUE, # nolint
                           n_min = 2,
                           test_mean = 0) {
  if (na.rm) {
    x <- stats::na.omit(x)
  }
  n <- length(x)

  x_mean <- mean(x)
  x_sd <- stats::sd(x)

  # With too few observations no p-value is reported.
  pv <- c(p_value = NA_real_)
  if (!(n < n_min)) {
    # Two-sided one-sample t-test of `mean(x)` against `test_mean`.
    x_se <- x_sd / sqrt(n)
    ttest <- (x_mean - test_mean) / x_se
    pv <- c(p_value = 2 * stats::pt(-abs(ttest), df = n - 1))
  }

  pv
}
177
178		#' Proportion Difference and Confidence Interval
179		#'
180		#' @description `r lifecycle::badge("stable")`
181		#'
182		#' Function for calculating the proportion (or risk) difference and confidence interval between arm
183		#' X (reference group) and arm Y. Risk difference is calculated by subtracting cumulative incidence
184		#' in arm Y from cumulative incidence in arm X.
185		#'
186		#' @inheritParams argument_convention
187		#' @param x (`list` of `integer`)\cr list of number of occurrences in arm X (reference group).
188		#' @param y (`list` of `integer`)\cr list of number of occurrences in arm Y. Must be of equal length to `x`.
189		#' @param N_x (`numeric`)\cr total number of records in arm X.
190		#' @param N_y (`numeric`)\cr total number of records in arm Y.
191		#' @param list_names (`character`)\cr names of each variable/level corresponding to pair of proportions in
192		#' `x` and `y`. Must be of equal length to `x` and `y`.
193		#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
194		#'
195		#' @return List of proportion differences and CIs corresponding to each pair of number of occurrences in `x` and
196		#' `y`. Each list element consists of 3 statistics: proportion difference, CI lower bound, and CI upper bound.
197		#'
198		#' @seealso Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()]
199		#' with the `riskdiff` argument set to `TRUE` in subsequent analyze functions, adds a column containing
200		#' proportion (risk) difference to an `rtables` layout.
201		#'
202		#' @examples
203		#' stat_propdiff_ci(
204		#' x = list(0.375), y = list(0.01), N_x = 5, N_y = 5, list_names = "x", conf_level = 0.9
205		#' )
206		#'
207		#' stat_propdiff_ci(
208		#' x = list(0.5, 0.75, 1), y = list(0.25, 0.05, 0.5), N_x = 10, N_y = 20, pct = FALSE
209		#' )
210		#'
211		#' @export
stat_propdiff_ci <- function(x,
                             y,
                             N_x, # nolint
                             N_y, # nolint
                             list_names = NULL,
                             conf_level = 0.95,
                             pct = TRUE) {
  checkmate::assert_list(x, types = "numeric")
  checkmate::assert_list(y, types = "numeric", len = length(x))
  checkmate::assert_character(list_names, len = length(x), null.ok = TRUE)

  # Multiplier turning proportions into percentages when requested.
  scale <- ifelse(pct, 100, 1)

  # One (difference, lower, upper) triple per pair of counts.
  diff_with_ci <- function(x_i, y_i) {
    p_x <- x_i / N_x
    p_y <- y_i / N_y
    # Normal-approximation interval around the difference of the two proportions.
    rd_ci <- p_x - p_y + c(-1, 1) * stats::qnorm((1 + conf_level) / 2) *
      sqrt(p_x * (1 - p_x) / N_x + p_y * (1 - p_y) / N_y)
    c(p_x - p_y, rd_ci) * scale
  }
  rd_list <- Map(diff_with_ci, x, y)

  names(rd_list) <- list_names
  rd_list
}

1		#' Survival Time Point Analysis
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Summarize patients' survival rate and difference of survival rates between groups at a time point.
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams s_surv_time
9		#' @param time_point (`number`)\cr survival time point of interest.
10		#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
11		#' [control_surv_timepoint()]. Some possible parameter options are:
12		#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival rate.
13		#' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", "log-log",
14		#' see more in [survival::survfit()]. Note option "none" is no longer supported.
15		#' * `time_point` (`number`)\cr survival time point of interest.
16		#' @param method (`string`)\cr either `surv` (survival estimations),
17		#' `surv_diff` (difference in survival with the control) or `both`.
18		#' @param table_names_suffix (`string`)\cr optional suffix for the `table_names` used for the `rtables` to
19		#' avoid warnings from duplicate table names.
20		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("surv_timepoint")`
21		#' to see available statistics for this function.
22		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
23		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
24		#' for that statistic's row label.
25		#'
26		#' @name survival_timepoint
27		#' @order 1
28		NULL
29
30		#' @describeIn survival_timepoint Statistics function which analyzes survival rate.
31		#'
32		#' @return
33		#' * `s_surv_timepoint()` returns the statistics:
34		#' * `pt_at_risk`: Patients remaining at risk.
35		#' * `event_free_rate`: Event-free rate (%).
36		#' * `rate_se`: Standard error of event free rate.
37		#' * `rate_ci`: Confidence interval for event free rate.
38		#'
39		#' @keywords internal
s_surv_timepoint <- function(df,
                             .var,
                             time_point,
                             is_event,
                             control = control_surv_timepoint()) {
  # Input validation.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_number(time_point)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_type <- control$conf_type
  conf_level <- control$conf_level

  # Survival fit without covariates, summarised at the requested time point.
  formula <- stats::as.formula(paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1"))
  srv_fit <- survival::survfit(
    formula = formula,
    data = df,
    conf.int = conf_level,
    conf.type = conf_type
  )
  s_srv_fit <- summary(srv_fit, times = time_point, extend = TRUE)
  fit_columns <- c("time", "n.risk", "surv", "lower", "upper", "std.err")
  df_srv_fit <- as.data.frame(s_srv_fit[fit_columns])

  if (df_srv_fit[["n.risk"]] == 0) {
    # Nobody is left at risk at the time point: report missing values throughout.
    pt_at_risk <- NA_real_
    event_free_rate <- NA_real_
    rate_se <- NA_real_
    rate_ci <- c(NA_real_, NA_real_)
  } else {
    pt_at_risk <- df_srv_fit[["n.risk"]]
    event_free_rate <- df_srv_fit[["surv"]]
    rate_se <- df_srv_fit[["std.err"]]
    rate_ci <- c(df_srv_fit[["lower"]], df_srv_fit[["upper"]])
  }

  # Rates, standard error and interval are multiplied by 100 before labelling.
  list(
    pt_at_risk = formatters::with_label(pt_at_risk, "Patients remaining at risk"),
    event_free_rate = formatters::with_label(event_free_rate * 100, "Event Free Rate (%)"),
    rate_se = formatters::with_label(rate_se * 100, "Standard Error of Event Free Rate"),
    rate_ci = formatters::with_label(rate_ci * 100, f_conf_level(conf_level))
  )
}
79
80		#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
81		#' when `method = "surv"`.
82		#'
83		#' @return
84		#' * `a_surv_timepoint()` returns the corresponding list with formatted [rtables::CellValue()].
85		#'
86		#' @keywords internal
# Formatted counterpart of `s_surv_timepoint()`: attaches a default format and
# a default row-label indentation to each statistic it returns.
a_surv_timepoint <- make_afun(
  s_surv_timepoint,
  .formats = c(
    pt_at_risk = "xx",
    event_free_rate = "xx.xx",
    rate_se = "xx.xx",
    rate_ci = "(xx.xx, xx.xx)"
  ),
  # The standard error and the confidence interval are indented one level
  # deeper than the count and the rate.
  .indent_mods = c(
    pt_at_risk = 0L,
    event_free_rate = 0L,
    rate_se = 1L,
    rate_ci = 1L
  )
)
102
103		#' @describeIn survival_timepoint Statistics function which analyzes difference between two survival rates.
104		#'
105		#' @return
106		#' * `s_surv_timepoint_diff()` returns the statistics:
107		#' * `rate_diff`: Event-free rate difference between two groups.
108		#' * `rate_diff_ci`: Confidence interval for the difference.
109		#' * `ztest_pval`: p-value to test the difference is 0.
110		#'
111		#' @keywords internal
s_surv_timepoint_diff <- function(df,
                                  .var,
                                  .ref_group,
                                  .in_ref_col,
                                  time_point,
                                  control = control_surv_timepoint(),
                                  ...) {
  # In the reference column there is nothing to compare against: return
  # empty strings that still carry the row labels.
  if (.in_ref_col) {
    return(
      list(
        rate_diff = formatters::with_label("", "Difference in Event Free Rate"),
        rate_diff_ci = formatters::with_label("", f_conf_level(control$conf_level)),
        ztest_pval = formatters::with_label("", "p-value (Z-test)")
      )
    )
  }

  # Stack reference and comparison data, then estimate each group separately.
  # The factor levels fix the order of the split: "ref" first, "x" second.
  data <- rbind(.ref_group, df)
  group <- factor(
    rep(c("ref", "x"), c(nrow(.ref_group), nrow(df))),
    levels = c("ref", "x")
  )
  res_per_group <- lapply(split(data, group), function(x) {
    s_surv_timepoint(df = x, .var = .var, time_point = time_point, control = control, ...)
  })
  res_ref <- res_per_group[[1]]
  res_x <- res_per_group[[2]]

  # Difference (comparison minus reference) and its standard error, combining
  # the two group standard errors as a root sum of squares.
  rate_diff <- res_x$event_free_rate - res_ref$event_free_rate
  se_diff <- sqrt(res_x$rate_se^2 + res_ref$rate_se^2)

  # Symmetric normal-approximation confidence interval.
  qs <- c(-1, 1) * stats::qnorm(1 - (1 - control$conf_level) / 2)
  rate_diff_ci <- rate_diff + qs * se_diff

  # Two-sided Z-test. The upper tail is requested directly instead of
  # computing `1 - pnorm(z)`, which cancels to exactly 0 for large `z`.
  # A missing difference yields a numeric (not logical) NA so the statistic
  # always has the same type.
  ztest_pval <- if (is.na(rate_diff)) {
    NA_real_
  } else {
    2 * stats::pnorm(abs(rate_diff) / se_diff, lower.tail = FALSE)
  }

  list(
    rate_diff = formatters::with_label(rate_diff, "Difference in Event Free Rate"),
    rate_diff_ci = formatters::with_label(rate_diff_ci, f_conf_level(control$conf_level)),
    ztest_pval = formatters::with_label(ztest_pval, "p-value (Z-test)")
  )
}
152
153		#' @describeIn survival_timepoint Formatted analysis function which is used as `afun` in `surv_timepoint()`
154		#' when `method = "surv_diff"`.
155		#'
156		#' @return
157		#' * `a_surv_timepoint_diff()` returns the corresponding list with formatted [rtables::CellValue()].
158		#'
159		#' @keywords internal
# Formatted counterpart of `s_surv_timepoint_diff()`: attaches a default
# format to each statistic it returns.
a_surv_timepoint_diff <- make_afun(
  s_surv_timepoint_diff,
  .formats = c(
    rate_diff = "xx.xx",
    rate_diff_ci = "(xx.xx, xx.xx)",
    # The separator is a plain "|"; a backslash before it is not a valid
    # escape sequence in an R string.
    ztest_pval = "x.xxxx | (<0.0001)"
  )
)
168
169		#' @describeIn survival_timepoint Layout-creating function which can take statistics function arguments
170		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
171		#'
172		#' @return
173		#' * `surv_timepoint()` returns a layout object suitable for passing to further layouting functions,
174		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
175		#' the statistics from `s_surv_timepoint()` and/or `s_surv_timepoint_diff()` to the table layout depending on
176		#' the value of `method`.
177		#'
178		#' @examples
179		#' library(dplyr)
180		#'
181		#' adtte_f <- tern_ex_adtte %>%
182		#' filter(PARAMCD == "OS") %>%
183		#' mutate(
184		#' AVAL = day2month(AVAL),
185		#' is_event = CNSR == 0
186		#' )
187		#'
188		#' # Survival at given time points.
189		#' basic_table() %>%
190		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
191		#' add_colcounts() %>%
192		#' surv_timepoint(
193		#' vars = "AVAL",
194		#' var_labels = "Months",
195		#' is_event = "is_event",
196		#' time_point = 7
197		#' ) %>%
198		#' build_table(df = adtte_f)
199		#'
200		#' # Difference in survival at given time points.
201		#' basic_table() %>%
202		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
203		#' add_colcounts() %>%
204		#' surv_timepoint(
205		#' vars = "AVAL",
206		#' var_labels = "Months",
207		#' is_event = "is_event",
208		#' time_point = 9,
209		#' method = "surv_diff",
210		#' .indent_mods = c("rate_diff" = 0L, "rate_diff_ci" = 2L, "ztest_pval" = 2L)
211		#' ) %>%
212		#' build_table(df = adtte_f)
213		#'
214		#' # Survival and difference in survival at given time points.
215		#' basic_table() %>%
216		#' split_cols_by(var = "ARMCD", ref_group = "ARM A") %>%
217		#' add_colcounts() %>%
218		#' surv_timepoint(
219		#' vars = "AVAL",
220		#' var_labels = "Months",
221		#' is_event = "is_event",
222		#' time_point = 9,
223		#' method = "both"
224		#' ) %>%
225		#' build_table(df = adtte_f)
226		#'
227		#' @export
228		#' @order 2
surv_timepoint <- function(lyt,
                           vars,
                           time_point,
                           is_event,
                           control = control_surv_timepoint(),
                           method = c("surv", "surv_diff", "both"),
                           na_str = default_na_str(),
                           nested = TRUE,
                           ...,
                           table_names_suffix = "",
                           var_labels = "Time",
                           show_labels = "visible",
                           .stats = c(
                             "pt_at_risk", "event_free_rate", "rate_ci",
                             "rate_diff", "rate_diff_ci", "ztest_pval"
                           ),
                           .formats = NULL,
                           .labels = NULL,
                           .indent_mods = if (method == "both") {
                             c(rate_diff = 1L, rate_diff_ci = 2L, ztest_pval = 2L)
                           } else {
                             c(rate_diff_ci = 1L, ztest_pval = 1L)
                           }) {
  # `method` must be reduced to a single string BEFORE `.indent_mods` is first
  # touched: its default expression is evaluated lazily and tests `method`.
  method <- match.arg(method)
  checkmate::assert_string(table_names_suffix)

  extra_args <- list(time_point = time_point, is_event = is_event, control = control, ...)

  # Route every per-statistic setting to the analysis function that owns the
  # statistic ("surv" = rates, "surv_diff" = difference between rates).
  f <- list(
    surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
    surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
  )
  .stats <- h_split_param(.stats, .stats, f = f)
  .formats <- h_split_param(.formats, names(.formats), f = f)
  .labels <- h_split_param(.labels, names(.labels), f = f)
  .indent_mods <- h_split_param(.indent_mods, names(.indent_mods), f = f)

  afun_surv <- make_afun(
    a_surv_timepoint,
    .stats = .stats$surv,
    .formats = .formats$surv,
    .labels = .labels$surv,
    .indent_mods = .indent_mods$surv
  )

  afun_surv_diff <- make_afun(
    a_surv_timepoint_diff,
    .stats = .stats$surv_diff,
    .formats = .formats$surv_diff,
    .labels = .labels$surv_diff,
    .indent_mods = .indent_mods$surv_diff
  )

  # Loop-invariant decisions, computed once.
  add_surv <- method %in% c("surv", "both")
  add_surv_diff <- method %in% c("surv_diff", "both")
  # With "both", the difference rows follow the rate rows of the same time
  # point, so their variable label is hidden rather than repeated.
  show_labels_diff <- if (method == "both") "hidden" else show_labels

  # One analysis (or pair of analyses) per requested time point; each call
  # receives a single time point through `extra_args`.
  for (i in seq_along(time_point)) {
    extra_args[["time_point"]] <- time_point[i]

    if (add_surv) {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = paste(time_point[i], var_labels),
        table_names = paste0("surv_", time_point[i], table_names_suffix),
        show_labels = show_labels,
        afun = afun_surv,
        na_str = na_str,
        nested = nested,
        extra_args = extra_args
      )
    }

    if (add_surv_diff) {
      lyt <- analyze(
        lyt,
        vars,
        var_labels = paste(time_point[i], var_labels),
        table_names = paste0("surv_diff_", time_point[i], table_names_suffix),
        show_labels = show_labels_diff,
        afun = afun_surv_diff,
        na_str = na_str,
        nested = nested,
        extra_args = extra_args
      )
    }
  }
  lyt
}

1		#' Survival Time Analysis
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Summarize median survival time and CIs, percentiles of survival times, survival
6		#' time range of censored/event patients.
7		#'
8		#' @inheritParams argument_convention
9		#' @param control (`list`)\cr parameters for comparison details, specified by using the helper function
10		#' [control_surv_time()]. Some possible parameter options are:
11		#' * `conf_level` (`proportion`)\cr confidence level of the interval for survival time.
12		#' * `conf_type` (`string`)\cr confidence interval type. Options are "plain" (default), "log", or "log-log",
13		#' see more in [survival::survfit()]. Note option "none" is not supported.
14		#' * `quantiles` (`numeric`)\cr vector of length two to specify the quantiles of survival time.
15		#' @param ref_fn_censor (`flag`)\cr whether referential footnotes indicating censored observations should be printed
16		#' when the `range` statistic is included.
17		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("surv_time")`
18		#' to see available statistics for this function.
19		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
20		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
21		#' for that statistic's row label.
22		#'
23		#' @examples
24		#' library(dplyr)
25		#'
26		#' adtte_f <- tern_ex_adtte %>%
27		#' filter(PARAMCD == "OS") %>%
28		#' mutate(
29		#' AVAL = day2month(AVAL),
30		#' is_event = CNSR == 0
31		#' )
32		#' df <- adtte_f %>% filter(ARMCD == "ARM A")
33		#'
34		#' @name survival_time
35		#' @order 1
36		NULL
37
38		#' @describeIn survival_time Statistics function which analyzes survival times.
39		#'
40		#' @return
41		#' * `s_surv_time()` returns the statistics:
42		#' * `median`: Median survival time.
43		#' * `median_ci`: Confidence interval for median time.
44		#' * `quantiles`: Survival time for two specified quantiles.
45		#' * `range_censor`: Survival time range for censored observations.
46		#' * `range_event`: Survival time range for observations with events.
47		#' * `range`: Survival time range for all observations.
48		#'
49		#' @keywords internal
s_surv_time <- function(df,
                        .var,
                        is_event,
                        control = control_surv_time()) {
  # Input validation: `.var` names a complete numeric time column and
  # `is_event` names a complete logical column of `df`.
  checkmate::assert_string(.var)
  assert_df_with_variables(df, list(tte = .var, is_event = is_event))
  checkmate::assert_numeric(df[[.var]], min.len = 1, any.missing = FALSE)
  checkmate::assert_logical(df[[is_event]], min.len = 1, any.missing = FALSE)

  conf_level <- control$conf_level
  probs <- control$quantiles

  # Intercept-only survival fit on the whole of `df`.
  km_formula <- stats::as.formula(
    paste0("survival::Surv(", .var, ", ", is_event, ") ~ 1")
  )
  km_fit <- survival::survfit(
    formula = km_formula,
    data = df,
    conf.int = conf_level,
    conf.type = control$conf_type
  )
  km_table <- summary(km_fit, extend = TRUE)$table
  km_quantiles <- stats::quantile(km_fit, probs = probs)$quantile

  # The confidence-limit entries of the summary table are named after the
  # confidence level stored in the fit, followed by "LCL" / "UCL".
  ci_names <- paste0(km_fit$conf.int, c("LCL", "UCL"))

  # Observed time ranges: censored only, events only, and all observations.
  times <- df[[.var]]
  had_event <- df[[is_event]]
  rng_censored <- range_noinf(times[!had_event], na.rm = TRUE)
  rng_events <- range_noinf(times[had_event], na.rm = TRUE)
  rng_all <- range_noinf(times, na.rm = TRUE)

  quantile_label <- paste0(probs[1] * 100, "% and ", probs[2] * 100, "%-ile")

  list(
    median = formatters::with_label(unname(km_table["median"]), "Median"),
    median_ci = formatters::with_label(unname(km_table[ci_names]), f_conf_level(conf_level)),
    quantiles = formatters::with_label(unname(km_quantiles), quantile_label),
    range_censor = formatters::with_label(rng_censored, "Range (censored)"),
    range_event = formatters::with_label(rng_events, "Range (event)"),
    range = formatters::with_label(rng_all, "Range")
  )
}
88
89		#' @describeIn survival_time Formatted analysis function which is used as `afun` in `surv_time()`.
90		#'
91		#' @return
92		#' * `a_surv_time()` returns the corresponding list with formatted [rtables::CellValue()].
93		#'
94		#' @examples
95		#' a_surv_time(
96		#' df,
97		#' .df_row = df,
98		#' .var = "AVAL",
99		#' is_event = "is_event"
100		#' )
101		#'
102		#' @export
a_surv_time <- function(df,
                        labelstr = "",
                        .var = NULL,
                        .df_row = NULL,
                        is_event,
                        control = control_surv_time(),
                        ref_fn_censor = TRUE,
                        .stats = NULL,
                        .formats = NULL,
                        .labels = NULL,
                        .indent_mods = NULL,
                        na_str = default_na_str()) {
  x_stats <- s_surv_time(
    df = df, .var = .var, is_event = is_event, control = control
  )
  # Keep the censored range aside: it is needed for the footnotes below even
  # when `range_censor` is not among the selected statistics.
  rng_censor_lwr <- x_stats[["range_censor"]][1]
  rng_censor_upr <- x_stats[["range_censor"]][2]

  # Use method-specific defaults; user-supplied formats and labels win.
  fmts <- c(median_ci = "(xx.x, xx.x)", quantiles = "xx.x, xx.x", range = "xx.x to xx.x")
  lbls <- c(
    median_ci = "95% CI", range = "Range",
    range_censor = "Range (censored)", range_event = "Range (event)"
  )
  lbls_custom <- .labels
  .formats <- c(.formats, fmts[setdiff(names(fmts), names(.formats))])
  .labels <- c(.labels, lbls[setdiff(names(lbls), names(lbls_custom))])

  # Fill in with formatting defaults if needed
  .stats <- get_stats("surv_time", stats_in = .stats)
  .formats <- get_formats_from_stats(.stats, .formats)
  .labels <- get_labels_from_stats(.stats, .labels) %>% labels_use_control(control, lbls_custom)
  .indent_mods <- get_indents_from_stats(.stats, .indent_mods)

  x_stats <- x_stats[.stats]

  # Auto format handling
  .formats <- apply_auto_formatting(.formats, x_stats, .df_row, .var)

  # Referential footnotes on the overall range: flag the minimum and/or the
  # maximum when it coincides with the corresponding censored-range limit.
  cell_fns <- stats::setNames(vector("list", length = length(x_stats)), .labels)
  if ("range" %in% names(x_stats) && ref_fn_censor) {
    # `isTRUE()` makes the comparisons NA-safe: if a censored-range limit is
    # missing (presumably when there are no censored observations), a bare
    # `==` would give NA and `if` would stop with an error instead of simply
    # adding no footnote.
    min_is_censored <- isTRUE(x_stats[["range"]][1] == rng_censor_lwr)
    max_is_censored <- isTRUE(x_stats[["range"]][2] == rng_censor_upr)

    if (min_is_censored && max_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observations: range minimum & maximum"
    } else if (min_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observation: range minimum"
    } else if (max_is_censored) {
      cell_fns[[.labels[["range"]]]] <- "Censored observation: range maximum"
    }
  }

  in_rows(
    .list = x_stats,
    .formats = .formats,
    .names = .labels,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .format_na_strs = na_str,
    .cell_footnotes = cell_fns
  )
}
160
161		#' @describeIn survival_time Layout-creating function which can take statistics function arguments
162		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
163		#'
164		#' @return
165		#' * `surv_time()` returns a layout object suitable for passing to further layouting functions,
166		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
167		#' the statistics from `s_surv_time()` to the table layout.
168		#'
169		#' @examples
170		#' basic_table() %>%
171		#' split_cols_by(var = "ARMCD") %>%
172		#' add_colcounts() %>%
173		#' surv_time(
174		#' vars = "AVAL",
175		#' var_labels = "Survival Time (Months)",
176		#' is_event = "is_event",
177		#' control = control_surv_time(conf_level = 0.9, conf_type = "log-log")
178		#' ) %>%
179		#' build_table(df = adtte_f)
180		#'
181		#' @export
182		#' @order 2
surv_time <- function(lyt,
                      vars,
                      is_event,
                      control = control_surv_time(),
                      ref_fn_censor = TRUE,
                      na_str = default_na_str(),
                      nested = TRUE,
                      ...,
                      var_labels = "Time to Event",
                      show_labels = "visible",
                      table_names = vars,
                      .stats = c("median", "median_ci", "quantiles", "range"),
                      .formats = NULL,
                      .labels = NULL,
                      .indent_mods = c(median_ci = 1L)) {
  # Everything the analysis function needs beyond the data itself: first the
  # presentation settings, then the analysis settings, then anything the
  # caller forwarded through `...`.
  presentation_args <- list(
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    na_str = na_str
  )
  analysis_args <- list(
    is_event = is_event,
    control = control,
    ref_fn_censor = ref_fn_censor,
    ...
  )
  afun_args <- c(presentation_args, analysis_args)

  # Add the survival-time analysis to the layout and return the new layout.
  analyze(
    lyt = lyt,
    vars = vars,
    afun = a_surv_time,
    var_labels = var_labels,
    show_labels = show_labels,
    table_names = table_names,
    na_str = na_str,
    nested = nested,
    extra_args = afun_args
  )
}

1		#' Cox Proportional Hazards Regression
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Fits a Cox regression model and estimates hazard ratio to describe the effect size in a survival analysis.
6		#'
7		#' @inheritParams argument_convention
8		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_coxreg")`
9		#' to see available statistics for this function.
10		#'
11		#' @details Cox models are the most commonly used methods to estimate the magnitude of
12		#' the effect in survival analysis. It assumes proportional hazards: the ratio
13		#' of the hazards between groups (e.g., two arms) is constant over time.
14		#' This ratio is referred to as the "hazard ratio" (HR) and is one of the
15		#' most commonly reported metrics to describe the effect size in survival
16		#' analysis (NEST Team, 2020).
17		#'
18		#' @seealso [fit_coxreg] for relevant fitting functions, [h_cox_regression] for relevant
19		#' helper functions, and [tidy_coxreg] for custom tidy methods.
20		#'
21		#' @examples
22		#' library(survival)
23		#'
24		#' # Testing dataset [survival::bladder].
25		#' set.seed(1, kind = "Mersenne-Twister")
26		#' dta_bladder <- with(
27		#' data = bladder[bladder$enum < 5, ],
28		#' tibble::tibble(
29		#' TIME = stop,
30		#' STATUS = event,
31		#' ARM = as.factor(rx),
32		#' COVAR1 = as.factor(enum) %>% formatters::with_label("A Covariate Label"),
33		#' COVAR2 = factor(
34		#' sample(as.factor(enum)),
35		#' levels = 1:4, labels = c("F", "F", "M", "M")
36		#' ) %>% formatters::with_label("Sex (F/M)")
37		#' )
38		#' )
39		#' dta_bladder$AGE <- sample(20:60, size = nrow(dta_bladder), replace = TRUE)
40		#' dta_bladder$STUDYID <- factor("X")
41		#'
42		#' u1_variables <- list(
43		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
44		#' )
45		#'
46		#' u2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
47		#'
48		#' m1_variables <- list(
49		#' time = "TIME", event = "STATUS", arm = "ARM", covariates = c("COVAR1", "COVAR2")
50		#' )
51		#'
52		#' m2_variables <- list(time = "TIME", event = "STATUS", covariates = c("COVAR1", "COVAR2"))
53		#'
54		#' @name cox_regression
55		#' @order 1
56		NULL
57
58		#' @describeIn cox_regression Statistics function that transforms results tabulated
59		#' from [fit_coxreg_univar()] or [fit_coxreg_multivar()] into a list.
60		#'
61		#' @param model_df (`data.frame`)\cr contains the resulting model fit from a [fit_coxreg]
62		#' function with tidying applied via [broom::tidy()].
63		#' @param .stats (`character`)\cr the name of statistics to be reported among:
64		#' * `n`: number of observations (univariate only)
65		#' * `hr`: hazard ratio
66		#' * `ci`: confidence interval
67		#' * `pval`: p-value of the treatment effect
68		#' * `pval_inter`: p-value of the interaction effect between the treatment and the covariate (univariate only)
69		#' @param .which_vars (`character`)\cr which rows should statistics be returned for from the given model.
70		#' Defaults to "all". Other options include "var_main" for main effects, `"inter"` for interaction effects,
71		#' and `"multi_lvl"` for multivariate model covariate level rows. When `.which_vars` is "all" specific
72		#' variables can be selected by specifying `.var_nms`.
73		#' @param .var_nms (`character`)\cr the `term` value of rows in `model_df` for which `.stats` should be returned.
74		#' Typically this is the name of a variable. If using variable labels, `.var_nms` should be a vector of both the
75		#' desired variable name and the variable label, in that order, to see all `.stats` related to that variable. When
76		#' `.which_vars` is `"var_main"`, `.var_nms` should be only the variable name.
77		#'
78		#' @return
79		#' * `s_coxreg()` returns the selected statistic from the Cox regression model for the selected variable(s).
80		#'
81		#' @examples
82		#' # s_coxreg
83		#'
84		#' # Univariate
85		#' univar_model <- fit_coxreg_univar(variables = u1_variables, data = dta_bladder)
86		#' df1 <- broom::tidy(univar_model)
87		#'
88		#' s_coxreg(model_df = df1, .stats = "hr")
89		#'
90		#' # Univariate with interactions
91		#' univar_model_inter <- fit_coxreg_univar(
92		#' variables = u1_variables, control = control_coxreg(interaction = TRUE), data = dta_bladder
93		#' )
94		#' df1_inter <- broom::tidy(univar_model_inter)
95		#'
96		#' s_coxreg(model_df = df1_inter, .stats = "hr", .which_vars = "inter", .var_nms = "COVAR1")
97		#'
98		#' # Univariate without treatment arm - only "COVAR2" covariate effects
99		#' univar_covs_model <- fit_coxreg_univar(variables = u2_variables, data = dta_bladder)
100		#' df1_covs <- broom::tidy(univar_covs_model)
101		#'
102		#' s_coxreg(model_df = df1_covs, .stats = "hr", .var_nms = c("COVAR2", "Sex (F/M)"))
103		#'
104		#' # Multivariate.
105		#' multivar_model <- fit_coxreg_multivar(variables = m1_variables, data = dta_bladder)
106		#' df2 <- broom::tidy(multivar_model)
107		#'
108		#' s_coxreg(model_df = df2, .stats = "pval", .which_vars = "var_main", .var_nms = "COVAR1")
109		#' s_coxreg(
110		#' model_df = df2, .stats = "pval", .which_vars = "multi_lvl",
111		#' .var_nms = c("COVAR1", "A Covariate Label")
112		#' )
113		#'
114		#' # Multivariate without treatment arm - only "COVAR1" main effect
115		#' multivar_covs_model <- fit_coxreg_multivar(variables = m2_variables, data = dta_bladder)
116		#' df2_covs <- broom::tidy(multivar_covs_model)
117		#'
118		#' s_coxreg(model_df = df2_covs, .stats = "hr")
119		#'
120		#' @export
s_coxreg <- function(model_df, .stats, .which_vars = "all", .var_nms = NULL) {
  # `model_df` must provide a `term` column (factor or character) and a
  # column named after the requested statistic.
  assert_df_with_variables(model_df, list(term = "term", stat = .stats))
  checkmate::assert_multi_class(model_df$term, classes = c("factor", "character"))
  model_df$term <- as.character(model_df$term)
  .var_nms <- .var_nms[!is.na(.var_nms)]

  # Restrict to the requested terms, if any were given.
  if (length(.var_nms) > 0) model_df <- model_df[model_df$term %in% .var_nms, ]
  # For covariate-level rows, collapse all remaining rows into a single group
  # named after the last entry of `.var_nms`.
  if (.which_vars == "multi_lvl") model_df$term <- tail(.var_nms, 1)

  # We need a list with names corresponding to the stats to display of equal length to the list of stats.
  y <- split(model_df, f = model_df$term, drop = FALSE)
  y <- stats::setNames(y, nm = rep(.stats, length(y)))

  if (.which_vars == "var_main") {
    y <- lapply(y, function(x) x[1, ]) # only main effect
  } else if (.which_vars %in% c("inter", "multi_lvl")) {
    # Exclude the main effect (first row) of each group. The decision is made
    # per group: a group consisting of a single row is kept as it is rather
    # than being emptied because some other group happens to have more rows.
    y <- lapply(y, function(x) if (nrow(x) > 1) x[-1, ] else x)
  }

  # For every group: one list element per remaining row, holding the value of
  # the statistic and named by the row's `term_label`.
  lapply(
    X = y,
    FUN = function(x) {
      z <- as.list(x[[.stats]])
      stats::setNames(z, nm = x$term_label)
    }
  )
}
148
149		#' @describeIn cox_regression Analysis function which is used as `afun` in [rtables::analyze()]
150		#' and `cfun` in [rtables::summarize_row_groups()] within `summarize_coxreg()`.
151		#'
152		#' @param eff (`flag`)\cr whether treatment effect should be calculated. Defaults to `FALSE`.
153		#' @param var_main (`flag`)\cr whether main effects should be calculated. Defaults to `FALSE`.
154		#' @param na_str (`string`)\cr custom string to replace all `NA` values with. Defaults to `""`.
155		#' @param cache_env (`environment`)\cr an environment object used to cache the regression model in order to
156		#' avoid repeatedly fitting the same model for every row in the table. Defaults to `NULL` (no caching).
157		#' @param varlabels (`list`)\cr a named list corresponds to the names of variables found in data, passed
158		#' as a named list and corresponding to time, event, arm, strata, and covariates terms. If arm is missing
159		#' from variables, then only Cox model(s) including the covariates will be fitted and the corresponding
160		#' effect estimates will be tabulated later.
161		#'
162		#' @return
163		#' * `a_coxreg()` returns formatted [rtables::CellValue()].
164		#'
165		#' @examples
166		#' a_coxreg(
167		#' df = dta_bladder,
168		#' labelstr = "Label 1",
169		#' variables = u1_variables,
170		#' .spl_context = list(value = "COVAR1"),
171		#' .stats = "n",
172		#' .formats = "xx"
173		#' )
174		#'
175		#' a_coxreg(
176		#' df = dta_bladder,
177		#' labelstr = "",
178		#' variables = u1_variables,
179		#' .spl_context = list(value = "COVAR2"),
180		#' .stats = "pval",
181		#' .formats = "xx.xxxx"
182		#' )
183		#'
184		#' @export
a_coxreg <- function(df,
                     labelstr,
                     eff = FALSE,
                     var_main = FALSE,
                     multivar = FALSE,
                     variables,
                     at = list(),
                     control = control_coxreg(),
                     .spl_context,
                     .stats,
                     .formats,
                     .indent_mods = NULL,
                     na_level = lifecycle::deprecated(),
                     na_str = "",
                     cache_env = NULL) {
  # `na_level` is deprecated in favour of `na_str`; honour it with a warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "a_coxreg(na_level)", "a_coxreg(na_str)")
    na_str <- na_level
  }

  cov_no_arm <- !multivar && !"arm" %in% names(variables) && control$interaction # special case: univar no arm
  cov <- tail(.spl_context$value, 1) # current variable/covariate
  var_lbl <- formatters::var_labels(df)[cov] # check for df labels
  if (length(labelstr) > 1) {
    labelstr <- if (cov %in% names(labelstr)) labelstr[[cov]] else var_lbl # use df labels if none
  } else if (!is.na(var_lbl) && labelstr == cov && cov %in% variables$covariates) {
    labelstr <- var_lbl
  }

  # Decide which model is needed for this row: either no interaction term, or
  # a model restricted to the current covariate (with an interaction term
  # when main-effect rows are requested).
  if (eff || multivar || cov_no_arm) {
    control$interaction <- FALSE
  } else {
    variables$covariates <- cov
    if (var_main) control$interaction <- TRUE
  }

  # Fit (and tidy) the model only when no result is cached under the current
  # covariate name; otherwise reuse the cached result.
  if (is.null(cache_env[[cov]])) {
    if (!multivar) {
      model <- fit_coxreg_univar(variables = variables, data = df, at = at, control = control) %>% broom::tidy()
    } else {
      model <- fit_coxreg_multivar(variables = variables, data = df, control = control) %>% broom::tidy()
    }
    cache_env[[cov]] <- model
  } else {
    model <- cache_env[[cov]]
  }
  if (!multivar && !var_main) model[, "pval_inter"] <- NA_real_

  # From here on `multivar` / `var_main` only steer row selection and
  # labelling; the model has already been obtained above.
  if (cov_no_arm || (!cov_no_arm && !"arm" %in% names(variables) && is.numeric(df[[cov]]))) {
    multivar <- TRUE
    if (!cov_no_arm) var_main <- TRUE
  }

  # Work out which rows of the tidied model feed this table row.
  vars_coxreg <- list(which_vars = "all", var_nms = NULL)
  if (eff) {
    if (multivar && !var_main) { # multivar treatment level
      var_lbl_arm <- formatters::var_labels(df)[[variables$arm]]
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(variables$arm, var_lbl_arm), "multi_lvl")
    } else { # treatment effect
      vars_coxreg["var_nms"] <- variables$arm
      if (var_main) vars_coxreg["which_vars"] <- "var_main"
    }
  } else {
    if (!multivar || (multivar && var_main && !is.numeric(df[[cov]]))) { # covariate effect/level
      vars_coxreg[c("var_nms", "which_vars")] <- list(cov, "var_main")
    } else if (multivar) { # multivar covariate level
      vars_coxreg[c("var_nms", "which_vars")] <- list(c(cov, var_lbl), "multi_lvl")
      if (var_main) model[cov, .stats] <- NA_real_
    }
    if (!multivar && !var_main && control$interaction) vars_coxreg["which_vars"] <- "inter" # interaction effect
  }
  var_vals <- s_coxreg(model, .stats, .which_vars = vars_coxreg$which_vars, .var_nms = vars_coxreg$var_nms)[[1]]

  # Row names/labels for the extracted values.
  var_names <- if (all(grepl("\\(reference = ", names(var_vals))) && labelstr != tail(.spl_context$value, 1)) {
    paste(c(labelstr, tail(strsplit(names(var_vals), " ")[[1]], 3)), collapse = " ") # "reference" main effect labels
  } else if ((!multivar && !eff && !(!var_main && control$interaction) && nchar(labelstr) > 0) ||
    (multivar && var_main && is.numeric(df[[cov]]))) { # nolint
    labelstr # other main effect labels
  } else if (multivar && !eff && !var_main && is.numeric(df[[cov]])) {
    "All" # multivar numeric covariate
  } else {
    names(var_vals)
  }

  # The same format and NA string are applied to every row produced.
  in_rows(
    .list = var_vals, .names = var_names, .labels = var_names, .indent_mods = .indent_mods,
    .formats = stats::setNames(rep(.formats, length(var_names)), var_names),
    .format_na_strs = stats::setNames(rep(na_str, length(var_names)), var_names)
  )
}
272
273		#' @describeIn cox_regression Layout-creating function which creates a Cox regression summary table
274		#' layout. This function is a wrapper for several `rtables` layouting functions, including
275		#' [rtables::analyze_colvars()] and [rtables::summarize_row_groups()].
276		#'
277		#' @inheritParams fit_coxreg_univar
278		#' @param multivar (`flag`)\cr Defaults to `FALSE`. If `TRUE` multivariate Cox regression will run, otherwise
279		#' univariate Cox regression will run.
280		#' @param common_var (`character`)\cr the name of a factor variable in the dataset which takes the same value
281		#' for all rows. This should be created during pre-processing if no such variable currently exists.
282		#' @param .section_div (`character`)\cr string which should be repeated as a section divider between sections.
283		#' Defaults to `NA` for no section divider. If a vector of two strings are given, the first will be used between
284		#' treatment and covariate sections and the second between different covariates.
285		#'
286		#' @return
287		#' * `summarize_coxreg()` returns a layout object suitable for passing to further layouting functions,
288		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add a Cox regression table
289		#' containing the chosen statistics to the table layout.
290		#'
291		#' @seealso [fit_coxreg_univar()] and [fit_coxreg_multivar()] which also take the `variables`, `data`,
292		#' `at` (univariate only), and `control` arguments but return unformatted univariate and multivariate
293		#' Cox regression models, respectively.
294		#'
295		#' @examples
296		#' # summarize_coxreg
297		#'
298		#' result_univar <- basic_table() %>%
299		#' summarize_coxreg(variables = u1_variables) %>%
300		#' build_table(dta_bladder)
301		#' result_univar
302		#'
303		#' result_univar_covs <- basic_table() %>%
304		#' summarize_coxreg(
305		#' variables = u2_variables,
306		#' ) %>%
307		#' build_table(dta_bladder)
308		#' result_univar_covs
309		#'
310		#' result_multivar <- basic_table() %>%
311		#' summarize_coxreg(
312		#' variables = m1_variables,
313		#' multivar = TRUE,
314		#' ) %>%
315		#' build_table(dta_bladder)
316		#' result_multivar
317		#'
318		#' result_multivar_covs <- basic_table() %>%
319		#' summarize_coxreg(
320		#' variables = m2_variables,
321		#' multivar = TRUE,
322		#' varlabels = c("Covariate 1", "Covariate 2") # custom labels
323		#' ) %>%
324		#' build_table(dta_bladder)
325		#' result_multivar_covs
326		#'
327		#' @export
328		#' @order 2
summarize_coxreg <- function(lyt,
                             variables,
                             control = control_coxreg(),
                             at = list(),
                             multivar = FALSE,
                             common_var = "STUDYID",
                             .stats = c("n", "hr", "ci", "pval", "pval_inter"),
                             .formats = c(
                               n = "xx", hr = "xx.xx", ci = "(xx.xx, xx.xx)",
                               pval = "x.xxxx | (<0.0001)", pval_inter = "x.xxxx | (<0.0001)"
                             ),
                             varlabels = NULL,
                             .indent_mods = NULL,
                             na_level = lifecycle::deprecated(),
                             na_str = "",
                             .section_div = NA_character_) {
  # Deprecated `na_level` still works: warn and forward its value to `na_str`.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "summarize_coxreg(na_level)", "summarize_coxreg(na_str)")
    na_str <- na_level
  }

  if (multivar && control$interaction) {
    warning(paste(
      "Interactions are not available for multivariate cox regression using summarize_coxreg.",
      "The model will be calculated without interaction effects."
    ))
  }

  # `variables` is never modified below, so this flag stays valid for the whole function.
  has_arm <- "arm" %in% names(variables)
  if (control$interaction && !has_arm) {
    stop("To include interactions please specify 'arm' in variables.")
  }

  # Restrict the requested statistics to those that are valid for the chosen model type.
  valid_stats <- if (!has_arm || multivar) {
    c("hr", "ci", "pval")
  } else if (control$interaction) {
    c("n", "hr", "ci", "pval", "pval_inter")
  } else {
    c("n", "hr", "ci", "pval")
  }
  .stats <- intersect(valid_stats, .stats)

  all_stat_labels <- c(
    n = "n", hr = "Hazard Ratio", ci = paste0(control$conf_level * 100, "% CI"),
    pval = "p-value", pval_inter = "Interaction p-value"
  )
  stat_labels <- all_stat_labels[names(all_stat_labels) %in% .stats]
  .formats <- .formats[names(.formats) %in% .stats]
  env <- new.env() # caching environment handed to every statistic column

  # One column per statistic, all splitting on the same (constant) variable.
  lyt <- lyt %>%
    split_cols_by_multivar(
      vars = rep(common_var, length(.stats)),
      varlabels = stat_labels,
      extra_args = list(
        .stats = .stats, .formats = .formats, .indent_mods = .indent_mods, na_str = rep(na_str, length(.stats)),
        cache_env = replicate(length(.stats), list(env))
      )
    )

  # Treatment effect section.
  if (has_arm) {
    lyt <- lyt %>%
      split_rows_by(
        common_var,
        split_label = "Treatment:",
        label_pos = "visible",
        child_labels = "hidden",
        section_div = head(.section_div, 1)
      )
    if (multivar) {
      # Multivariate: a content row for the arm variable followed by the treatment level effects.
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar
          )
        ) %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(eff = TRUE, control = control, variables = variables, multivar = multivar, labelstr = "")
        )
    } else {
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, control = control, multivar = multivar, eff = TRUE, var_main = multivar,
            labelstr = ""
          )
        )
    }
  }

  # Covariate sections.
  if ("covariates" %in% names(variables)) {
    # Covariate main effects are shown as content rows in these cases (evaluated before
    # `control$interaction` is overridden further down).
    covariate_content_rows <- multivar || control$interaction || !has_arm

    lyt <- lyt %>%
      split_rows_by_multivar(
        vars = variables$covariates,
        varlabels = varlabels,
        split_label = "Covariate:",
        nested = FALSE,
        child_labels = if (covariate_content_rows) "default" else "hidden",
        section_div = tail(.section_div, 1)
      )

    if (covariate_content_rows) {
      lyt <- lyt %>%
        summarize_row_groups(
          cfun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction
          )
        )
    } else {
      if (!is.null(varlabels)) names(varlabels) <- variables$covariates
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(
            variables = variables, at = at, control = control, multivar = multivar,
            var_main = if (multivar) multivar else control$interaction,
            labelstr = if (is.null(varlabels)) "" else varlabels
          )
        )
    }

    # Special case: univariate models without an arm are laid out like interaction models.
    if (!has_arm) control$interaction <- TRUE

    # Covariate level effects.
    if (multivar || control$interaction) {
      lyt <- lyt %>%
        analyze_colvars(
          afun = a_coxreg,
          na_str = na_str,
          extra_args = list(variables = variables, at = at, control = control, multivar = multivar, labelstr = ""),
          indent_mod = if (!has_arm || multivar) 0L else -1L
        )
    }
  }

  lyt
}

1		#' Helper Functions for Tabulating Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as median survival
6		#' time and hazard ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams survival_coxph_pairwise
10		#' @inheritParams survival_duration_subgroups
11		#' @param arm (`factor`)\cr the treatment group variable.
12		#'
13		#' @details Main functionality is to prepare data for use in a layout-creating function.
14		#'
15		#' @examples
16		#' library(dplyr)
17		#' library(forcats)
18		#'
19		#' adtte <- tern_ex_adtte
20		#'
21		#' # Save variable labels before data processing steps.
22		#' adtte_labels <- formatters::var_labels(adtte)
23		#'
24		#' adtte_f <- adtte %>%
25		#' filter(
26		#' PARAMCD == "OS",
27		#' ARM %in% c("B: Placebo", "A: Drug X"),
28		#' SEX %in% c("M", "F")
29		#' ) %>%
30		#' mutate(
31		#' # Reorder levels of ARM to display reference arm before treatment arm.
32		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
33		#' SEX = droplevels(SEX),
34		#' is_event = CNSR == 0
35		#' )
36		#' labels <- c("ARM" = adtte_labels[["ARM"]], "SEX" = adtte_labels[["SEX"]], "is_event" = "Event Flag")
37		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
38		#'
39		#' @name h_survival_duration_subgroups
40		NULL
41
42		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame of median survival times by arm.
43		#'
44		#' @return
45		#' * `h_survtime_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, and `median`.
46		#'
47		#' @examples
48		#' # Extract median survival time for one group.
49		#' h_survtime_df(
50		#' tte = adtte_f$AVAL,
51		#' is_event = adtte_f$is_event,
52		#' arm = adtte_f$ARM
53		#' )
54		#'
55		#' @export
h_survtime_df <- function(tte, is_event, arm) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, len = length(tte))

  # Keep only observations where both the time and the event flag are available.
  surv_data <- data.frame(tte = tte, is_event = is_event, stringsAsFactors = FALSE)
  is_complete <- stats::complete.cases(surv_data)
  surv_data <- surv_data[is_complete, ]
  arm <- arm[is_complete]

  # Builds the one-row summary for a single arm; arms without data get missing estimates.
  summarize_arm <- function(arm_data, arm_name) {
    if (nrow(arm_data) > 0) {
      surv_stats <- s_surv_time(arm_data, .var = "tte", is_event = "is_event")
      median_est <- unname(as.numeric(surv_stats$median))
      n_events <- sum(arm_data$is_event)
    } else {
      median_est <- NA
      n_events <- NA
    }
    data.frame(
      arm = arm_name,
      n = nrow(arm_data),
      n_events = n_events,
      median = median_est,
      stringsAsFactors = FALSE
    )
  }

  data_by_arm <- split(surv_data, arm)
  arm_rows <- Map(summarize_arm, data_by_arm, names(data_by_arm))

  result <- do.call(rbind, args = c(arm_rows, make.row.names = FALSE))
  # Restore the original level order of the arm factor.
  result$arm <- factor(result$arm, levels = levels(arm))
  result
}
96
97		#' @describeIn h_survival_duration_subgroups summarizes median survival times by arm and across subgroups
98		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
99		#' requires elements `tte`, `is_event`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
100		#' groupings for `subgroups` variables.
101		#'
102		#' @return
103		#' * `h_survtime_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_events`, `median`, `subgroup`,
104		#' `var`, `var_label`, and `row_type`.
105		#'
106		#' @examples
107		#' # Extract median survival time for multiple groups.
108		#' h_survtime_subgroups_df(
109		#' variables = list(
110		#' tte = "AVAL",
111		#' is_event = "is_event",
112		#' arm = "ARM",
113		#' subgroups = c("SEX", "BMRKR2")
114		#' ),
115		#' data = adtte_f
116		#' )
117		#'
118		#' # Define groupings for BMRKR2 levels.
119		#' h_survtime_subgroups_df(
120		#' variables = list(
121		#' tte = "AVAL",
122		#' is_event = "is_event",
123		#' arm = "ARM",
124		#' subgroups = c("SEX", "BMRKR2")
125		#' ),
126		#' data = adtte_f,
127		#' groups_lists = list(
128		#' BMRKR2 = list(
129		#' "low" = "LOW",
130		#' "low/medium" = c("LOW", "MEDIUM"),
131		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
132		#' )
133		#' )
134		#' )
135		#'
136		#' @export
h_survtime_subgroups_df <- function(variables,
                                    data,
                                    groups_lists = list(),
                                    label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)

  assert_df_with_variables(data, variables)

  checkmate::assert_string(label_all)

  # Median survival summary by arm for any data frame holding the analysis variables.
  survtime_of <- function(df) {
    h_survtime_df(df[[variables$tte]], df[[variables$is_event]], df[[variables$arm]])
  }

  # Overall ("All Patients") rows come first and are flagged as content rows.
  overall <- survtime_of(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of rows per subgroup level, each carrying the subgroup's label columns.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  subgroup_blocks <- lapply(subsets, function(grp) {
    estimates <- survtime_of(grp$df)
    label_rows <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, label_rows)
  })
  by_subgroup <- do.call(rbind, args = c(subgroup_blocks, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
175
176		#' @describeIn h_survival_duration_subgroups helper to prepare a data frame with estimates of
177		#' treatment hazard ratio.
178		#'
179		#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
180		#'
181		#' @return
182		#' * `h_coxph_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`,
183		#' `conf_level`, `pval` and `pval_label`.
184		#'
185		#' @examples
186		#' # Extract hazard ratio for one group.
187		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM)
188		#'
189		#' # Extract hazard ratio for one group with stratification factor.
190		#' h_coxph_df(adtte_f$AVAL, adtte_f$is_event, adtte_f$ARM, strata_data = adtte_f$STRATA1)
191		#'
192		#' @export
h_coxph_df <- function(tte, is_event, arm, strata_data = NULL, control = control_coxph()) {
  checkmate::assert_numeric(tte)
  checkmate::assert_logical(is_event, len = length(tte))
  assert_valid_factor(arm, n.levels = 2, len = length(tte))

  df_tte <- data.frame(tte = tte, is_event = is_event)
  strata_vars <- NULL

  # Append the stratification column(s); a single factor is stored under the name "strata_data".
  if (!is.null(strata_data)) {
    if (is.data.frame(strata_data)) {
      strata_vars <- names(strata_data)
      checkmate::assert_data_frame(strata_data, nrows = nrow(df_tte))
      assert_df_with_factors(strata_data, as.list(stats::setNames(strata_vars, strata_vars)))
    } else {
      assert_valid_factor(strata_data, len = nrow(df_tte))
      strata_vars <- "strata_data"
    }
    df_tte[strata_vars] <- strata_data
  }

  # First arm level is the reference group, second level the comparison group.
  data_by_arm <- split(df_tte, arm)
  n_ref <- nrow(data_by_arm[[1]])
  n_comp <- nrow(data_by_arm[[2]])

  if (n_ref > 0 && n_comp > 0) {
    # Both arms have data: estimate hazard ratio, confidence interval and p-value.
    fit <- s_coxph_pairwise(
      df = data_by_arm[[2]],
      .ref_group = data_by_arm[[1]],
      .in_ref_col = FALSE,
      .var = "tte",
      is_event = "is_event",
      strat = strata_vars,
      control = control
    )

    return(data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(as.numeric(fit$n_tot)),
      n_tot_events = unname(as.numeric(fit$n_tot_events)),
      hr = unname(as.numeric(fit$hr)),
      lcl = unname(fit$hr_ci[1]),
      ucl = unname(fit$hr_ci[2]),
      conf_level = control[["conf_level"]],
      pval = as.numeric(fit$pvalue),
      pval_label = obj_label(fit$pvalue),
      stringsAsFactors = FALSE
    ))
  }

  # No comparison is possible: report counts only and leave all estimates missing.
  if (n_ref == 0 && n_comp == 0) {
    n_tot <- 0L
    n_tot_events <- 0L
  } else {
    # Exactly one arm is empty: count the complete records that are available.
    complete_rows <- df_tte[stats::complete.cases(df_tte), ]
    n_tot <- nrow(complete_rows)
    n_tot_events <- sum(complete_rows$is_event)
  }

  data.frame(
    # Dummy column needed downstream to create a nested header.
    arm = " ",
    n_tot = n_tot,
    n_tot_events = n_tot_events,
    hr = NA,
    lcl = NA,
    ucl = NA,
    conf_level = control[["conf_level"]],
    pval = NA,
    pval_label = NA,
    stringsAsFactors = FALSE
  )
}
276
277		#' @describeIn h_survival_duration_subgroups summarizes estimates of the treatment hazard ratio
278		#' across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `tte`, `is_event`, `arm` and
280		#' optionally `subgroups` and `strat`. `groups_lists` optionally specifies
281		#' groupings for `subgroups` variables.
282		#'
283		#' @return
284		#' * `h_coxph_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `n_tot_events`, `hr`,
285		#' `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
286		#'
287		#' @examples
288		#' # Extract hazard ratio for multiple groups.
289		#' h_coxph_subgroups_df(
290		#' variables = list(
291		#' tte = "AVAL",
292		#' is_event = "is_event",
293		#' arm = "ARM",
294		#' subgroups = c("SEX", "BMRKR2")
295		#' ),
296		#' data = adtte_f
297		#' )
298		#'
299		#' # Define groupings of BMRKR2 levels.
300		#' h_coxph_subgroups_df(
301		#' variables = list(
302		#' tte = "AVAL",
303		#' is_event = "is_event",
304		#' arm = "ARM",
305		#' subgroups = c("SEX", "BMRKR2")
306		#' ),
307		#' data = adtte_f,
308		#' groups_lists = list(
309		#' BMRKR2 = list(
310		#' "low" = "LOW",
311		#' "low/medium" = c("LOW", "MEDIUM"),
312		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
313		#' )
314		#' )
315		#' )
316		#'
317		#' # Extract hazard ratio for multiple groups with stratification factors.
318		#' h_coxph_subgroups_df(
319		#' variables = list(
320		#' tte = "AVAL",
321		#' is_event = "is_event",
322		#' arm = "ARM",
323		#' subgroups = c("SEX", "BMRKR2"),
324		#' strat = c("STRATA1", "STRATA2")
325		#' ),
326		#' data = adtte_f
327		#' )
328		#'
329		#' @export
h_coxph_subgroups_df <- function(variables,
                                 data,
                                 groups_lists = list(),
                                 control = control_coxph(),
                                 label_all = "All Patients") {
  checkmate::assert_character(variables$tte)
  checkmate::assert_character(variables$is_event)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Hazard ratio summary for any data frame holding the analysis variables;
  # stratification columns are only passed on when `variables$strat` is given.
  coxph_of <- function(df) {
    h_coxph_df(
      tte = df[[variables$tte]],
      is_event = df[[variables$is_event]],
      arm = df[[variables$arm]],
      strata_data = if (is.null(variables$strat)) NULL else df[variables$strat],
      control = control
    )
  }

  # Overall ("All Patients") row comes first and is flagged as a content row.
  overall <- coxph_of(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One result per subgroup level, each carrying the subgroup's label columns.
  subsets <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  subgroup_blocks <- lapply(subsets, function(grp) {
    estimates <- coxph_of(grp$df)
    label_rows <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, label_rows)
  })
  by_subgroup <- do.call(rbind, args = c(subgroup_blocks, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
384
385		#' Split Dataframe by Subgroups
386		#'
387		#' @description `r lifecycle::badge("stable")`
388		#'
389		#' Split a dataframe into a non-nested list of subsets.
390		#'
391		#' @inheritParams argument_convention
392		#' @inheritParams survival_duration_subgroups
393		#' @param data (`data.frame`)\cr dataset to split.
394		#' @param subgroups (`character`)\cr names of factor variables from `data` used to create subsets.
395		#' Unused levels not present in `data` are dropped. Note that the order in this vector
396		#' determines the order in the downstream table.
397		#'
398		#' @return A list with subset data (`df`) and metadata about the subset (`df_labels`).
399		#'
400		#' @details Main functionality is to prepare data for use in forest plot layouts.
401		#'
402		#' @examples
403		#' df <- data.frame(
404		#' x = c(1:5),
405		#' y = factor(c("A", "B", "A", "B", "A"), levels = c("A", "B", "C")),
406		#' z = factor(c("C", "C", "D", "D", "D"), levels = c("D", "C"))
407		#' )
408		#' formatters::var_labels(df) <- paste("label for", names(df))
409		#'
410		#' h_split_by_subgroups(
411		#' data = df,
412		#' subgroups = c("y", "z")
413		#' )
414		#'
415		#' h_split_by_subgroups(
416		#' data = df,
417		#' subgroups = c("y", "z"),
418		#' groups_lists = list(
419		#' y = list("AB" = c("A", "B"), "C" = "C")
420		#' )
421		#' )
422		#'
423		#' @export
h_split_by_subgroups <- function(data,
                                 subgroups,
                                 groups_lists = list()) {
  checkmate::assert_character(subgroups, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(groups_lists, names = "named")
  checkmate::assert_subset(names(groups_lists), subgroups)
  assert_df_with_factors(data, as.list(stats::setNames(subgroups, subgroups)))

  # Remember the variable labels so they can be re-attached to every subset.
  data_labels <- unname(formatters::var_labels(data))
  df_subgroups <- data[, subgroups, drop = FALSE]
  subgroup_labels <- formatters::var_labels(df_subgroups, fill = TRUE)

  # For each subgroup variable, list the subgroup names that actually occur in the data.
  l_labels <- Map(function(grp_i, name_i) {
    existing_levels <- levels(droplevels(grp_i))
    grp_levels <- if (name_i %in% names(groups_lists)) {
      # For this variable groupings are defined. We check which groups are contained in the data.
      group_list_i <- groups_lists[[name_i]]
      group_has_levels <- vapply(group_list_i, function(lvls) any(lvls %in% existing_levels), TRUE)
      names(which(group_has_levels))
    } else {
      existing_levels
    }
    data.frame(
      subgroup = grp_levels,
      var = name_i,
      var_label = unname(subgroup_labels[name_i]),
      stringsAsFactors = FALSE # Rationale is that subgroups may not be unique.
    )
  }, df_subgroups, names(df_subgroups))

  # Create a dataframe with one row per subgroup.
  df_labels <- do.call(rbind, args = c(l_labels, make.row.names = FALSE))
  row_label <- paste0(df_labels$var, ".", df_labels$subgroup)
  row_split_var <- factor(row_label, levels = row_label)

  # Create a list of data subsets.
  lapply(split(df_labels, row_split_var), function(row_i) {
    # Levels of the subgroup variable that belong to this subset: either the levels of a
    # user-defined grouping or the single level itself.
    matching_levels <- if (row_i$var %in% names(groups_lists)) {
      groups_lists[[row_i$var]][[row_i$subgroup]]
    } else {
      row_i$subgroup
    }
    # `%in%` never returns NA. Comparing with `==` would give NA for records with a missing
    # subgroup value, and indexing with NA would add all-NA rows to every subset.
    which_row <- data[[row_i$var]] %in% matching_levels
    df <- data[which_row, ]
    rownames(df) <- NULL
    formatters::var_labels(df) <- data_labels

    list(
      df = df,
      df_labels = data.frame(row_i, row.names = NULL)
    )
  })
}

1		#' Helper Function for Deriving Analysis Datasets for `LBT13` and `LBT14`
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper function that merges `ADSL` and `ADLB` datasets so that missing lab test records are inserted in the
6		#' output dataset. Remember that `na_level` must match the needed pre-processing
7		#' done with [df_explicit_na()] to have the desired output.
8		#'
9		#' @param adsl (`data.frame`)\cr `ADSL` dataframe.
10		#' @param adlb (`data.frame`)\cr `ADLB` dataframe.
11		#' @param worst_flag (named `vector`)\cr Worst post-baseline lab flag variable.
12		#' @param by_visit (`logical`)\cr defaults to `FALSE` to generate worst grade per patient.
13		#' If worst grade per patient per visit is specified for `worst_flag`, then
14		#' `by_visit` should be `TRUE` to generate worst grade per patient per visit.
15		#' @param no_fillin_visits (named `character`)\cr Visits that are not considered for post-baseline worst toxicity
16		#' grade. Defaults to `c("SCREENING", "BASELINE")`.
17		#'
18		#' @return `df` containing variables shared between `adlb` and `adsl` along with variables `PARAM`, `PARAMCD`,
19		#' `ATOXGR`, and `BTOXGR` relevant for analysis. Optionally, `AVISIT` and `AVISITN` are included when
20		#' `by_visit = TRUE` and `no_fillin_visits = c("SCREENING", "BASELINE")`.
21		#'
22		#' @details In the result data missing records will be created for the following situations:
23		#' * Patients who are present in `adsl` but have no lab data in `adlb` (both baseline and post-baseline).
24		#' * Patients who do not have any post-baseline lab values.
25		#' * Patients without any post-baseline values flagged as the worst.
26		#'
27		#' @examples
28		#' # `h_adsl_adlb_merge_using_worst_flag`
29		#' adlb_out <- h_adsl_adlb_merge_using_worst_flag(
30		#' tern_ex_adsl,
31		#' tern_ex_adlb,
32		#' worst_flag = c("WGRHIFL" = "Y")
33		#' )
34		#'
35		#' # `h_adsl_adlb_merge_using_worst_flag` by visit example
36		#' adlb_out_by_visit <- h_adsl_adlb_merge_using_worst_flag(
37		#' tern_ex_adsl,
38		#' tern_ex_adlb,
39		#' worst_flag = c("WGRLOVFL" = "Y"),
40		#' by_visit = TRUE
41		#' )
42		#'
43		#' @export
h_adsl_adlb_merge_using_worst_flag <- function(adsl, # nolint
                                               adlb,
                                               worst_flag = c("WGRHIFL" = "Y"),
                                               by_visit = FALSE,
                                               no_fillin_visits = c("SCREENING", "BASELINE")) {
  col_names <- names(worst_flag)
  filter_values <- worst_flag

  # Row positions in `adlb` where every worst-flag column equals its requested value.
  temp <- Map(
    function(x, y) which(adlb[[x]] == y),
    col_names,
    filter_values
  )
  position_satisfy_filters <- Reduce(intersect, temp)

  adsl_adlb_common_columns <- intersect(colnames(adsl), colnames(adlb))
  columns_from_adlb <- c("USUBJID", "PARAM", "PARAMCD", "AVISIT", "AVISITN", "ATOXGR", "BTOXGR")

  # Flagged lab records, excluding the visits that are never filled in.
  adlb_f <- adlb[position_satisfy_filters, ] %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits)
  adlb_f <- adlb_f[, columns_from_adlb]

  avisits_grid <- adlb %>%
    dplyr::filter(!.data[["AVISIT"]] %in% no_fillin_visits) %>%
    dplyr::pull(.data[["AVISIT"]]) %>%
    unique()

  # Full grid of expected records: patient x parameter, and additionally x visit if requested.
  if (by_visit) {
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      AVISIT = avisits_grid,
      PARAMCD = unique(adlb$PARAMCD)
    )
    adsl_lb <- adsl_lb %>%
      dplyr::left_join(unique(adlb[c("AVISIT", "AVISITN")]), by = "AVISIT")
    by_variables_from_adlb <- c("USUBJID", "AVISIT", "AVISITN", "PARAMCD", "PARAM")
  } else {
    adsl_lb <- expand.grid(
      USUBJID = unique(adsl$USUBJID),
      PARAMCD = unique(adlb$PARAMCD)
    )
    by_variables_from_adlb <- c("USUBJID", "PARAMCD", "PARAM")
  }
  adsl_lb <- adsl_lb %>%
    dplyr::left_join(unique(adlb[c("PARAM", "PARAMCD")]), by = "PARAMCD")

  # `drop = FALSE` keeps a data frame even when `USUBJID` is the only shared column;
  # otherwise a plain data.frame would collapse to a vector and the merge by "USUBJID" would fail.
  adsl1 <- adsl[, adsl_adlb_common_columns, drop = FALSE]
  adsl_lb <- merge(adsl1, adsl_lb, by = "USUBJID")

  # Full outer merge: grid entries without a flagged lab record become rows with missing grades.
  adlb_out <- merge(
    adlb_f,
    adsl_lb,
    by = by_variables_from_adlb,
    all = TRUE,
    sort = FALSE
  )

  if (by_visit) {
    # Replace the baseline grade with the patient/parameter level value from `adlb`, so that
    # filled-in visit records carry it as well.
    adlb_btoxgr <- adlb %>%
      dplyr::select(c("USUBJID", "PARAMCD", "BTOXGR")) %>%
      unique() %>%
      dplyr::rename("BTOXGR_MAP" = "BTOXGR")

    adlb_out <- adlb_out %>%
      dplyr::left_join(adlb_btoxgr, by = c("USUBJID", "PARAMCD")) %>%
      dplyr::mutate(BTOXGR = .data$BTOXGR_MAP) %>%
      dplyr::select(-"BTOXGR_MAP")
  }

  # Variable labels in the column order produced by the merge: key columns first, then the
  # remaining `adlb` columns, then the remaining shared `adsl` columns.
  adlb_var_labels <- c(
    formatters::var_labels(adlb[by_variables_from_adlb]),
    formatters::var_labels(adlb[columns_from_adlb[!columns_from_adlb %in% by_variables_from_adlb]]),
    formatters::var_labels(adsl[adsl_adlb_common_columns[adsl_adlb_common_columns != "USUBJID"]])
  )

  adlb_out$ATOXGR <- as.factor(adlb_out$ATOXGR)
  adlb_out$BTOXGR <- as.factor(adlb_out$BTOXGR)

  formatters::var_labels(adlb_out) <- adlb_var_labels

  adlb_out
}

1		# Utility functions to cooperate with {rtables} package
2
3		#' Convert Table into Matrix of Strings
4		#'
5		#' @description `r lifecycle::badge("stable")`
6		#'
7		#' Helper function to use mostly within tests. The `with_spaces` parameter allows
8		#' testing not only content but also indentation and table structure.
9		#' `print_txt_to_copy` instead facilitates test development by printing well-formatted
10		#' text that only needs to be copied and pasted into the expected output.
11		#'
12		#' @inheritParams formatters::toString
13		#' @param x `rtables` table.
14		#' @param with_spaces (`logical`)\cr should the tested table keep the indentation and other relevant spaces?
15		#' @param print_txt_to_copy (`logical`)\cr utility to have a way to copy the input table directly
16		#' into the expected variable instead of copying it too manually.
17		#'
18		#' @return A `matrix` of `string`s. If `print_txt_to_copy = TRUE` the well formatted printout of the
19		#' table will be printed to console, ready to be copied as an expected value.
20		#'
21		#' @examples
22		#' tbl <- basic_table() %>%
23		#' split_rows_by("SEX") %>%
24		#' split_cols_by("ARM") %>%
25		#' analyze("AGE") %>%
26		#' build_table(tern_ex_adsl)
27		#'
28		#' to_string_matrix(tbl, widths = ceiling(propose_column_widths(tbl) / 2))
29		#'
30		#' @export
to_string_matrix <- function(x, widths = NULL, max_width = NULL,
                             hsep = formatters::default_hsep(),
                             with_spaces = TRUE, print_txt_to_copy = FALSE) {
  checkmate::assert_flag(with_spaces)
  checkmate::assert_flag(print_txt_to_copy)
  checkmate::assert_int(max_width, null.ok = TRUE)

  # Accept an already built print form, otherwise build one from the table.
  print_form <- if (inherits(x, "MatrixPrintForm")) x else matrix_form(x, TRUE)

  # Wrapping is only requested when a maximum width is supplied.
  wrap_text <- !is.null(max_width)

  # Producing the matrix to test: rendered lines (keeps spacing) or the raw string matrix.
  out <- if (with_spaces) {
    rendered <- toString(print_form, widths = widths, tf_wrap = wrap_text, max_width = max_width, hsep = hsep)
    strsplit(rendered, "\\n")[[1]]
  } else {
    print_form$string
  }

  # Printing to console formatted output that needs to be copied in "expected".
  if (print_txt_to_copy) {
    printable <- if (with_spaces) out else apply(out, 1, paste0, collapse = '", "')
    cat(paste0('c(\n "', paste0(printable, collapse = '",\n "'), '"\n)'))
  }

  out
}
68
69		#' Blank for Missing Input
70		#'
71		#' Helper function to use in tabulating model results.
72		#'
73		#' @param x (`vector`)\cr input for a cell.
74		#'
75		#' @return An empty `character` vector if all entries in `x` are missing (`NA`), otherwise
76		#' the unlisted version of `x`.
77		#'
78		#' @keywords internal
unlist_and_blank_na <- function(x) {
  flat <- unlist(x)
  # Anything with at least one non-missing entry is returned as is; input that is
  # entirely missing (or empty) becomes an empty character vector.
  if (!all(is.na(flat))) {
    return(flat)
  }
  character()
}
87
88		#' Constructor for Content Functions given Data Frame with Flag Input
89		#'
90		#' This can be useful for tabulating model results.
91		#'
92		#' @param analysis_var (`string`)\cr variable name for the column containing values to be returned by the
93		#' content function.
94		#' @param flag_var (`string`)\cr variable name for the logical column identifying which row should be returned.
95		#' @param format (`string`)\cr `rtables` format to use.
96		#'
97		#' @return A content function which gives `df$analysis_var` at the row identified by
98		#' `df$flag_var` in the given format.
99		#'
100		#' @keywords internal
cfun_by_flag <- function(analysis_var,
                         flag_var,
                         format = "xx",
                         .indent_mods = NULL) {
  checkmate::assert_string(analysis_var)
  checkmate::assert_string(flag_var)

  # The returned content function picks the analysis value(s) from the flagged row(s) of `df`
  # and wraps them in a cell labelled with `labelstr`.
  function(df, labelstr) {
    flagged_rows <- which(df[[flag_var]])
    cell_value <- unlist_and_blank_na(df[[analysis_var]][flagged_rows])
    cell <- rcell(cell_value, format = format, indent_mod = .indent_mods)
    formatters::with_label(cell, labelstr)
  }
}
116
117		#' Content Row Function to Add Row Total to Labels
118		#'
119		#' This takes the label of the latest row split level and adds the row total from `df` in parentheses.
120		#' This function differs from [c_label_n_alt()] by taking row counts from `df` rather than
121		#' `alt_counts_df`, and is used by [add_rowcounts()] when `alt_counts` is set to `FALSE`.
122		#'
123		#' @inheritParams argument_convention
124		#'
125		#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
126		#'
127		#' @note It is important here to not use `df` but rather `.N_row` in the implementation, because
128		#' the former is already split by columns and will refer to the first column of the data only.
129		#'
130		#' @seealso [c_label_n_alt()] which performs the same function but retrieves row counts from
131		#' `alt_counts_df` instead of `df`.
132		#'
133		#' @keywords internal
c_label_n <- function(df,
                      labelstr,
                      .N_row) { # nolint
  # The count is stored in the cell (twice) but rendered as an empty string; it is only
  # visible through the row label.
  row_count <- formatters::with_label(
    c(.N_row, .N_row),
    paste0(labelstr, " (N=", .N_row, ")")
  )
  in_rows(
    .list = list(row_count = row_count),
    .formats = c(row_count = function(x, ...) "")
  )
}
143
144		#' Content Row Function to Add `alt_counts_df` Row Total to Labels
145		#'
146		#' This takes the label of the latest row split level and adds the row total from `alt_counts_df`
147		#' in parentheses. This function differs from [c_label_n()] by taking row counts from `alt_counts_df`
148		#' rather than `df`, and is used by [add_rowcounts()] when `alt_counts` is set to `TRUE`.
149		#'
150		#' @inheritParams argument_convention
151		#'
152		#' @return A list with formatted [rtables::CellValue()] with the row count value and the correct label.
153		#'
154		#' @seealso [c_label_n()] which performs the same function but retrieves row counts from `df` instead
155		#' of `alt_counts_df`.
156		#'
157		#' @keywords internal
c_label_n_alt <- function(df,
                          labelstr,
                          .alt_df_row) {
  # Row total is the number of rows of `.alt_df_row`.
  n_alt <- nrow(.alt_df_row)
  row_label <- paste0(labelstr, " (N=", n_alt, ")")
  count_cell <- formatters::with_label(c(n_alt, n_alt), row_label)
  # The count is stored in the cell but rendered as an empty string.
  blank_format <- function(x, ...) ""

  in_rows(
    .list = list(row_count = count_cell),
    .formats = c(row_count = blank_format)
  )
}
168
169		#' Layout Creating Function to Add Row Total Counts
170		#'
171		#' @description `r lifecycle::badge("stable")`
172		#'
173		#' This works analogously to [rtables::add_colcounts()] but on the rows. This function
174		#' is a wrapper for [rtables::summarize_row_groups()].
175		#'
176		#' @inheritParams argument_convention
177		#' @param alt_counts (`flag`)\cr whether row counts should be taken from `alt_counts_df` (`TRUE`)
178		#' or from `df` (`FALSE`). Defaults to `FALSE`.
179		#'
180		#' @return A modified layout where the latest row split labels now have the row-wise
181		#' total counts (i.e. without column-based subsetting) attached in parentheses.
182		#'
183		#' @note Row count values are contained in these row count rows but are not displayed
184		#' so that they are not considered zero rows by default when pruning.
185		#'
186		#' @examples
187		#' basic_table() %>%
188		#' split_cols_by("ARM") %>%
189		#' add_colcounts() %>%
190		#' split_rows_by("RACE", split_fun = drop_split_levels) %>%
191		#' add_rowcounts() %>%
192		#' analyze("AGE", afun = list_wrap_x(summary), format = "xx.xx") %>%
193		#' build_table(DM)
194		#'
195		#' @export
add_rowcounts <- function(lyt, alt_counts = FALSE) {
  # Pick the content function matching the requested source of row counts.
  count_cfun <- if (alt_counts) c_label_n_alt else c_label_n
  summarize_row_groups(lyt, cfun = count_cfun)
}
202
203		#' Obtain Column Indices
204		#'
205		#' @description `r lifecycle::badge("stable")`
206		#'
207		#' Helper function to extract column indices from a `VTableTree` for a given
208		#' vector of column names.
209		#'
210		#' @param table_tree (`VTableTree`)\cr table to extract the indices from.
211		#' @param col_names (`character`)\cr vector of column names.
212		#'
213		#' @return A vector of column indices.
214		#'
215		#' @export
h_col_indices <- function(table_tree, col_names) {
  checkmate::assert_class(table_tree, "VTableNodeInfo")
  # Column names are read from the names of the `cextra_args` attribute of the column info.
  available_cols <- names(attr(col_info(table_tree), "cextra_args"))
  checkmate::assert_subset(col_names, available_cols, empty.ok = FALSE)
  match(col_names, available_cols)
}
221
222		#' Labels or Names of List Elements
223		#'
224		#' Internal helper function for working with nested statistic function results which typically
225		#' don't have labels but names that we can use.
226		#'
227		#' @param x a list.
228		#'
229		#' @return A `character` vector with the labels or names for the list elements.
230		#'
231		#' @keywords internal
labels_or_names <- function(x) {
  checkmate::assert_multi_class(x, c("data.frame", "list"))

  # Nothing to label: return an empty character vector, as documented.
  if (length(x) == 0) {
    return(character(0))
  }

  # `lapply()`/`vapply()` keep the result shape independent of how many labels are missing
  # (`sapply()` returns a vector or a list depending on the input).
  labs <- lapply(x, obj_label)
  label_is_null <- vapply(labs, is.null, logical(1))

  # Fall back to the element name wherever no label is available.
  labs[label_is_null] <- rlang::names2(x)[label_is_null]
  unlist(labs)
}
240
241		#' Convert to `rtable`
242		#'
243		#' @description `r lifecycle::badge("stable")`
244		#'
245		#' This is a new generic function to convert objects to `rtable` tables.
246		#'
247		#' @param x the object which should be converted to an `rtable`.
248		#' @param ... additional arguments for methods.
249		#'
250		#' @return An `rtables` table object. Note that the concrete class will depend on the method used.
251		#'
252		#' @export
as.rtable <- function(x, ...) { # nolint
  # S3 generic: dispatches on the class of `x`.
  UseMethod("as.rtable")
}
256
257		#' @describeIn as.rtable method for converting `data.frame` that contain numeric columns to `rtable`.
258		#'
259		#' @param format the format which should be used for the columns.
260		#'
261		#' @method as.rtable data.frame
262		#'
263		#' @examples
264		#' x <- data.frame(
265		#' a = 1:10,
266		#' b = rnorm(10)
267		#' )
268		#' as.rtable(x)
269		#'
270		#' @export
as.rtable.data.frame <- function(x, format = "xx.xx", ...) {
  # Only data frames whose cells are all numeric are supported.
  checkmate::assert_numeric(unlist(x))

  # Build one `rrow` from a row's values (unnamed) plus its row name.
  build_row <- function(row, row_name) {
    row_args <- c(as.list(unname(row)), row.name = row_name)
    do.call(rrow, row_args)
  }

  header_args <- list(
    header = labels_or_names(x),
    format = format
  )
  # Transposing turns each row of `x` into a column that `Map()` can iterate over.
  body_rows <- Map(
    build_row,
    row = as.data.frame(t(x)),
    row_name = rownames(x)
  )

  do.call(rtable, c(header_args, body_rows))
}
295
296		#' Split parameters
297		#'
298		#' @description `r lifecycle::badge("stable")`
299		#'
300		#' It divides the data in the vector `param` into the groups defined by `f` based on specified `values`. It is relevant
301		#' in `rtables` layers so as to distribute parameters `.stats` or `.formats` into lists with items corresponding to
302		#' specific analysis function.
303		#'
304		#' @param param (`vector`)\cr the parameter to be split.
305		#' @param value (`vector`)\cr the value used to split.
306		#' @param f (`list` of `vectors`)\cr the reference to make the split
307		#'
308		#' @return A named `list` with the same element names as `f`, each containing the elements of `param` whose `value` is found in the corresponding element of `f` (or `NULL` if there are none).
309		#'
310		#' @examples
311		#' f <- list(
312		#' surv = c("pt_at_risk", "event_free_rate", "rate_se", "rate_ci"),
313		#' surv_diff = c("rate_diff", "rate_diff_ci", "ztest_pval")
314		#' )
315		#'
316		#' .stats <- c("pt_at_risk", "rate_diff")
317		#' h_split_param(.stats, .stats, f = f)
318		#'
319		#' # $surv
320		#' # [1] "pt_at_risk"
321		#' #
322		#' # $surv_diff
323		#' # [1] "rate_diff"
324		#'
325		#' .formats <- c("pt_at_risk" = "xx", "event_free_rate" = "xxx")
326		#' h_split_param(.formats, names(.formats), f = f)
327		#'
328		#' # $surv
329		#' # pt_at_risk event_free_rate
330		#' # "xx" "xxx"
331		#' #
332		#' # $surv_diff
333		#' # NULL
334		#'
335		#' @export
h_split_param <- function(param,
                          value,
                          f) {
  # For each group in `f`, keep the entries of `param` whose `value` belongs to that group;
  # groups without any match yield `NULL`.
  lapply(f, function(group) {
    picked <- param[value %in% group]
    if (length(picked) == 0) NULL else picked
  })
}
342
343		#' Get Selected Statistics Names
344		#'
345		#' Helper function to be used for creating `afun`.
346		#'
347		#' @param .stats (`vector` or `NULL`)\cr input to the layout creating function. Note that `NULL` means
348		#' in this context that all default statistics should be used.
349		#' @param all_stats (`character`)\cr all statistics which can be selected here potentially.
350		#'
351		#' @return A `character` vector with the selected statistics.
352		#'
353		#' @keywords internal
afun_selected_stats <- function(.stats, all_stats) {
  checkmate::assert_character(.stats, null.ok = TRUE)
  checkmate::assert_character(all_stats)

  # An explicit selection is restricted to the statistics that actually exist.
  if (!is.null(.stats)) {
    return(intersect(.stats, all_stats))
  }
  # `NULL` means "use every available statistic".
  all_stats
}
363
364		#' Add Variable Labels to Top Left Corner in Table
365		#'
366		#' @description `r lifecycle::badge("stable")`
367		#'
368		#' Helper layout creating function to just append the variable labels of a given variables vector
369		#' from a given dataset in the top left corner. If a variable label is not found then the
370		#' variable name itself is used instead. Multiple variable labels are concatenated with slashes.
371		#'
372		#' @inheritParams argument_convention
373		#' @param vars (`character`)\cr variable names of which the labels are to be looked up in `df`.
374		#' @param indent (`integer`)\cr non-negative number of nested indent space, default to 0L which means no indent.
375		#' 1L means two spaces indent, 2L means four spaces indent and so on.
376		#'
377		#' @return A modified layout with the new variable label(s) added to the top-left material.
378		#'
379		#' @note This is not an optimal implementation of course, since we are using here the data set
380		#' itself during the layout creation. When we have a more mature `rtables` implementation then
381		#' this will also be improved or not necessary anymore.
382		#'
383		#' @examples
384		#' lyt <- basic_table() %>%
385		#' split_cols_by("ARM") %>%
386		#' add_colcounts() %>%
387		#' split_rows_by("SEX") %>%
388		#' append_varlabels(DM, "SEX") %>%
389		#' analyze("AGE", afun = mean) %>%
390		#' append_varlabels(DM, "AGE", indent = 1)
391		#' build_table(lyt, DM)
392		#'
393		#' lyt <- basic_table() %>%
394		#' split_cols_by("ARM") %>%
395		#' split_rows_by("SEX") %>%
396		#' analyze("AGE", afun = mean) %>%
397		#' append_varlabels(DM, c("SEX", "AGE"))
398		#' build_table(lyt, DM)
399		#'
400		#' @export
append_varlabels <- function(lyt, df, vars, indent = 0L) {
  # A logical `indent` is still accepted, but converted to an integer with a warning.
  if (checkmate::test_flag(indent)) {
    warning("indent argument is now accepting integers. Boolean indent will be converted to integers.")
    indent <- as.integer(indent)
  }

  checkmate::assert_data_frame(df)
  checkmate::assert_character(vars)
  checkmate::assert_count(indent)

  # Variable labels (variable names where no label exists), joined with slashes.
  var_labs <- formatters::var_labels(df[vars], fill = TRUE)
  joined <- paste(var_labs, collapse = " / ")
  # Two spaces per indent level.
  padding <- strrep(" ", indent * 2)

  append_topleft(lyt, paste0(padding, joined))
}
418
419		#' Default string replacement for `NA` values
420		#'
421		#' @description `r lifecycle::badge("stable")`
422		#'
423		#' The default string used to represent `NA` values. This value is used as the default
424		#' value for the `na_str` argument throughout the `tern` package, and printed in place
425		#' of `NA` values in output tables. If not specified for each `tern` function by the user
426		#' via the `na_str` argument, or in the R environment options via [set_default_na_str()],
427		#' then `NA` is used.
428		#'
429		#' @param na_str (`string`)\cr Single string value to set in the R environment options as
430		#' the default value to replace `NA`s. Use `getOption("tern_default_na_str")` to check the
431		#' current value set in the R environment (defaults to `NULL` if not set).
432		#'
433		#' @name default_na_str
434		NULL
435
436		#' @describeIn default_na_str Getter for default `NA` value replacement string.
437		#'
438		#' @return
439		#' * `default_na_str` returns the current value if an R environment option has been set
440		#' for `"tern_default_na_str"`, or `NA_character_` otherwise.
441		#'
442		#' @examples
443		#' # Default settings
444		#' default_na_str()
445		#' getOption("tern_default_na_str")
446		#'
447		#' # Set custom value
448		#' set_default_na_str("<Missing>")
449		#'
450		#' # Settings after value has been set
451		#' default_na_str()
452		#' getOption("tern_default_na_str")
453		#'
454		#' @export
default_na_str <- function() {
  # Read the package-wide option; fall back to `NA_character_` when it is not set.
  current <- getOption("tern_default_na_str")
  if (is.null(current)) NA_character_ else current
}
458
459		#' @describeIn default_na_str Setter for default `NA` value replacement string. Sets the
460		#' option `"tern_default_na_str"` within the R environment.
461		#'
462		#' @return
463		#' * `set_default_na_str` has no return value.
464		#'
465		#' @export
set_default_na_str <- function(na_str) {
  # A single string, or `NULL` (which unsets the option).
  checkmate::assert_character(na_str, len = 1, null.ok = TRUE)
  options(tern_default_na_str = na_str)
}

1		#' Helper Functions for Tabulating Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that tabulate in a data frame statistics such as response rate
6		#' and odds ratio for population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @inheritParams response_subgroups
10		#' @param arm (`factor`)\cr the treatment group variable.
11		#'
12		#' @details Main functionality is to prepare data for use in a layout-creating function.
13		#'
14		#' @examples
15		#' library(dplyr)
16		#' library(forcats)
17		#'
18		#' adrs <- tern_ex_adrs
19		#' adrs_labels <- formatters::var_labels(adrs)
20		#'
21		#' adrs_f <- adrs %>%
22		#' filter(PARAMCD == "BESRSPI") %>%
23		#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
24		#' droplevels() %>%
25		#' mutate(
26		#' # Reorder levels of factor to make the placebo group the reference arm.
27		#' ARM = fct_relevel(ARM, "B: Placebo"),
28		#' rsp = AVALC == "CR"
29		#' )
30		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
31		#'
32		#' @name h_response_subgroups
33		NULL
34
35		#' @describeIn h_response_subgroups helper to prepare a data frame of binary responses by arm.
36		#'
37		#' @return
38		#' * `h_proportion_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, and `prop`.
39		#'
40		#' @examples
41		#' h_proportion_df(
42		#' c(TRUE, FALSE, FALSE),
43		#' arm = factor(c("A", "A", "B"), levels = c("A", "B"))
44		#' )
45		#'
46		#' @export
h_proportion_df <- function(rsp, arm) {
  checkmate::assert_logical(rsp)
  assert_valid_factor(arm, len = length(rsp))

  # Drop observations with a missing response from both vectors.
  keep <- !is.na(rsp)
  rsp <- rsp[keep]
  arm <- arm[keep]

  # One-row summary for a single arm; arms without observations get n = 0 and NA estimates.
  summarize_arm <- function(arm_rsp, arm_name) {
    if (length(arm_rsp) == 0) {
      return(
        data.frame(
          arm = arm_name,
          n = 0L,
          n_rsp = NA,
          prop = NA,
          stringsAsFactors = FALSE
        )
      )
    }
    est <- s_proportion(df = arm_rsp)
    data.frame(
      arm = arm_name,
      n = length(arm_rsp),
      n_rsp = unname(est$n_prop[1]),
      prop = unname(est$n_prop[2]),
      stringsAsFactors = FALSE
    )
  }

  rsp_by_arm <- split(rsp, arm)
  per_arm <- Map(summarize_arm, rsp_by_arm, names(rsp_by_arm))

  out <- do.call(rbind, args = c(per_arm, make.row.names = FALSE))
  # Restore the factor with the original level order.
  out$arm <- factor(out$arm, levels = levels(arm))
  out
}
80
81		#' @describeIn h_response_subgroups summarizes proportion of binary responses by arm and across subgroups
82		#' in a data frame. `variables` corresponds to the names of variables found in `data`, passed as a named list and
83		#' requires elements `rsp`, `arm` and optionally `subgroups`. `groups_lists` optionally specifies
84		#' groupings for `subgroups` variables.
85		#'
86		#' @return
87		#' * `h_proportion_subgroups_df()` returns a `data.frame` with columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`,
88		#' `var`, `var_label`, and `row_type`.
89		#'
90		#' @examples
91		#' h_proportion_subgroups_df(
92		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
93		#' data = adrs_f
94		#' )
95		#'
96		#' # Define groupings for BMRKR2 levels.
97		#' h_proportion_subgroups_df(
98		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
99		#' data = adrs_f,
100		#' groups_lists = list(
101		#' BMRKR2 = list(
102		#' "low" = "LOW",
103		#' "low/medium" = c("LOW", "MEDIUM"),
104		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
105		#' )
106		#' )
107		#' )
108		#'
109		#' @export
h_proportion_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Overall ("All Patients") content rows.
  overall <- h_proportion_df(data[[variables$rsp]], data[[variables$arm]])
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall rows are the whole result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  # One block of analysis rows per subgroup level.
  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subgroup_data, function(grp) {
    est <- h_proportion_df(grp$df[[variables$rsp]], grp$df[[variables$arm]])
    # Repeat the subgroup's single label row once per result row.
    labels <- grp$df_labels[rep(1, times = nrow(est)), ]
    cbind(est, labels)
  })
  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}
148
149		#' @describeIn h_response_subgroups helper to prepare a data frame with estimates of
150		#' the odds ratio between a treatment and a control arm.
151		#'
152		#' @inheritParams response_subgroups
153		#' @param strata_data (`factor`, `data.frame` or `NULL`)\cr required if stratified analysis is performed.
154		#'
155		#' @return
156		#' * `h_odds_ratio_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`, and
157		#' optionally `pval` and `pval_label`.
158		#'
159		#' @examples
160		#' # Unstratified analysis.
161		#' h_odds_ratio_df(
162		#' c(TRUE, FALSE, FALSE, TRUE),
163		#' arm = factor(c("A", "A", "B", "B"), levels = c("A", "B"))
164		#' )
165		#'
166		#' # Include p-value.
167		#' h_odds_ratio_df(adrs_f$rsp, adrs_f$ARM, method = "chisq")
168		#'
169		#' # Stratified analysis.
170		#' h_odds_ratio_df(
171		#' rsp = adrs_f$rsp,
172		#' arm = adrs_f$ARM,
173		#' strata_data = adrs_f[, c("STRATA1", "STRATA2")],
174		#' method = "cmh"
175		#' )
176		#'
177		#' @export
h_odds_ratio_df <- function(rsp, arm, strata_data = NULL, conf_level = 0.95, method = NULL) {
  assert_valid_factor(arm, n.levels = 2, len = length(rsp))

  df_rsp <- data.frame(
    rsp = rsp,
    arm = arm
  )

  # For a stratified analysis, all strata variables are collapsed into one factor column.
  if (!is.null(strata_data)) {
    strata_var <- interaction(strata_data, drop = TRUE)
    strata_name <- "strata"

    assert_valid_factor(strata_var, len = nrow(df_rsp))

    df_rsp[[strata_name]] <- strata_var
  } else {
    strata_name <- NULL
  }

  # First level of `arm` is the reference group, second level is the comparison group.
  l_df <- split(df_rsp, arm)
  n_ref <- nrow(l_df[[1]])
  n_trt <- nrow(l_df[[2]])

  # Result used when the model cannot be fitted: all estimates are NA.
  na_result <- function(n_tot) {
    out <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = n_tot,
      or = NA,
      lcl = NA,
      ucl = NA,
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )
    if (!is.null(method)) {
      out$pval <- NA
      out$pval_label <- NA
    }
    out
  }

  if (n_ref > 0 && n_trt > 0) {
    # Odds ratio and CI.
    result_odds_ratio <- s_odds_ratio(
      df = l_df[[2]],
      .var = "rsp",
      .ref_group = l_df[[1]],
      .in_ref_col = FALSE,
      .df_row = df_rsp,
      variables = list(arm = "arm", strata = strata_name),
      conf_level = conf_level
    )

    df <- data.frame(
      # Dummy column needed downstream to create a nested header.
      arm = " ",
      n_tot = unname(result_odds_ratio$n_tot["n_tot"]),
      or = unname(result_odds_ratio$or_ci["est"]),
      lcl = unname(result_odds_ratio$or_ci["lcl"]),
      ucl = unname(result_odds_ratio$or_ci["ucl"]),
      conf_level = conf_level,
      stringsAsFactors = FALSE
    )

    if (!is.null(method)) {
      # Test for difference.
      result_test <- s_test_proportion_diff(
        df = l_df[[2]],
        .var = "rsp",
        .ref_group = l_df[[1]],
        .in_ref_col = FALSE,
        variables = list(strata = strata_name),
        method = method
      )

      df$pval <- as.numeric(result_test$pval)
      df$pval_label <- obj_label(result_test$pval)
    }

    df
  } else if (n_ref == 0 && n_trt == 0) {
    # Neither arm has any observation.
    na_result(0L)
  } else {
    # Exactly one arm is empty: cannot go through the model, so n_tot is taken from the data.
    na_result(sum(stats::complete.cases(df_rsp)))
  }
}
276
277		#' @describeIn h_response_subgroups summarizes estimates of the odds ratio between a treatment and a control
278		#' arm across subgroups in a data frame. `variables` corresponds to the names of variables found in
279		#' `data`, passed as a named list and requires elements `rsp`, `arm` and optionally `subgroups`
280		#' and `strat`. `groups_lists` optionally specifies groupings for `subgroups` variables.
281		#'
282		#' @return
283		#' * `h_odds_ratio_subgroups_df()` returns a `data.frame` with columns `arm`, `n_tot`, `or`, `lcl`, `ucl`,
284		#' `conf_level`, `subgroup`, `var`, `var_label`, and `row_type`.
285		#'
286		#' @examples
287		#' # Unstratified analysis.
288		#' h_odds_ratio_subgroups_df(
289		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
290		#' data = adrs_f
291		#' )
292		#'
293		#' # Stratified analysis.
294		#' h_odds_ratio_subgroups_df(
295		#' variables = list(
296		#' rsp = "rsp",
297		#' arm = "ARM",
298		#' subgroups = c("SEX", "BMRKR2"),
299		#' strat = c("STRATA1", "STRATA2")
300		#' ),
301		#' data = adrs_f
302		#' )
303		#'
304		#' # Define groupings of BMRKR2 levels.
305		#' h_odds_ratio_subgroups_df(
306		#' variables = list(
307		#' rsp = "rsp",
308		#' arm = "ARM",
309		#' subgroups = c("SEX", "BMRKR2")
310		#' ),
311		#' data = adrs_f,
312		#' groups_lists = list(
313		#' BMRKR2 = list(
314		#' "low" = "LOW",
315		#' "low/medium" = c("LOW", "MEDIUM"),
316		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
317		#' )
318		#' )
319		#' )
320		#'
321		#' @export
h_odds_ratio_subgroups_df <- function(variables,
                                      data,
                                      groups_lists = list(),
                                      conf_level = 0.95,
                                      method = NULL,
                                      label_all = "All Patients") {
  checkmate::assert_character(variables$rsp)
  checkmate::assert_character(variables$arm)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_character(variables$strat, null.ok = TRUE)
  assert_df_with_factors(data, list(val = variables$arm), min.levels = 2, max.levels = 2)
  assert_df_with_variables(data, variables)
  checkmate::assert_string(label_all)

  # Strata columns of a data frame, or `NULL` for an unstratified analysis.
  pick_strata <- function(d) {
    if (is.null(variables$strat)) {
      NULL
    } else {
      d[, variables$strat, drop = FALSE]
    }
  }

  # Odds ratio estimates for one data set (overall or a single subgroup).
  estimate <- function(d) {
    h_odds_ratio_df(
      rsp = d[[variables$rsp]],
      arm = d[[variables$arm]],
      strata_data = pick_strata(d),
      conf_level = conf_level,
      method = method
    )
  }

  # Overall ("All Patients") content row.
  overall <- estimate(data)
  overall$subgroup <- label_all
  overall$var <- "ALL"
  overall$var_label <- label_all
  overall$row_type <- "content"

  # Without subgroup variables the overall row is the whole result.
  if (is.null(variables$subgroups)) {
    return(overall)
  }

  subgroup_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  per_subgroup <- lapply(subgroup_data, function(grp) {
    est <- estimate(grp$df)
    # Repeat the subgroup's single label row once per result row.
    labels <- grp$df_labels[rep(1, times = nrow(est)), ]
    cbind(est, labels)
  })

  by_subgroup <- do.call(rbind, args = c(per_subgroup, make.row.names = FALSE))
  by_subgroup$row_type <- "analysis"

  rbind(overall, by_subgroup)
}

1		#' Re-implemented [range()] Default S3 method for numerical objects
2		#'
3		#' This function returns `c(NA, NA)` instead of `c(-Inf, Inf)` for zero-length data
4		#' without any warnings.
5		#'
6		#' @param x (`numeric`)\cr a sequence of numbers for which the range is computed.
7		#' @param na.rm (`logical`)\cr indicating if `NA` should be omitted.
8		#' @param finite (`logical`)\cr indicating if non-finite elements should be removed.
9		#'
10		#' @return A 2-element vector of class `numeric`.
11		#'
12		#' @keywords internal
range_noinf <- function(x, na.rm = FALSE, finite = FALSE) { # nolint
  checkmate::assert_numeric(x)

  # `finite` takes precedence over `na.rm`; filtering on finiteness removes NAs too.
  if (finite) {
    x <- x[is.finite(x)]
  } else if (na.rm) {
    x <- x[!is.na(x)]
  }

  if (length(x) > 0) {
    return(c(min(x, na.rm = FALSE), max(x, na.rm = FALSE)))
  }

  # Nothing left: return a pair of NAs of the same storage type as `x`.
  empty_range <- c(NA, NA)
  mode(empty_range) <- typeof(x)
  empty_range
}
32
33		#' Utility function to create label for confidence interval
34		#'
35		#' @description `r lifecycle::badge("stable")`
36		#'
37		#' @inheritParams argument_convention
38		#'
39		#' @return A `string`.
40		#'
41		#' @export
f_conf_level <- function(conf_level) {
  assert_proportion_value(conf_level)
  # Express the level as a percentage, e.g. 0.95 -> "95% CI".
  pct <- conf_level * 100
  paste0(pct, "% CI")
}
46
47		#' Utility function to create label for p-value
48		#'
49		#' @description `r lifecycle::badge("stable")`
50		#'
51		#' @param test_mean (`number`)\cr mean value to test under the null hypothesis.
52		#'
53		#' @return A `string`.
54		#'
55		#' @export
f_pval <- function(test_mean) {
  # Exactly one numeric value is expected.
  checkmate::assert_numeric(test_mean, len = 1)
  label_parts <- c("p-value (H0: mean = ", test_mean, ")")
  paste0(label_parts, collapse = "")
}
60
61		#' Utility function to return a named list of covariate names.
62		#'
63		#' @param covariates (`character`)\cr a vector that can contain single variable names (such as
64		#' `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
65		#'
66		#' @return A named `list` of `character` vector.
67		#'
68		#' @keywords internal
get_covariates <- function(covariates) {
  checkmate::assert_character(covariates)
  # Break interaction terms such as "X1 * X2" into their component variables.
  terms <- unlist(strsplit(covariates, "*", fixed = TRUE))
  cov_vars <- unique(trimws(terms))
  # Each variable becomes a list element named after itself.
  cov_list <- as.list(cov_vars)
  names(cov_list) <- cov_vars
  cov_list
}
74
75		#' Replicate Entries of a Vector if Required
76		#'
77		#' @description `r lifecycle::badge("stable")`
78		#'
79		#' Replicate entries of a vector if required.
80		#'
81		#' @inheritParams argument_convention
82		#' @param n (`count`)\cr how many entries we need.
83		#'
84		#' @return `x` if it has the required length already or is `NULL`,
85		#' otherwise if it is scalar the replicated version of it with `n` entries.
86		#'
87		#' @note This function will fail if `x` is not of length `n` and/or is not a scalar.
88		#'
89		#' @export
to_n <- function(x, n) {
  # `NULL` is passed through untouched.
  if (is.null(x)) {
    return(NULL)
  }

  len <- length(x)
  # A scalar is recycled to `n` entries.
  if (len == 1) {
    return(rep(x, n))
  }
  # Anything else must already have exactly `n` entries.
  if (len != n) {
    stop("dimension mismatch")
  }
  x
}
101
102		#' Check Element Dimension
103		#'
104		#' Checks if the elements in `...` have the same dimension.
105		#'
106		#' @param ... (`data.frame`s or `vector`s)\cr any data frames/vectors.
107		#' @param omit_null (`logical`)\cr whether `NULL` elements in `...` should be omitted from the check.
108		#'
109		#' @return A `logical` value.
110		#'
111		#' @keywords internal
check_same_n <- function(..., omit_null = TRUE) {
  dots <- list(...)

  # Give every argument a usable name. `names(dots)` is `NULL` when no argument is named,
  # which `Map()` cannot combine with a non-empty `dots`.
  arg_names <- names(dots)
  if (is.null(arg_names)) {
    arg_names <- rep("", length(dots))
  }
  unnamed <- !nzchar(arg_names)
  if (any(unnamed)) {
    arg_names[unnamed] <- paste0("..", which(unnamed))
  }
  names(dots) <- arg_names

  # Size of each element: rows for data frames, length for atomic vectors,
  # NA for `NULL` elements that are to be ignored.
  n_list <- Map(
    function(x, name) {
      if (is.null(x)) {
        if (omit_null) {
          NA_integer_
        } else {
          stop("arg ", name, " is not supposed to be NULL")
        }
      } else if (is.data.frame(x)) {
        nrow(x)
      } else if (is.atomic(x)) {
        length(x)
      } else {
        stop("data structure for ", name, " is currently not supported")
      }
    },
    dots, arg_names
  )

  # Omitted `NULL` elements (NA) do not take part in the comparison.
  n <- stats::na.omit(unlist(n_list))

  if (length(unique(n)) > 1) {
    # Report every element whose size differs from the first one.
    sel <- which(n != n[1])
    stop("dimension mismatch: ", paste(names(n)[sel], collapse = ", "), " do not have N=", n[1])
  }

  TRUE
}
143
144		#' Make Names Without Dots
145		#'
146		#' @param nams (`character`)\cr vector of original names.
147		#'
148		#' @return A `character` `vector` of proper names, which does not use dots in contrast to [make.names()].
149		#'
150		#' @keywords internal
make_names <- function(nams) {
  # Let base R produce syntactically valid names, then strip every dot it introduced or kept.
  valid_names <- make.names(nams)
  gsub("[.]", "", valid_names)
}
155
156		#' Conversion of Months to Days
157		#'
158		#' @description `r lifecycle::badge("stable")`
159		#'
160		#' Conversion of Months to Days. This is an approximative calculation because it
161		#' considers each month as having an average of 30.4375 days.
162		#'
163		#' @param x (`numeric`)\cr time in months.
164		#'
165		#' @return A `numeric` vector with the time in days.
166		#'
167		#' @examples
168		#' x <- c(13.25, 8.15, 1, 2.834)
169		#' month2day(x)
170		#'
171		#' @export
month2day <- function(x) {
  checkmate::assert_numeric(x)
  # Conversion factor used by this package: 30.4375 days per month.
  days_per_month <- 30.4375
  x * days_per_month
}
176
177		#' Conversion of Days to Months
178		#'
179		#' @param x (`numeric`)\cr time in days.
180		#'
181		#' @return A `numeric` vector with the time in months.
182		#'
183		#' @examples
184		#' x <- c(403, 248, 30, 86)
185		#' day2month(x)
186		#'
187		#' @export
day2month <- function(x) {
  checkmate::assert_numeric(x)
  # Conversion factor used by this package: 30.4375 days per month.
  days_per_month <- 30.4375
  x / days_per_month
}
192
193		#' Return an empty numeric if all elements are `NA`.
194		#'
195		#' @param x (`numeric`)\cr vector.
196		#'
197		#' @return An empty `numeric` if all elements of `x` are `NA`, otherwise `x`.
198		#'
199		#' @examples
200		#' x <- c(NA, NA, NA)
201		#' # Internal function - empty_vector_if_na
202		#' @keywords internal
empty_vector_if_na <- function(x) {
  # Any non-missing element means `x` is returned unchanged.
  if (!all(is.na(x))) {
    return(x)
  }
  # Every element is NA (or `x` is empty): return an empty numeric vector.
  numeric()
}
210
211		#' Combine Two Vectors Element Wise
212		#'
213		#' @param x (`vector`)\cr first vector to combine.
214		#' @param y (`vector`)\cr second vector to combine.
215		#'
216		#' @return A `list` where each element combines corresponding elements of `x` and `y`.
217		#'
218		#' @examples
219		#' combine_vectors(1:3, 4:6)
220		#'
221		#' @export
combine_vectors <- function(x, y) {
  checkmate::assert_vector(x)
  checkmate::assert_vector(y, len = length(x))

  # Stack `x` on top of `y`; each column then holds one (x, y) pair.
  stacked <- rbind(x, y)
  pairs <- lapply(as.data.frame(stacked), `c`)
  # The returned list carries no element names.
  unname(pairs)
}
230
231		#' Extract Elements by Name
232		#'
233		#' This utility function extracts elements from a vector `x` by `names`.
234		#' Differences to the standard `[` function are:
235		#'
236		#' - If `x` is `NULL`, then still always `NULL` is returned (same as in base function).
237		#' - If `x` is not `NULL`, then the intersection of its names is made with `names` and those
238		#' elements are returned. That is, `names` which don't appear in `x` are not returned as `NA`s.
239		#'
240		#' @param x (named `vector`)\cr where to extract named elements from.
241		#' @param names (`character`)\cr vector of names to extract.
242		#'
243		#' @return `NULL` if `x` is `NULL`, otherwise the extracted elements from `x`.
244		#'
245		#' @keywords internal
extract_by_name <- function(x, names) {
  # `NULL` in, `NULL` out.
  if (is.null(x)) {
    return(NULL)
  }
  checkmate::assert_named(x)
  checkmate::assert_character(names)

  # Only names actually present in `x` are extracted, in the order they appear in `x`.
  present <- intersect(names(x), names)
  if (length(present) == 0) {
    return(NULL)
  }
  x[present]
}
259
260		#' Labels for Adverse Event Baskets
261		#'
262		#' @description `r lifecycle::badge("stable")`
263		#'
264		#' @param aesi (`character`)\cr with standardized `MedDRA` query name (e.g. `SMQzzNAM`) or customized query
265		#' name (e.g. `CQzzNAM`).
266		#' @param scope (`character`)\cr with scope of query (e.g. `SMQzzSC`).
267		#'
268		#' @return A `string` with the standard label for the `AE` basket.
269		#'
270		#' @examples
271		#' adae <- tern_ex_adae
272		#'
273		#' # Standardized query label includes scope.
274		#' aesi_label(adae$SMQ01NAM, scope = adae$SMQ01SC)
275		#'
276		#' # Customized query label.
277		#' aesi_label(adae$CQ01NAM)
278		#'
279		#' @export
aesi_label <- function(aesi, scope = NULL) {
  checkmate::assert_character(aesi)
  checkmate::assert_character(scope, null.ok = TRUE)

  # Read the label of `aesi` via `obj_label()` before its values are cleaned and reduced.
  fallback_label <- obj_label(aesi)
  aesi_values <- unique(sas_na(aesi))
  aesi_values <- aesi_values[!is.na(aesi_values)]

  # Without exactly one distinct query name the variable label is used instead.
  if (length(aesi_values) != 1) {
    return(fallback_label)
  }
  # A single query name without scope is returned as is.
  if (is.null(scope)) {
    return(aesi_values)
  }

  # A single query name with scope: the scope must reduce to exactly one value.
  scope <- unique(sas_na(scope))
  scope <- scope[!is.na(scope)]
  checkmate::assert_string(scope)
  paste0(aesi_values, " (", scope, ")")
}
300
301		#' Indicate Study Arm Variable in Formula
302		#'
303		#' We use `study_arm` to indicate the study arm variable in `tern` formulas.
304		#'
305		#' @param x arm information
306		#'
307		#' @return `x`
308		#'
309		#' @keywords internal
study_arm <- function(x) {
  # Text of the expression the caller supplied for `x`.
  arm_expr <- deparse(substitute(x))
  # Return the value of `x` carrying that text as attribute `varname`.
  structure(x, varname = arm_expr)
}
313
314		#' Smooth Function with Optional Grouping
315		#'
316		#' @description `r lifecycle::badge("stable")`
317		#'
318		#' This produces `loess` smoothed estimates of `y` with Student confidence intervals.
319		#'
320		#' @param df (`data.frame`)\cr data set containing all analysis variables.
321		#' @param x (`character`)\cr value with x column name.
322		#' @param y (`character`)\cr value with y column name.
323		#' @param groups (`character`)\cr vector with optional grouping variables names.
324		#' @param level (`numeric`)\cr level of confidence interval to use (0.95 by default).
325		#'
326		#' @return A `data.frame` with original `x`, smoothed `y`, `ylow`, and `yhigh`, and
327		#' optional `groups` variables formatted as `factor` type.
328		#'
329		#' @export
get_smooths <- function(df, x, y, groups = NULL, level = 0.95) {
  checkmate::assert_data_frame(df)
  df_cols <- colnames(df)
  checkmate::assert_string(x)
  checkmate::assert_subset(x, df_cols)
  checkmate::assert_numeric(df[[x]])
  checkmate::assert_string(y)
  checkmate::assert_subset(y, df_cols)
  checkmate::assert_numeric(df[[y]])

  if (!is.null(groups)) {
    checkmate::assert_character(groups)
    checkmate::assert_subset(groups, df_cols)
  }

  # Loess fit of `y` on `x`, predicted at the observed points together with standard errors.
  smooths <- function(x, y) {
    stats::predict(stats::loess(y ~ x), se = TRUE)
  }

  # Smoothed values and interval bounds for one set of rows `d`.
  # NOTE(review): `qt(level, df)` is a one-sided quantile, so `level = 0.95` gives a two-sided
  # 90% interval; a two-sided interval at `level` would use `qt((1 + level) / 2, df)`. Left
  # unchanged here because it alters every returned bound - confirm the intended meaning.
  smooth_one <- function(d) {
    plx <- smooths(d[[x]], d[[y]])
    half_width <- stats::qt(level, plx$df) * plx$se
    data.frame(
      x = d[[x]],
      y = plx$fit,
      ylow = plx$fit - half_width,
      yhigh = plx$fit + half_width
    )
  }

  if (is.null(groups)) {
    cc <- stats::complete.cases(df[c(x, y)])
    return(smooth_one(df[cc, ]))
  }

  cc <- stats::complete.cases(df[c(x, y, groups)])
  df_c <- df[cc, c(x, y, groups)]

  # `by()` stores its cells with the FIRST grouping variable varying fastest, and the cells are
  # stacked in that order below. The rows therefore have to be sorted with the LAST grouping
  # variable as primary key; otherwise the group labels attached after stacking end up on the
  # wrong rows as soon as there is more than one grouping variable.
  sort_keys <- unname(rev(lapply(df_c[groups], as.factor)))
  df_c_ordered <- df_c[do.call("order", sort_keys), , drop = FALSE]
  df_c_g <- data.frame(Map(as.factor, df_c_ordered[groups]))

  df_smooth_raw <- by(df_c_ordered, df_c_g, smooth_one)

  # Empty cells are `NULL` and are skipped by `rbind()`.
  df_smooth <- do.call(rbind, df_smooth_raw)
  df_smooth[groups] <- df_c_g

  df_smooth
}
385
386		#' Number of Available (Non-Missing Entries) in a Vector
387		#'
388		#' Small utility function for better readability.
389		#'
390		#' @param x (`any`)\cr vector in which to count non-missing values.
391		#'
392		#' @return Number of non-missing values.
393		#'
394		#' @keywords internal
n_available <- function(x) {
  # Flag the entries that are not missing, then count the flags.
  is_present <- !is.na(x)
  sum(is_present)
}
398
399		#' Reapply Variable Labels
400		#'
401		#' This is a helper function that is used in tests.
402		#'
403		#' @param x (`vector`)\cr vector of elements that needs new labels.
404		#' @param varlabels (`character`)\cr vector of labels for `x`.
405		#' @param ... further parameters to be added to the list.
406		#'
407		#' @return `x` with variable labels reapplied.
408		#'
409		#' @export
reapply_varlabels <- function(x, varlabels, ...) {
  # Labels given in `varlabels` followed by those passed through `...`, keyed by variable name.
  label_map <- c(as.list(varlabels), list(...))

  # Overwrite the matching entries of the current labels and write them back to `x`.
  current_labels <- formatters::var_labels(x)
  current_labels[names(label_map)] <- as.character(label_map)
  formatters::var_labels(x) <- current_labels

  x
}
415
# Wrapper around survival::clogit(): when the model fit fails, the error is replaced by a message
# that names the failing step. The original cause is appended after the fixed prefix, so that the
# failure stays diagnosable instead of being discarded.
clogit_with_tryCatch <- function(formula, data, ...) { # nolint
  tryCatch(
    survival::clogit(formula = formula, data = data, ...),
    error = function(e) {
      stop("model not built successfully with survival::clogit: ", conditionMessage(e))
    }
  )
}

1		#' Tabulate Biomarker Effects on Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate the estimated effects of multiple continuous biomarker variables
6		#' on a binary response endpoint across population subgroups.
7		#'
8		#' @inheritParams argument_convention
9		#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
10		#' [extract_rsp_biomarkers()].
11		#' @param vars (`character`)\cr the names of statistics to be reported among:
12		#' * `n_tot`: Total number of patients per group.
13		#' * `n_rsp`: Total number of responses per group.
14		#' * `prop`: Total response proportion per group.
15		#' * `or`: Odds ratio.
16		#' * `ci`: Confidence interval of odds ratio.
17		#' * `pval`: p-value of the effect.
18		#' Note, the statistics `n_tot`, `or` and `ci` are required.
19		#'
20		#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
21		#'
22		#' @details These functions create a layout starting from a data frame which contains
23		#' the required statistics. The tables are then typically used as input for forest plots.
24		#'
25		#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
26		#' not start from an input layout `lyt`. This is because internally the table is
27		#' created by combining multiple subtables.
28		#'
29		#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
30		#'
31		#' @examples
32		#' library(dplyr)
33		#' library(forcats)
34		#'
35		#' adrs <- tern_ex_adrs
36		#' adrs_labels <- formatters::var_labels(adrs)
37		#'
38		#' adrs_f <- adrs %>%
39		#' filter(PARAMCD == "BESRSPI") %>%
40		#' mutate(rsp = AVALC == "CR")
41		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
42		#'
43		#' df <- extract_rsp_biomarkers(
44		#' variables = list(
45		#' rsp = "rsp",
46		#' biomarkers = c("BMRKR1", "AGE"),
47		#' covariates = "SEX",
48		#' subgroups = "BMRKR2"
49		#' ),
50		#' data = adrs_f
51		#' )
52		#'
53		#' \donttest{
54		#' ## Table with default columns.
55		#' tabulate_rsp_biomarkers(df)
56		#'
57		#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
58		#' tab <- tabulate_rsp_biomarkers(
59		#' df = df,
60		#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
61		#' )
62		#'
63		#' ## Finally produce the forest plot.
64		#' g_forest(tab, xlim = c(0.7, 1.4))
65		#' }
66		#'
67		#' @export
68		#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
                                    vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
                                    na_str = default_na_str(),
                                    .indent_mods = 0L) {
  checkmate::assert_data_frame(df)
  checkmate::assert_character(df$biomarker)
  checkmate::assert_character(df$biomarker_label)
  checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))

  # Sub-table for the rows of a single biomarker.
  build_biomarker_tab <- function(df_sub) {
    tab_sub <- h_tab_rsp_one_biomarker(
      df = df_sub,
      vars = vars,
      na_str = na_str,
      .indent_mods = .indent_mods
    )
    # Put the biomarker label on the first row path of the sub-table.
    first_path <- row_paths(tab_sub)[[1]][1]
    label_at_path(tab_sub, path = first_path) <- df_sub$biomarker_label[1]
    tab_sub
  }

  # One sub-table per biomarker, stacked into a single table.
  tabs <- lapply(split(df, f = df$biomarker), FUN = build_biomarker_tab)
  result <- do.call(rbind, tabs)

  # Attach the column positions of the statistics and the header text as attributes.
  structure(
    result,
    forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
    col_x = match("or", vars),
    col_ci = match("ci", vars),
    col_symbol_size = grep("n_tot", vars)
  )
}
103
104		#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
105		#'
106		#' @description `r lifecycle::badge("stable")`
107		#'
108		#' Prepares estimates for number of responses, patients and overall response rate,
109		#' as well as odds ratio estimates, confidence intervals and p-values,
110		#' for multiple biomarkers across population subgroups in a single data frame.
111		#' `variables` corresponds to the names of variables found in `data`, passed as a
112		#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
113		#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
114		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
115		#'
116		#' @inheritParams argument_convention
117		#' @inheritParams response_subgroups
118		#' @param control (named `list`)\cr controls for the response definition and the
119		#' confidence level produced by [control_logistic()].
120		#'
121		#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
122		#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
123		#' `var_label`, and `row_type`.
124		#'
125		#' @note You can also specify a continuous variable in `rsp` and then use the
126		#' `response_definition` control to convert that internally to a logical
127		#' variable reflecting binary response.
128		#'
129		#' @seealso [h_logistic_mult_cont_df()] which is used internally.
130		#'
131		#' @examples
132		#' library(dplyr)
133		#' library(forcats)
134		#'
135		#' adrs <- tern_ex_adrs
136		#' adrs_labels <- formatters::var_labels(adrs)
137		#'
138		#' adrs_f <- adrs %>%
139		#' filter(PARAMCD == "BESRSPI") %>%
140		#' mutate(rsp = AVALC == "CR")
141		#'
142		#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
143		#' # in logistic regression models with one covariate `SEX`. The subgroups
144		#' # are defined by the levels of `BMRKR2`.
145		#' df <- extract_rsp_biomarkers(
146		#' variables = list(
147		#' rsp = "rsp",
148		#' biomarkers = c("BMRKR1", "AGE"),
149		#' covariates = "SEX",
150		#' subgroups = "BMRKR2"
151		#' ),
152		#' data = adrs_f
153		#' )
154		#' df
155		#'
156		#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
157		#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
158		#' # which is then binarized internally (response is defined as this variable
159		#' # being larger than 500).
160		#' df_grouped <- extract_rsp_biomarkers(
161		#' variables = list(
162		#' rsp = "EOSDY",
163		#' biomarkers = c("BMRKR1", "AGE"),
164		#' covariates = "SEX",
165		#' subgroups = "BMRKR2",
166		#' strat = "STRATA1"
167		#' ),
168		#' data = adrs_f,
169		#' groups_lists = list(
170		#' BMRKR2 = list(
171		#' "low" = "LOW",
172		#' "low/medium" = c("LOW", "MEDIUM"),
173		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
174		#' )
175		#' ),
176		#' control = control_logistic(
177		#' response_definition = "I(response > 500)"
178		#' )
179		#' )
180		#' df_grouped
181		#'
182		#' @export
extract_rsp_biomarkers <- function(variables,
                                   data,
                                   groups_lists = list(),
                                   control = control_logistic(),
                                   label_all = "All Patients") {
  assert_list_of_variables(variables)
  checkmate::assert_string(variables$rsp)
  checkmate::assert_character(variables$subgroups, null.ok = TRUE)
  checkmate::assert_string(label_all)

  # Estimates based on all patients, labelled as a content row.
  result_all <- h_logistic_mult_cont_df(variables = variables, data = data, control = control)
  result_all$subgroup <- label_all
  result_all$var <- "ALL"
  result_all$var_label <- label_all
  result_all$row_type <- "content"

  # Without subgroup variables the overall result is all there is to return.
  if (is.null(variables$subgroups)) {
    return(result_all)
  }

  # Estimates for one subgroup, with the subgroup labels repeated on every result row.
  fit_subgroup <- function(grp) {
    estimates <- h_logistic_mult_cont_df(variables = variables, data = grp$df, control = control)
    labels <- grp$df_labels[rep(1, times = nrow(estimates)), ]
    cbind(estimates, labels)
  }

  l_data <- h_split_by_subgroups(data, variables$subgroups, groups_lists = groups_lists)
  l_result <- lapply(l_data, fit_subgroup)
  result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
  result_subgroups$row_type <- "analysis"

  rbind(result_all, result_subgroups)
}

1		#' Univariate Formula Special Term
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' The special term `univariate` indicates that the model should be fitted individually for
6		#' every variable included in univariate.
7		#'
8		#' @param x A vector of variable name separated by commas.
9		#'
10		#' @return When used within a model formula, produces univariate models for each variable provided.
11		#'
12		#' @details
13		#' If provided alongside with pairwise specification, the model
14		#' `y ~ ARM + univariate(SEX, AGE, RACE)` lead to the study and comparison of the models
15		#' + `y ~ ARM`
16		#' + `y ~ ARM + SEX`
17		#' + `y ~ ARM + AGE`
18		#' + `y ~ ARM + RACE`
19		#'
20		#' @export
univariate <- function(x) {
  # Text of the expression the caller wrote inside `univariate()`.
  term_expr <- deparse(substitute(x))
  # Return the value of `x` carrying that text as attribute `varname`.
  structure(x, varname = term_expr)
}
24
25		# Get the right-hand-term of a formula
rht <- function(x) {
  checkmate::assert_formula(x)
  # The right-hand side is the last element of the formula object.
  rhs <- x[[length(x)]]
  as.character(rhs)
}
31
32		#' Hazard Ratio Estimation in Interactions
33		#'
34		#' This function estimates the hazard ratios between arms when an interaction variable is given with
35		#' specific values.
36		#'
37		#' @param variable,given Names of two variable in interaction. We seek the estimation of the levels of `variable`
38		#' given the levels of `given`.
39		#' @param lvl_var,lvl_given corresponding levels as given by `levels`.
40		#' @param mmat A name numeric filled with 0 used as template to obtain the design matrix.
41		#' @param coef Numeric of estimated coefficients.
42		#' @param vcov Variance-covariance matrix of underlying model.
43		#' @param conf_level Single numeric for the confidence level of estimate intervals.
44		#'
45		#' @details Given the cox regression investigating the effect of Arm (A, B, C; reference A)
46		#' and Sex (F, M; reference Female). The model is abbreviated: y ~ Arm + Sex + Arm x Sex.
47		#' The cox regression estimates the coefficients along with a variance-covariance matrix for:
48		#'
49		#' - b1 (arm b), b2 (arm c)
50		#' - b3 (sex m)
51		#' - b4 (arm b: sex m), b5 (arm c: sex m)
52		#'
53		#' Given that I want an estimation of the Hazard Ratio for arm C/sex M, the estimation
54		#' will be given in reference to arm A/Sex M by exp(b2 + b3 + b5)/ exp(b3) = exp(b2 + b5),
55		#' therefore the interaction coefficient is given by b2 + b5 while the standard error is obtained
56		#' as $1.96 * sqrt(Var b2 + Var b5 + 2 * covariance (b2,b5))$ for a confidence level of 0.95.
57		#'
58		#' @return A list of matrix (one per level of variable) with rows corresponding to the combinations of
59		#' `variable` and `given`, with columns:
60		#' * `coef_hat`: Estimation of the coefficient.
61		#' * `coef_se`: Standard error of the estimation.
62		#' * `hr`: Hazard ratio.
63		#' * `lcl, ucl`: Lower/upper confidence limit of the hazard ratio.
64		#'
65		#' @seealso [s_cox_multivariate()].
66		#'
67		#' @examples
68		#' library(dplyr)
69		#' library(survival)
70		#'
71		#' ADSL <- tern_ex_adsl %>%
72		#' filter(SEX %in% c("F", "M"))
73		#'
74		#' adtte <- tern_ex_adtte %>% filter(PARAMCD == "PFS")
75		#' adtte$ARMCD <- droplevels(adtte$ARMCD)
76		#' adtte$SEX <- droplevels(adtte$SEX)
77		#'
78		#' mod <- coxph(
79		#' formula = Surv(time = AVAL, event = 1 - CNSR) ~ (SEX + ARMCD)^2,
80		#' data = adtte
81		#' )
82		#'
83		#' mmat <- stats::model.matrix(mod)[1, ]
84		#' mmat[!mmat == 0] <- 0
85		#'
86		#' @keywords internal
estimate_coef <- function(variable, given,
                          lvl_var, lvl_given,
                          coef,
                          mmat,
                          vcov,
                          conf_level = 0.95) {
  # Coefficient-style names: variable name pasted to each of its levels. The first level of
  # `variable` is dropped ([-1]: reference level); all levels of `given` are kept.
  var_lvl <- paste0(variable, lvl_var[-1]) # [-1]: reference level
  giv_lvl <- paste0(given, lvl_given)

  # One row per combination of a non-reference `variable` level and a `given` level,
  # sorted by `variable` first.
  design_mat <- expand.grid(variable = var_lvl, given = giv_lvl)
  design_mat <- design_mat[order(design_mat$variable, design_mat$given), ]
  # Inside `within()`, `variable` and `given` are the columns of `design_mat`, not the function
  # arguments. Both orders of the interaction name are built, presumably because the order of the
  # terms in the fitted model's coefficient names is not known here.
  design_mat <- within(
    data = design_mat,
    expr = {
      inter <- paste0(variable, ":", given)
      rev_inter <- paste0(given, ":", variable)
    }
  )

  split_by_variable <- design_mat$variable
  interaction_names <- paste(design_mat$variable, design_mat$given, sep = "/")

  # For each combination, copy the template `mmat` and set to 1 every entry whose name equals
  # one of the row's values other than the `given` level itself (i.e. the `variable` level and
  # the two interaction spellings). Each combination becomes one column of the result.
  design_mat <- apply(
    X = design_mat, MARGIN = 1, FUN = function(x) {
      mmat[names(mmat) %in% x[-which(names(x) == "given")]] <- 1
      return(mmat)
    }
  )
  colnames(design_mat) <- interaction_names

  betas <- as.matrix(coef)

  # Sum of the selected coefficients for each combination; the single column is named "coef".
  coef_hat <- t(design_mat) %*% betas
  dimnames(coef_hat)[2] <- "coef"

  # Square root of the sum of all entries of the variance-covariance block belonging to the
  # selected coefficients.
  coef_se <- apply(design_mat, 2, function(x) {
    vcov_el <- as.logical(x)
    y <- vcov[vcov_el, vcov_el]
    y <- sum(y)
    y <- sqrt(y)
    return(y)
  })

  # Two-sided normal quantile for `conf_level`.
  q_norm <- stats::qnorm((1 + conf_level) / 2)
  y <- cbind(coef_hat, `se(coef)` = coef_se)

  # Append the exponentiated estimate and its confidence limits to each row.
  y <- apply(y, 1, function(x) {
    x["hr"] <- exp(x["coef"])
    x["lcl"] <- exp(x["coef"] - q_norm * x["se(coef)"])
    x["ucl"] <- exp(x["coef"] + q_norm * x["se(coef)"])

    return(x)
  })

  # `apply()` returned one column per combination: transpose back to one row per combination,
  # then split the rows by level of `variable` into a list of matrices.
  y <- t(y)
  y <- by(y, split_by_variable, identity)
  y <- lapply(y, as.matrix)

  attr(y, "details") <- paste0(
    "Estimations of ", variable,
    " hazard ratio given the level of ", given, " compared to ",
    variable, " level ", lvl_var[1], "."
  )
  return(y)
}
152
153		#' `tryCatch` around `car::Anova`
154		#'
155		#' Captures warnings when executing [car::Anova].
156		#'
157		#' @inheritParams car::Anova
158		#'
159		#' @return A list with item `aov` for the result of the model and `warn_text` for the captured warnings.
160		#'
161		#' @examples
162		#' # `car::Anova` on cox regression model including strata and expected
163		#' # a likelihood ratio test triggers a warning as only `Wald` method is
164		#' # accepted.
165		#'
166		#' library(survival)
167		#'
168		#' mod <- coxph(
169		#' formula = Surv(time = futime, event = fustat) ~ factor(rx) + strata(ecog.ps),
170		#' data = ovarian
171		#' )
172		#'
173		#' @keywords internal
try_car_anova <- function(mod,
                          test.statistic) { # nolint
  # Messages of all warnings raised by `car::Anova()`; stays `NULL` when there are none.
  warn_text <- NULL

  aov <- withCallingHandlers(
    expr = car::Anova(
      mod,
      test.statistic = test.statistic,
      type = "III"
    ),
    warning = function(w) {
      # Record the message only: pasting the condition object itself would produce one string
      # per component of the condition (message and call). `<<-` updates `warn_text` in the
      # frame of `try_car_anova()`, where this handler is defined.
      warn_text <<- c(
        warn_text,
        trimws(paste0("Warning in `try_car_anova`: ", conditionMessage(w)))
      )

      # A warning is sometimes expected, so execution resumes with the warning muffled.
      invokeRestart("muffleWarning")
    }
  )

  # `aov`: result of `car::Anova()`; `warn_text`: captured warning messages or `NULL`.
  list(aov = aov, warn_text = warn_text)
}
204
205		#' Fit the Cox Regression Model and `Anova`
206		#'
207		#' This function derives the effect p-values from the [survival::coxph()] results using [car::Anova()].
208		#' This last package introduces more flexibility to get the effect p.values.
209		#'
210		#' @inheritParams t_coxreg
211		#'
212		#' @return A list with items `mod` (results of [survival::coxph()]), `msum` (result of `summary`) and
213		#' `aov` (result of [car::Anova()]).
214		#'
215		#' @noRd
# `data` is required and `conf_level` defaults to 0.95. The former defaults `data = data` and
# `conf_level = conf_level` referred to themselves and could only fail with
# "promise already under evaluation" when the argument was omitted.
fit_n_aov <- function(formula,
                      data,
                      conf_level = 0.95,
                      pval_method = c("wald", "likelihood"),
                      ...) {
  pval_method <- match.arg(pval_method)

  # The formula is re-homed to this frame before fitting - presumably so that symbols used while
  # fitting resolve against the arguments of this function; confirm before changing.
  environment(formula) <- environment()
  suppressWarnings({
    # We expect some warnings due to coxph which fails strict programming.
    mod <- survival::coxph(formula, data = data, ...)
    msum <- summary(mod, conf.int = conf_level)
  })

  # Map the p-value method onto the `test.statistic` value passed to `car::Anova()`.
  aov <- try_car_anova(
    mod,
    test.statistic = switch(pval_method,
      "wald" = "Wald",
      "likelihood" = "LR"
    )
  )

  # Surface captured warnings as a message and keep them as attribute of the result.
  warn_attr <- aov$warn_text
  if (!is.null(warn_attr)) {
    message(warn_attr)
  }

  y <- list(mod = mod, msum = msum, aov = aov$aov)
  attr(y, "message") <- warn_attr

  y
}
247
248		# argument_checks
check_formula <- function(formula) {
  # Nothing to report for a proper formula object.
  is_formula <- inherits(formula, "formula")
  if (is_formula) {
    return(invisible())
  }

  stop("Check `formula`. A formula should resemble `Surv(time = AVAL, event = 1 - CNSR) ~ study_arm(ARMCD)`.")
}
256
check_covariate_formulas <- function(covariates) {
  # One flag per element telling whether it is a formula.
  is_formula <- vapply(X = covariates, FUN = inherits, what = "formula", FUN.VALUE = TRUE)

  # `NULL` has no elements to flag, so it is rejected explicitly.
  if (is.null(covariates) || any(!is_formula)) {
    stop("Check `covariates`, it should be a list of right-hand-term formulas, e.g. list(Age = ~AGE).")
  }

  invisible()
}
264
name_covariate_names <- function(covariates) {
  current_names <- names(covariates)

  if (is.null(current_names)) {
    # No names at all: name every element after the right-hand term of its formula.
    names(covariates) <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  } else {
    # Some names may be empty strings: fill in only those.
    is_blank <- current_names == ""
    if (any(is_blank)) {
      names(covariates)[is_blank] <- vapply(covariates[is_blank], FUN = rht, FUN.VALUE = "name")
    }
  }

  covariates
}
272
check_increments <- function(increments, covariates) {
  if (is.null(increments)) {
    return(invisible())
  }

  # Right-hand terms of the covariate formulas, against which the increment names are matched.
  covariates <- vapply(covariates, FUN = rht, FUN.VALUE = "name")
  increment_names <- names(increments)
  unmatched <- increment_names[!increment_names %in% covariates]

  # One warning per increment name without a matching covariate.
  lapply(
    X = unmatched, FUN = function(x) {
      warning(
        paste(
          "Check `increments`, the `increment` for ", x,
          "doesn't match any names in investigated covariate(s)."
        )
      )
    }
  )

  invisible()
}
292
293		#' Multivariate Cox Model - Summarized Results
294		#'
295		#' Analyses based on multivariate Cox model are usually not performed for the Controlled Substance Reporting or
296		#' regulatory documents but serve exploratory purposes only (e.g., for publication). In practice, the model usually
297		#' includes only the main effects (without interaction terms). It produces the hazard ratio estimates for each of the
298		#' covariates included in the model.
299		#' The analysis follows the same principles (e.g., stratified vs. unstratified analysis and tie handling) as the
300		#' usual Cox model analysis. Since there is usually no pre-specified hypothesis testing for such analysis,
301		#' the p.values need to be interpreted with caution. (Statistical Analysis of Clinical Trials Data with R,
302		#' `NEST's bookdown`)
303		#'
304		#' @param formula (`formula`)\cr A formula corresponding to the investigated [survival::Surv()] survival model
305		#' including covariates.
306		#' @param data (`data.frame`)\cr A data frame which includes the variable in formula and covariates.
307		#' @param conf_level (`proportion`)\cr The confidence level for the hazard ratio interval estimations. Default is 0.95.
308		#' @param pval_method (`character`)\cr The method used for the estimation of p-values, should be one of
309		#' `"wald"` (default) or `"likelihood"`.
310		#' @param ... Optional parameters passed to [survival::coxph()]. Can include `ties`, a character string specifying the
311		#' method for tie handling, one of `exact` (default), `efron`, `breslow`.
312		#'
313		#' @return A `list` with elements `mod`, `msum`, `aov`, and `coef_inter`.
314		#'
315		#' @details The output is limited to single effect terms. Work is ongoing for estimation of interaction terms
316		#' but is out of scope as defined by the Global Data Standards Repository
317		#' (`GDS_Standard_TLG_Specs_Tables_2.doc`).
318		#'
319		#' @seealso [estimate_coef()].
320		#'
321		#' @examples
322		#' library(dplyr)
323		#'
324		#' adtte <- tern_ex_adtte
325		#' adtte_f <- subset(adtte, PARAMCD == "OS") # _f: filtered
326		#' adtte_f <- filter(
327		#' adtte_f,
328		#' PARAMCD == "OS" &
329		#' SEX %in% c("F", "M") &
330		#' RACE %in% c("ASIAN", "BLACK OR AFRICAN AMERICAN", "WHITE")
331		#' )
332		#' adtte_f$SEX <- droplevels(adtte_f$SEX)
333		#' adtte_f$RACE <- droplevels(adtte_f$RACE)
334		#'
335		#' @keywords internal
s_cox_multivariate <- function(formula, data,
                               conf_level = 0.95,
                               pval_method = c("wald", "likelihood"),
                               ...) {
  # Names of the variables on the right-hand side, without the response (first row of the
  # `factors` table) and without `strata()` terms.
  tf <- stats::terms(formula, specials = c("strata"))
  covariates <- rownames(attr(tf, "factors"))[-c(1, unlist(attr(tf, "specials")))]

  # Character covariates are converted to factors in the local copy of `data`; their `levels()`
  # are read further below when interaction terms are estimated.
  for (covariate in covariates) {
    if (is.character(data[[covariate]])) {
      data[[covariate]] <- as.factor(data[[covariate]])
    }
  }
  pval_method <- match.arg(pval_method)

  # Fit the model and run the analysis of variance.
  y <- fit_n_aov(
    formula = formula,
    data = data,
    conf_level = conf_level,
    pval_method = pval_method,
    ...
  )
  mod <- y$mod
  aov <- y$aov
  msum <- y$msum

  term_order <- attr(mod$terms, "order")
  all_term_labs <- attr(mod$terms, "term.labels")

  # Hazard ratios for interaction terms are only derived when the model contains any.
  coef_inter <- NULL
  if (any(term_order > 1)) {
    for_inter <- all_term_labs[term_order > 1]
    names(for_inter) <- for_inter

    # Template with the names of the model matrix columns and all entries set to 0.
    mmat <- stats::model.matrix(mod)[1, ]
    mmat[mmat != 0] <- 0
    mcoef <- stats::coef(mod)
    mvcov <- stats::vcov(mod)

    estimate_coef_local <- function(variable, given) {
      estimate_coef(
        variable, given,
        coef = mcoef, mmat = mmat, vcov = mvcov, conf_level = conf_level,
        lvl_var = levels(data[[variable]]), lvl_given = levels(data[[given]])
      )
    }

    term_factors <- attr(mod$terms, "factors")
    coef_inter <- lapply(
      for_inter, function(x) {
        # Variables involved in the interaction term `x`; each one is estimated given the other.
        involved <- term_factors[, x]
        involved <- names(involved[involved > 0])
        Map(estimate_coef_local, variable = involved, given = rev(involved))
      }
    )
  }

  list(mod = mod, msum = msum, aov = aov, coef_inter = coef_inter)
}

1		#' Tabulate Survival Duration by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate statistics such as median survival time and hazard ratio for population subgroups.
6		#'
7		#' @inheritParams argument_convention
8		#' @inheritParams survival_coxph_pairwise
9		#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
10		#' created using [extract_survival_subgroups()].
11		#' @param vars (`character`)\cr the name of statistics to be reported among:
12		#' * `n_tot_events`: Total number of events per group.
13		#' * `n_events`: Number of events per group.
14		#' * `n_tot`: Total number of observations per group.
15		#' * `n`: Number of observations per group.
16		#' * `median`: Median survival time.
17		#' * `hr`: Hazard ratio.
18		#' * `ci`: Confidence interval of hazard ratio.
19		#' * `pval`: p-value of the effect.
20		#' Note, one of the statistics `n_tot` and `n_tot_events`, as well as both `hr` and `ci`
21		#' are required.
22		#' @param time_unit (`string`)\cr label with unit of median survival time. Default `NULL` skips displaying unit.
23		#'
24		#' @details These functions create a layout starting from a data frame which contains
25		#' the required statistics. Tables typically used as part of forest plot.
26		#'
27		#' @seealso [extract_survival_subgroups()]
28		#'
29		#' @examples
30		#' library(dplyr)
31		#' library(forcats)
32		#'
33		#' adtte <- tern_ex_adtte
34		#'
35		#' # Save variable labels before data processing steps.
36		#' adtte_labels <- formatters::var_labels(adtte)
37		#'
38		#' adtte_f <- adtte %>%
39		#' filter(
40		#' PARAMCD == "OS",
41		#' ARM %in% c("B: Placebo", "A: Drug X"),
42		#' SEX %in% c("M", "F")
43		#' ) %>%
44		#' mutate(
45		#' # Reorder levels of ARM to display reference arm before treatment arm.
46		#' ARM = droplevels(fct_relevel(ARM, "B: Placebo")),
47		#' SEX = droplevels(SEX),
48		#' AVALU = as.character(AVALU),
49		#' is_event = CNSR == 0
50		#' )
51		#' labels <- c(
52		#' "ARM" = adtte_labels[["ARM"]],
53		#' "SEX" = adtte_labels[["SEX"]],
54		#' "AVALU" = adtte_labels[["AVALU"]],
55		#' "is_event" = "Event Flag"
56		#' )
57		#' formatters::var_labels(adtte_f)[names(labels)] <- labels
58		#'
59		#' df <- extract_survival_subgroups(
60		#' variables = list(
61		#' tte = "AVAL",
62		#' is_event = "is_event",
63		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
64		#' ),
65		#' data = adtte_f
66		#' )
67		#' df
68		#'
69		#' df_grouped <- extract_survival_subgroups(
70		#' variables = list(
71		#' tte = "AVAL",
72		#' is_event = "is_event",
73		#' arm = "ARM", subgroups = c("SEX", "BMRKR2")
74		#' ),
75		#' data = adtte_f,
76		#' groups_lists = list(
77		#' BMRKR2 = list(
78		#' "low" = "LOW",
79		#' "low/medium" = c("LOW", "MEDIUM"),
80		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
81		#' )
82		#' )
83		#' )
84		#' df_grouped
85		#'
86		#' @name survival_duration_subgroups
87		#' @order 1
88		NULL
89
90		#' Prepares Survival Data for Population Subgroups in Data Frames
91		#'
92		#' @description `r lifecycle::badge("stable")`
93		#'
94		#' Prepares estimates of median survival times and treatment hazard ratios for population subgroups in
95		#' data frames. Simple wrapper for [h_survtime_subgroups_df()] and [h_coxph_subgroups_df()]. Result is a `list`
96		#' of two `data.frame`s: `survtime` and `hr`. `variables` corresponds to the names of variables found in `data`,
97		#' passed as a named `list` and requires elements `tte`, `is_event`, `arm` and optionally `subgroups` and `strat`.
98		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
99		#'
100		#' @inheritParams argument_convention
101		#' @inheritParams survival_duration_subgroups
102		#' @inheritParams survival_coxph_pairwise
103		#'
104		#' @return A named `list` of two elements:
105		#' * `survtime`: A `data.frame` containing columns `arm`, `n`, `n_events`, `median`, `subgroup`, `var`,
106		#' `var_label`, and `row_type`.
107		#' * `hr`: A `data.frame` containing columns `arm`, `n_tot`, `n_tot_events`, `hr`, `lcl`, `ucl`, `conf_level`,
108		#' `pval`, `pval_label`, `subgroup`, `var`, `var_label`, and `row_type`.
109		#'
110		#' @seealso [survival_duration_subgroups]
111		#'
112		#' @export
extract_survival_subgroups <- function(variables,
                                       data,
                                       groups_lists = list(),
                                       control = control_coxph(),
                                       label_all = "All Patients") {
  # Both data frames come from the same inputs; only the Cox helper takes `control`.
  list(
    survtime = h_survtime_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    hr = h_coxph_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      control = control,
      label_all = label_all
    )
  )
}
134
135		#' @describeIn survival_duration_subgroups Formatted analysis function which is used as
136		#' `afun` in `tabulate_survival_subgroups()`.
137		#'
138		#' @return
139		#' * `a_survival_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
140		#'
141		#' @keywords internal
a_survival_subgroups <- function(.formats = list( # nolint start
                                   n = "xx",
                                   n_events = "xx",
                                   n_tot_events = "xx",
                                   median = "xx.x",
                                   n_tot = "xx",
                                   hr = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   # Plain "|" here: "\|" is not a valid escape in an R string literal.
                                   pval = "x.xxxx | (<0.0001)"
                                 ),
                                 na_str = default_na_str()) { # nolint end
  # Builds one analysis function per entry of `.formats`.
  #
  # `.formats` must be a list whose names are a subset of the statistics
  # checked below. `na_str` is forwarded to `in_rows()` as `.format_na_strs`.
  # Returns the list produced by `Map()`; callers in this file index it by
  # statistic name (e.g. `afun_lst[names(colvars$labels)]`).
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  afun_lst <- Map(
    function(stat, fmt, na_str) {
      if (stat == "ci") {
        # The confidence interval is not a single column of `df`: each cell
        # is assembled from the `lcl` and `ucl` columns.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = combine_vectors(df$lcl, df$ucl),
            .labels = as.character(df$subgroup),
            .formats = fmt,
            .format_na_strs = na_str
          )
        }
      } else {
        # Every other statistic is read from the column of `df` named `stat`.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = as.list(df[[stat]]),
            .labels = as.character(df$subgroup),
            .formats = fmt,
            .format_na_strs = na_str
          )
        }
      }
    },
    stat = names(.formats),
    fmt = .formats,
    na_str = na_str
  )

  afun_lst
}
188
189		#' @describeIn survival_duration_subgroups Table-creating function which creates a table
190		#' summarizing survival by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
191		#' and [rtables::summarize_row_groups()].
192		#'
193		#' @return An `rtables` table summarizing survival by subgroup.
194		#'
195		#' @examples
196		#' ## Table with default columns.
197		#' basic_table() %>%
198		#' tabulate_survival_subgroups(df, time_unit = adtte_f$AVALU[1])
199		#'
200		#' ## Table with a manually chosen set of columns: adding "pval".
201		#' basic_table() %>%
202		#' tabulate_survival_subgroups(
203		#' df = df,
204		#' vars = c("n_tot_events", "n_events", "median", "hr", "ci", "pval"),
205		#' time_unit = adtte_f$AVALU[1]
206		#' )
207		#'
208		#' @export
209		#' @order 2
tabulate_survival_subgroups <- function(lyt,
                                        df,
                                        vars = c("n_tot_events", "n_events", "median", "hr", "ci"),
                                        groups_lists = list(),
                                        label_all = "All Patients",
                                        time_unit = NULL,
                                        na_str = default_na_str()) {
  # Builds one table from `df$survtime` (optional columns) and one from
  # `df$hr` (required columns), binds them column-wise and attaches the
  # attributes `forest_header`, `col_x`, `col_ci` and `col_symbol_size`.

  # Confidence level and p-value label are read from the first row of `df$hr`.
  conf_level <- df$hr$conf_level[1]
  method <- df$hr$pval_label[1]

  extra_args <- list(groups_lists = groups_lists, conf_level = conf_level, method = method, label_all = label_all)

  afun_lst <- a_survival_subgroups(na_str = na_str)
  colvars <- d_survival_subgroups_colvars(
    vars,
    conf_level = conf_level,
    method = method,
    time_unit = time_unit
  )

  # Partition the requested columns by the data frame they are tabulated from.
  is_survtime_col <- names(colvars$labels) %in% c("n", "n_events", "median")
  is_hr_col <- names(colvars$labels) %in% c("n_tot", "n_tot_events", "hr", "ci", "pval")

  colvars_survtime <- list(
    vars = colvars$vars[is_survtime_col],
    labels = colvars$labels[is_survtime_col]
  )
  colvars_hr <- list(
    vars = colvars$vars[is_hr_col],
    labels = colvars$labels[is_hr_col]
  )

  # Columns from table_survtime are optional.
  if (length(colvars_survtime$vars) > 0) {
    lyt_survtime <- split_cols_by(lyt = lyt, var = "arm")
    lyt_survtime <- split_rows_by(
      lyt = lyt_survtime,
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE
    )
    lyt_survtime <- summarize_row_groups(
      lyt = lyt_survtime,
      var = "var_label",
      cfun = afun_lst[names(colvars_survtime$labels)],
      na_str = na_str,
      extra_args = extra_args
    )
    lyt_survtime <- split_cols_by_multivar(
      lyt = lyt_survtime,
      vars = colvars_survtime$vars,
      varlabels = colvars_survtime$labels
    )

    # Analysis rows are only added when the data contain them.
    if ("analysis" %in% df$survtime$row_type) {
      lyt_survtime <- split_rows_by(
        lyt = lyt_survtime,
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      )
      lyt_survtime <- split_rows_by(lyt = lyt_survtime, var = "var_label", nested = TRUE)
      lyt_survtime <- analyze_colvars(
        lyt = lyt_survtime,
        afun = afun_lst[names(colvars_survtime$labels)],
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
    }

    table_survtime <- build_table(lyt_survtime, df = df$survtime)
  } else {
    table_survtime <- NULL
  }

  # Columns "n_tot_events" or "n_tot", and "hr", "ci" in table_hr are required.
  lyt_hr <- split_cols_by(lyt = lyt, var = "arm")
  lyt_hr <- split_rows_by(
    lyt = lyt_hr,
    var = "row_type",
    split_fun = keep_split_levels("content"),
    nested = FALSE
  )
  lyt_hr <- summarize_row_groups(
    lyt = lyt_hr,
    var = "var_label",
    cfun = afun_lst[names(colvars_hr$labels)],
    na_str = na_str,
    extra_args = extra_args
  )
  lyt_hr <- split_cols_by_multivar(
    lyt = lyt_hr,
    vars = colvars_hr$vars,
    varlabels = colvars_hr$labels
  ) %>%
    append_topleft("Baseline Risk Factors")

  # This layout is built against `df$hr`, so the presence of analysis rows is
  # checked on `df$hr` (not on `df$survtime`).
  if ("analysis" %in% df$hr$row_type) {
    lyt_hr <- split_rows_by(
      lyt = lyt_hr,
      var = "row_type",
      split_fun = keep_split_levels("analysis"),
      nested = FALSE,
      child_labels = "hidden"
    )
    lyt_hr <- split_rows_by(lyt = lyt_hr, var = "var_label", nested = TRUE)
    lyt_hr <- analyze_colvars(
      lyt = lyt_hr,
      afun = afun_lst[names(colvars_hr$labels)],
      na_str = na_str,
      inclNAs = TRUE,
      extra_args = extra_args
    )
  }
  table_hr <- build_table(lyt_hr, df = df$hr)

  # There can be one or two vars starting with "n_tot".
  n_tot_ids <- grep("^n_tot", colvars_hr$vars)
  if (is.null(table_survtime)) {
    result <- table_hr
    hr_id <- match("hr", colvars_hr$vars)
    ci_id <- match("lcl", colvars_hr$vars)
  } else {
    # Reorder the table: total columns first, then survival time, then the rest.
    result <- cbind_rtables(table_hr[, n_tot_ids], table_survtime, table_hr[, -n_tot_ids])
    # And then calculate column indices accordingly.
    hr_id <- length(n_tot_ids) + ncol(table_survtime) + match("hr", colvars_hr$vars[-n_tot_ids])
    ci_id <- length(n_tot_ids) + ncol(table_survtime) + match("lcl", colvars_hr$vars[-n_tot_ids])
    n_tot_ids <- seq_along(n_tot_ids)
  }

  structure(
    result,
    forest_header = paste0(rev(levels(df$survtime$arm)), "\nBetter"),
    col_x = hr_id,
    col_ci = ci_id,
    # Take the first one for scaling the symbol sizes in graph.
    col_symbol_size = n_tot_ids[1]
  )
}
349
350		#' Labels for Column Variables in Survival Duration by Subgroup Table
351		#'
352		#' @description `r lifecycle::badge("stable")`
353		#'
354		#' Internal function to check variables included in [tabulate_survival_subgroups()] and create column labels.
355		#'
356		#' @inheritParams tabulate_survival_subgroups
357		#' @inheritParams argument_convention
358		#' @param method (`character`)\cr p-value method for testing hazard ratio = 1.
359		#'
360		#' @return A `list` of variables and their labels to tabulate.
361		#'
362		#' @note At least one of `n_tot` and `n_tot_events` must be provided in `vars`.
363		#'
364		#' @export
d_survival_subgroups_colvars <- function(vars,
                                         conf_level,
                                         method,
                                         time_unit = NULL) {
  # Input checks: `hr` and `ci` are mandatory, at least one total-count
  # column must be requested, and nothing outside the known set is allowed.
  checkmate::assert_character(vars)
  checkmate::assert_string(time_unit, null.ok = TRUE)
  checkmate::assert_subset(c("hr", "ci"), vars)
  checkmate::assert_true(any(c("n_tot", "n_tot_events") %in% vars))
  checkmate::assert_subset(
    vars,
    c("n", "n_events", "median", "n_tot", "n_tot_events", "hr", "ci", "pval")
  )

  # The median label carries the time unit when one is supplied.
  median_label <- "Median"
  if (!is.null(time_unit)) {
    median_label <- paste0("Median (", time_unit, ")")
  }

  label_lookup <- c(
    n = "n",
    n_events = "Events",
    median = median_label,
    n_tot = "Total n",
    n_tot_events = "Total Events",
    hr = "Hazard Ratio",
    ci = paste0(100 * conf_level, "% Wald CI"),
    pval = method
  )

  # "ci" is not a column of the analysis data, so `lcl` stands in for it as
  # the column variable. The cell itself is built from both `lcl` and `ucl`,
  # see `a_survival_subgroups` for details.
  tab_vars <- replace(vars, vars == "ci", "lcl")

  list(
    vars = tab_vars,
    labels = label_lookup[vars]
  )
}

1		#' Summarize Variables in Columns
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This analyze function uses the S3 generic function [s_summary()] to summarize different variables
6		#' that are arranged in columns. Additional standard formatting arguments are available. It is a
7		#' minimal wrapper for [rtables::analyze_colvars()]. The latter function is meant to add different
8		#' analysis methods for each column variables as different rows. To have the analysis methods as
9		#' column labels, please refer to [analyze_vars_in_cols()].
10		#'
11		#' @inheritParams argument_convention
12		#' @param ... arguments passed to `s_summary()`.
13		#' @param .indent_mods (named `vector` of `integer`)\cr indent modifiers for the labels. Each element of the vector
14		#' should be a name-value pair with name corresponding to a statistic specified in `.stats` and value the indentation
15		#' for that statistic's row label.
16		#'
17		#' @return
18		#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
19		#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
20		#' in columns, and add it to the table layout.
21		#'
22		#' @seealso [rtables::split_cols_by_multivar()] and [`analyze_colvars_functions`].
23		#'
24		#' @examples
25		#' dta_test <- data.frame(
26		#' USUBJID = rep(1:6, each = 3),
27		#' PARAMCD = rep("lab", 6 * 3),
28		#' AVISIT = rep(paste0("V", 1:3), 6),
29		#' ARM = rep(LETTERS[1:3], rep(6, 3)),
30		#' AVAL = c(9:1, rep(NA, 9)),
31		#' CHG = c(1:9, rep(NA, 9))
32		#' )
33		#'
34		#' ## Default output within a `rtables` pipeline.
35		#' basic_table() %>%
36		#' split_cols_by("ARM") %>%
37		#' split_rows_by("AVISIT") %>%
38		#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
39		#' summarize_colvars() %>%
40		#' build_table(dta_test)
41		#'
42		#' ## Selection of statistics, formats and labels also work.
43		#' basic_table() %>%
44		#' split_cols_by("ARM") %>%
45		#' split_rows_by("AVISIT") %>%
46		#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
47		#' summarize_colvars(
48		#' .stats = c("n", "mean_sd"),
49		#' .formats = c("mean_sd" = "xx.x, xx.x"),
50		#' .labels = c(n = "n", mean_sd = "Mean, SD")
51		#' ) %>%
52		#' build_table(dta_test)
53		#'
54		#' ## Use arguments interpreted by `s_summary`.
55		#' basic_table() %>%
56		#' split_cols_by("ARM") %>%
57		#' split_rows_by("AVISIT") %>%
58		#' split_cols_by_multivar(vars = c("AVAL", "CHG")) %>%
59		#' summarize_colvars(na.rm = FALSE) %>%
60		#' build_table(dta_test)
61		#'
62		#' @export
summarize_colvars <- function(lyt,
                              ...,
                              na_level = lifecycle::deprecated(),
                              na_str = default_na_str(),
                              .stats = c("n", "mean_sd", "median", "range", "count_fraction"),
                              .formats = NULL,
                              .labels = NULL,
                              .indent_mods = NULL) {
  # `na_level` is deprecated: warn and use its value as `na_str`.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "summarize_colvars(na_level)", "summarize_colvars(na_str)")
    na_str <- na_level
  }

  # Formatting arguments are forwarded only when the caller supplied them.
  formatting_args <- list(.formats = .formats, .labels = .labels, .indent_mods = .indent_mods)
  formatting_args <- formatting_args[!vapply(formatting_args, is.null, logical(1))]

  extra_args <- c(
    list(.stats = .stats, na_str = na_str, ...),
    formatting_args
  )

  analyze_colvars(
    lyt,
    afun = a_summary,
    na_str = na_str,
    extra_args = extra_args
  )
}

1		#' Tabulate Binary Response by Subgroup
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Tabulate statistics such as response rate and odds ratio for population subgroups.
6		#'
7		#' @inheritParams extract_rsp_subgroups
8		#' @inheritParams argument_convention
9		#'
10		#' @details These functions create a layout starting from a data frame which contains
11		#' the required statistics. Tables typically used as part of forest plot.
12		#'
13		#' @seealso [extract_rsp_subgroups()]
14		#'
15		#' @examples
16		#' library(dplyr)
17		#' library(forcats)
18		#'
19		#' adrs <- tern_ex_adrs
20		#' adrs_labels <- formatters::var_labels(adrs)
21		#'
22		#' adrs_f <- adrs %>%
23		#' filter(PARAMCD == "BESRSPI") %>%
24		#' filter(ARM %in% c("A: Drug X", "B: Placebo")) %>%
25		#' droplevels() %>%
26		#' mutate(
27		#' # Reorder levels of factor to make the placebo group the reference arm.
28		#' ARM = fct_relevel(ARM, "B: Placebo"),
29		#' rsp = AVALC == "CR"
30		#' )
31		#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
32		#'
33		#' # Unstratified analysis.
34		#' df <- extract_rsp_subgroups(
35		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
36		#' data = adrs_f
37		#' )
38		#' df
39		#'
40		#' # Stratified analysis.
41		#' df_strat <- extract_rsp_subgroups(
42		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2"), strat = "STRATA1"),
43		#' data = adrs_f
44		#' )
45		#' df_strat
46		#'
47		#' # Grouping of the BMRKR2 levels.
48		#' df_grouped <- extract_rsp_subgroups(
49		#' variables = list(rsp = "rsp", arm = "ARM", subgroups = c("SEX", "BMRKR2")),
50		#' data = adrs_f,
51		#' groups_lists = list(
52		#' BMRKR2 = list(
53		#' "low" = "LOW",
54		#' "low/medium" = c("LOW", "MEDIUM"),
55		#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
56		#' )
57		#' )
58		#' )
59		#' df_grouped
60		#'
61		#' @name response_subgroups
62		#' @order 1
63		NULL
64
65		#' Prepares Response Data for Population Subgroups in Data Frames
66		#'
67		#' @description `r lifecycle::badge("stable")`
68		#'
69		#' Prepares response rates and odds ratios for population subgroups in data frames. Simple wrapper
70		#' for [h_odds_ratio_subgroups_df()] and [h_proportion_subgroups_df()]. Result is a list of two
71		#' `data.frames`: `prop` and `or`. `variables` corresponds to the names of variables found in `data`,
72		#' passed as a named `list` and requires elements `rsp`, `arm` and optionally `subgroups` and `strat`.
73		#' `groups_lists` optionally specifies groupings for `subgroups` variables.
74		#'
75		#' @inheritParams argument_convention
76		#' @inheritParams response_subgroups
77		#' @param label_all (`string`)\cr label for the total population analysis.
78		#'
79		#' @return A named list of two elements:
80		#' * `prop`: A `data.frame` containing columns `arm`, `n`, `n_rsp`, `prop`, `subgroup`, `var`,
81		#' `var_label`, and `row_type`.
82		#' * `or`: A `data.frame` containing columns `arm`, `n_tot`, `or`, `lcl`, `ucl`, `conf_level`,
83		#' `subgroup`, `var`, `var_label`, and `row_type`.
84		#'
85		#' @seealso [response_subgroups]
86		#'
87		#' @export
extract_rsp_subgroups <- function(variables,
                                  data,
                                  groups_lists = list(),
                                  conf_level = 0.95,
                                  method = NULL,
                                  label_all = "All Patients") {
  # Both helpers are called with the same `variables`, `data`, `groups_lists`
  # and `label_all`; `conf_level` and `method` go to the odds ratio helper only.
  # The proportions frame is computed first, then the odds ratio frame.
  list(
    prop = h_proportion_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      label_all = label_all
    ),
    or = h_odds_ratio_subgroups_df(
      variables,
      data,
      groups_lists = groups_lists,
      conf_level = conf_level,
      method = method,
      label_all = label_all
    )
  )
}
111
112		#' @describeIn response_subgroups Formatted analysis function which is used as `afun` in `tabulate_rsp_subgroups()`.
113		#'
114		#' @return
115		#' * `a_response_subgroups()` returns the corresponding list with formatted [rtables::CellValue()].
116		#'
117		#' @keywords internal
a_response_subgroups <- function(.formats = list(
                                   n = "xx", # nolint start
                                   n_rsp = "xx",
                                   prop = "xx.x%",
                                   n_tot = "xx",
                                   or = list(format_extreme_values(2L)),
                                   ci = list(format_extreme_values_ci(2L)),
                                   # Plain "|" here: "\|" is not a valid escape in an R string literal.
                                   pval = "x.xxxx | (<0.0001)" # nolint end
                                 ),
                                 na_str = default_na_str()) {
  # Builds one analysis function per entry of `.formats`.
  #
  # `.formats` must be a list whose names are a subset of the statistics
  # checked below. `na_str` is forwarded to `in_rows()` as `.format_na_strs`.
  # Returns the list produced by `Map()`; callers in this file index it by
  # statistic name (e.g. `afun_lst[names(colvars$labels)]`).
  checkmate::assert_list(.formats)
  checkmate::assert_subset(
    names(.formats),
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  afun_lst <- Map(
    function(stat, fmt, na_str) {
      if (stat == "ci") {
        # The confidence interval is not a single column of `df`: each cell
        # is assembled from the `lcl` and `ucl` columns.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = combine_vectors(df$lcl, df$ucl),
            .labels = as.character(df$subgroup),
            .formats = fmt,
            .format_na_strs = na_str
          )
        }
      } else {
        # Every other statistic is read from the column of `df` named `stat`.
        function(df, labelstr = "", ...) {
          in_rows(
            .list = as.list(df[[stat]]),
            .labels = as.character(df$subgroup),
            .formats = fmt,
            .format_na_strs = na_str
          )
        }
      }
    },
    stat = names(.formats),
    fmt = .formats,
    na_str = na_str
  )

  afun_lst
}
163
164		#' @describeIn response_subgroups Table-creating function which creates a table
165		#' summarizing binary response by subgroup. This function is a wrapper for [rtables::analyze_colvars()]
166		#' and [rtables::summarize_row_groups()].
167		#'
168		#' @param df (`list`)\cr of data frames containing all analysis variables. List should be
169		#' created using [extract_rsp_subgroups()].
170		#' @param vars (`character`)\cr the names of statistics to be reported among:
171		#' * `n`: Total number of observations per group.
172		#' * `n_rsp`: Number of responders per group.
173		#' * `prop`: Proportion of responders.
174		#' * `n_tot`: Total number of observations.
175		#' * `or`: Odds ratio.
176		#' * `ci` : Confidence interval of odds ratio.
177		#' * `pval`: p-value of the effect.
178		#' Note, the statistics `n_tot`, `or` and `ci` are required.
179		#'
180		#' @return An `rtables` table summarizing binary response by subgroup.
181		#'
182		#' @examples
183		#' ## Table with default columns.
184		#' basic_table() %>%
185		#' tabulate_rsp_subgroups(df)
186		#'
187		#' ## Table with selected columns.
188		#' basic_table() %>%
189		#' tabulate_rsp_subgroups(
190		#' df = df,
191		#' vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
192		#' )
193		#'
194		#' @export
195		#' @order 2
tabulate_rsp_subgroups <- function(lyt,
                                   df,
                                   vars = c("n_tot", "n", "prop", "or", "ci"),
                                   groups_lists = list(),
                                   label_all = "All Patients",
                                   na_str = default_na_str()) {
  # Builds one table from `df$prop` (optional columns) and one from `df$or`
  # (required columns), binds them column-wise and attaches the attributes
  # `forest_header`, `col_x`, `col_ci` and `col_symbol_size`.
  conf_level <- df$or$conf_level[1]

  # A p-value label is only available when `df$or` has a `pval_label` column.
  method <- NULL
  if ("pval_label" %in% names(df$or)) {
    method <- df$or$pval_label[1]
  }

  extra_args <- list(groups_lists = groups_lists, conf_level = conf_level, method = method, label_all = label_all)

  afun_lst <- a_response_subgroups(na_str = na_str)
  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)

  # Partition the requested columns by the data frame they are tabulated from.
  label_names <- names(colvars$labels)
  from_prop <- label_names %in% c("n", "prop", "n_rsp")
  from_or <- label_names %in% c("n_tot", "or", "ci", "pval")

  colvars_prop <- list(vars = colvars$vars[from_prop], labels = colvars$labels[from_prop])
  colvars_or <- list(vars = colvars$vars[from_or], labels = colvars$labels[from_or])

  # Columns from table_prop are optional.
  table_prop <- NULL
  if (length(colvars_prop$vars) > 0) {
    prop_afuns <- afun_lst[names(colvars_prop$labels)]

    # Column structure, followed by the "All Patients" (content) row.
    lyt_prop <- lyt %>%
      split_cols_by(var = "arm") %>%
      split_cols_by_multivar(
        vars = colvars_prop$vars,
        varlabels = colvars_prop$labels
      ) %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("content"),
        nested = FALSE,
        child_labels = "hidden"
      ) %>%
      analyze_colvars(
        afun = prop_afuns,
        na_str = na_str,
        extra_args = extra_args
      )

    # Subgroup rows are only added when the data contain them.
    if ("analysis" %in% df$prop$row_type) {
      lyt_prop <- lyt_prop %>%
        split_rows_by(
          var = "row_type",
          split_fun = keep_split_levels("analysis"),
          nested = FALSE,
          child_labels = "hidden"
        ) %>%
        split_rows_by(var = "var_label", nested = TRUE) %>%
        analyze_colvars(
          afun = prop_afuns,
          na_str = na_str,
          inclNAs = TRUE,
          extra_args = extra_args
        )
    }

    table_prop <- build_table(lyt_prop, df = df$prop)
  }

  # Columns "n_tot", "or", "ci" in table_or are required.
  or_afuns <- afun_lst[names(colvars_or$labels)]

  # Column structure, followed by the "All Patients" (content) row.
  lyt_or <- lyt %>%
    split_cols_by(var = "arm") %>%
    split_cols_by_multivar(
      vars = colvars_or$vars,
      varlabels = colvars_or$labels
    ) %>%
    split_rows_by(
      var = "row_type",
      split_fun = keep_split_levels("content"),
      nested = FALSE,
      child_labels = "hidden"
    ) %>%
    analyze_colvars(
      afun = or_afuns,
      na_str = na_str,
      extra_args = extra_args
    ) %>%
    append_topleft("Baseline Risk Factors")

  if ("analysis" %in% df$or$row_type) {
    lyt_or <- lyt_or %>%
      split_rows_by(
        var = "row_type",
        split_fun = keep_split_levels("analysis"),
        nested = FALSE,
        child_labels = "hidden"
      ) %>%
      split_rows_by(var = "var_label", nested = TRUE) %>%
      analyze_colvars(
        afun = or_afuns,
        na_str = na_str,
        inclNAs = TRUE,
        extra_args = extra_args
      )
  }
  table_or <- build_table(lyt_or, df = df$or)

  # Column positions of the odds ratio, its CI and the total count in the
  # final table; they shift when the proportion columns are inserted.
  n_tot_id <- match("n_tot", colvars_or$vars)
  if (is.null(table_prop)) {
    result <- table_or
    or_id <- match("or", colvars_or$vars)
    ci_id <- match("lcl", colvars_or$vars)
  } else {
    # Order: total n, then the proportion columns, then the remaining OR columns.
    result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
    n_leading_cols <- 1L + ncol(table_prop)
    remaining_or_vars <- colvars_or$vars[-n_tot_id]
    or_id <- n_leading_cols + match("or", remaining_or_vars)
    ci_id <- n_leading_cols + match("lcl", remaining_or_vars)
    n_tot_id <- 1L
  }

  structure(
    result,
    forest_header = paste0(levels(df$prop$arm), "\nBetter"),
    col_x = or_id,
    col_ci = ci_id,
    col_symbol_size = n_tot_id
  )
}
332
333		#' Labels for Column Variables in Binary Response by Subgroup Table
334		#'
335		#' @description `r lifecycle::badge("stable")`
336		#'
337		#' Internal function to check variables included in [tabulate_rsp_subgroups()] and create column labels.
338		#'
339		#' @inheritParams argument_convention
340		#' @inheritParams tabulate_rsp_subgroups
341		#'
342		#' @return A `list` of variables to tabulate and their labels.
343		#'
344		#' @export
d_rsp_subgroups_colvars <- function(vars,
                                    conf_level = NULL,
                                    method = NULL) {
  # Input checks: `n_tot`, `or` and `ci` are mandatory and nothing outside
  # the known set of statistics is allowed.
  checkmate::assert_character(vars)
  checkmate::assert_subset(c("n_tot", "or", "ci"), vars)
  checkmate::assert_subset(
    vars,
    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
  )

  # Labels that do not depend on any argument.
  label_lookup <- c(
    n = "n",
    n_rsp = "Responders",
    prop = "Response (%)",
    n_tot = "Total n",
    or = "Odds Ratio"
  )
  tab_vars <- vars

  if ("ci" %in% vars) {
    # The CI label needs the confidence level.
    checkmate::assert_false(is.null(conf_level))
    label_lookup <- c(label_lookup, ci = paste0(100 * conf_level, "% CI"))

    # "ci" is not a column of the analysis data, so `lcl` stands in for it as
    # the column variable. The cell itself is built from both `lcl` and `ucl`,
    # see `a_response_subgroups` for details.
    tab_vars <- replace(tab_vars, tab_vars == "ci", "lcl")
  }

  if ("pval" %in% vars) {
    # The p-value column is labelled with `method`.
    label_lookup <- c(label_lookup, pval = method)
  }

  list(
    vars = tab_vars,
    labels = label_lookup[vars]
  )
}

1		#' Split Function to Configure Risk Difference Column
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Wrapper function for [rtables::add_combo_levels()] which configures settings for the risk difference
6		#' column to be added to an `rtables` object. To add a risk difference column to a table, this function
7		#' should be used as `split_fun` in calls to [rtables::split_cols_by()], followed by setting argument
8		#' `riskdiff` to `TRUE` in all following analyze function calls.
9		#'
10		#' @param arm_x (`character`)\cr Name of reference arm to use in risk difference calculations.
11		#' @param arm_y (`character`)\cr Names of one or more arms to compare to reference arm in risk difference
12		#' calculations. A new column will be added for each value of `arm_y`.
13		#' @param col_label (`character`)\cr Labels to use when rendering the risk difference column within the table.
14		#' If more than one comparison arm is specified in `arm_y`, default labels will specify which two arms are
15		#' being compared (reference arm vs. comparison arm).
16		#' @param pct (`flag`)\cr whether output should be returned as percentages. Defaults to `TRUE`.
17		#'
18		#' @return A closure suitable for use as a split function (`split_fun`) within [rtables::split_cols_by()]
19		#' when creating a table layout.
20		#'
21		#' @seealso [stat_propdiff_ci()] for details on risk difference calculation.
22		#'
23		#' @examples
24		#' adae <- tern_ex_adae
25		#' adae$AESEV <- factor(adae$AESEV)
26		#'
27		#' lyt <- basic_table() %>%
28		#' split_cols_by("ARMCD", split_fun = add_riskdiff(arm_x = "ARM A", arm_y = c("ARM B", "ARM C"))) %>%
29		#' count_occurrences_by_grade(
30		#' var = "AESEV",
31		#' riskdiff = TRUE
32		#' )
33		#'
34		#' tbl <- build_table(lyt, df = adae)
35		#' tbl
36		#'
37		#' @export
add_riskdiff <- function(arm_x,
                         arm_y,
                         col_label = paste0(
                           "Risk Difference (%) (95% CI)", if (length(arm_y) > 1) paste0("\n", arm_x, " vs. ", arm_y)
                         ),
                         pct = TRUE) {
  checkmate::assert_character(arm_x, len = 1)
  checkmate::assert_character(arm_y, min.len = 1)
  checkmate::assert_character(col_label, len = length(arm_y))

  # One single-row combination definition per comparison arm: the level name
  # encodes both arms, and the combined levels are the reference arm plus
  # that comparison arm.
  combo_rows <- lapply(seq_along(arm_y), function(i) {
    tibble::tribble(
      ~valname, ~label, ~levelcombo, ~exargs,
      paste("riskdiff", arm_x, arm_y[i], sep = "_"), col_label[i], c(arm_x, arm_y[i]), list()
    )
  })

  # Stack the rows one at a time onto an empty definition table.
  combodf <- Reduce(
    rbind,
    combo_rows,
    tibble::tribble(~valname, ~label, ~levelcombo, ~exargs)
  )

  # A "_pct" suffix on the level name marks percentage output.
  if (pct) {
    combodf$valname <- paste0(combodf$valname, "_pct")
  }

  add_combo_levels(combodf)
}
61
62		#' Analysis Function to Calculate Risk Difference Column Values
63		#'
64		#' In the risk difference column, this function uses the statistics function associated with `afun` to
65		#' calculate risk difference values from arm X (reference group) and arm Y. These arms are specified
66		#' when configuring the risk difference column which is done using the [add_riskdiff()] split function in
67		#' the previous call to [rtables::split_cols_by()]. For all other columns, applies `afun` as usual. This
68		#' function utilizes the [stat_propdiff_ci()] function to perform risk difference calculations.
69		#'
70		#' @inheritParams argument_convention
71		#' @param afun (named `list`)\cr A named list containing one name-value pair where the name corresponds to
72		#' the name of the statistics function that should be used in calculations and the value is the corresponding
73		#' analysis function.
74		#' @param s_args (named `list`)\cr Additional arguments to be passed to the statistics function and analysis
75		#' function supplied in `afun`.
76		#'
77		#' @return A list of formatted [rtables::CellValue()].
78		#'
79		#' @seealso
80		#' * [stat_propdiff_ci()] for details on risk difference calculation.
81		#' * Split function [add_riskdiff()] which, when used as `split_fun` within [rtables::split_cols_by()] with
82		#' `riskdiff` argument set to `TRUE` in subsequent analyze function calls, adds a risk difference column
83		#' to a table layout.
84		#'
85		#' @keywords internal
afun_riskdiff <- function(df,
                          labelstr = "",
                          .var,
                          .N_col, # nolint
                          .N_row, # nolint
                          .df_row,
                          .spl_context,
                          .all_col_counts,
                          .stats,
                          .formats = NULL,
                          .labels = NULL,
                          .indent_mods = NULL,
                          na_str = default_na_str(),
                          afun,
                          s_args = list()) {
  # The split context must mention "riskdiff", otherwise no risk difference
  # levels were configured.
  has_riskdiff_split <- any(grepl("riskdiff", names(.spl_context)))
  if (!has_riskdiff_split) {
    stop(
      "Please set up levels to use in risk difference calculations using the `add_riskdiff` ",
      "split function within `split_cols_by`. See ?add_riskdiff for details."
    )
  }
  checkmate::assert_list(afun, len = 1, types = "function")
  checkmate::assert_named(afun)

  # Candidate arguments for the analysis function, reduced to those it
  # actually declares. `denom` is dropped when the caller sets it via `s_args`.
  afun_args <- list(
    .var = .var, .df_row = .df_row, .N_row = .N_row, denom = "N_col", labelstr = labelstr,
    .stats = .stats, .formats = .formats, .labels = .labels, .indent_mods = .indent_mods, na_str = na_str
  )
  afun_formals <- names(as.list(args(afun[[1]])))
  afun_args <- afun_args[intersect(names(afun_args), afun_formals)]
  if ("denom" %in% names(s_args)) {
    afun_args[["denom"]] <- NULL
  }

  cur_split <- tail(.spl_context$cur_col_split_val[[1]], 1)

  # Apply basic afun (no risk difference) in all other columns
  if (!grepl("^riskdiff", cur_split)) {
    return(
      do.call(afun[[1]], args = c(list(df = df, .N_col = .N_col), afun_args, s_args))
    )
  }

  # The column value has the form "riskdiff_<arm x>_<arm y>[_pct]".
  split_parts <- strsplit(cur_split, "_")[[1]]
  arm_x <- split_parts[2]
  arm_y <- split_parts[3]

  if (length(.spl_context$cur_col_split[[1]]) > 1) { # Different split name for nested column splits
    col_id_parts <- strsplit(.spl_context$cur_col_id[1], "_")[[1]]
    arm_spl_x <- gsub("riskdiff", "", paste0(col_id_parts[c(1, 2)], collapse = ""))
    arm_spl_y <- gsub("riskdiff", "", paste0(col_id_parts[c(1, 3)], collapse = ""))
  } else {
    arm_spl_x <- arm_x
    arm_spl_y <- arm_y
  }
  N_col_x <- .all_col_counts[[arm_spl_x]] # nolint
  N_col_y <- .all_col_counts[[arm_spl_y]] # nolint
  cur_var <- tail(.spl_context$cur_col_split[[1]], 1)

  # Apply statistics function to arm X and arm Y data. The statistics function
  # is looked up by the name given in `afun` and receives only the arguments
  # it declares.
  sfun_formals <- names(as.list(args(names(afun))))
  s_args <- c(s_args, afun_args[intersect(names(afun_args), sfun_formals)])
  s_x <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_x, ], .N_col = N_col_x), s_args))
  s_y <- do.call(names(afun), args = c(list(df = df[df[[cur_var]] == arm_y, ], .N_col = N_col_y), s_args))

  # Get statistic name and row names
  stat <- if ("count_fraction" %in% names(s_x)) "count_fraction" else "unique"
  if ("flag_variables" %in% names(s_args)) {
    var_nms <- s_args$flag_variables
  } else if (!is.null(names(s_x[[stat]]))) {
    var_nms <- names(s_x[[stat]])
  } else {
    # Unnamed statistic: wrap it in a list so it is handled as a single row.
    var_nms <- ""
    s_x[[stat]] <- list(s_x[[stat]])
    s_y[[stat]] <- list(s_y[[stat]])
  }

  # Calculate risk difference for each row, repeated if multiple statistics in table
  pct <- tail(split_parts, 1) == "pct"
  n_repeats <- max(1, length(.stats))
  rd_ci <- rep(
    stat_propdiff_ci(
      lapply(s_x[[stat]], `[`, 1), lapply(s_y[[stat]], `[`, 1),
      N_col_x, N_col_y,
      list_names = var_nms,
      pct = pct
    ),
    n_repeats
  )

  in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods)
}

1		#' Summarize numeric variables in columns
2		#'
3		#' @description `r lifecycle::badge("experimental")`
4		#'
5		#' Layout-creating function which can be used for creating column-wise summary tables.
6		#' This function sets the analysis methods as column labels and is a wrapper for
7		#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
8		#'
9		#' @inheritParams argument_convention
10		#' @inheritParams rtables::analyze_colvars
11		#' @param imp_rule (`character`)\cr imputation rule setting. Defaults to `NULL` for no imputation rule. Can
12		#' also be `"1/3"` to implement 1/3 imputation rule or `"1/2"` to implement 1/2 imputation rule. In order
13		#' to use an imputation rule, the `avalcat_var` argument must be specified. See [imputation_rule()]
14		#' for more details on imputation.
15		#' @param avalcat_var (`character`)\cr if `imp_rule` is not `NULL`, name of variable that indicates whether a
16		#' row in the data corresponds to an analysis value in category `"BLQ"`, `"LTR"`, `"<PCLLOQ"`, or none of
17		#' the above (defaults to `"AVALCAT1"`). Variable must be present in the data and should match the variable
18		#' used to calculate the `n_blq` statistic (if included in `.stats`).
19		#' @param cache (`flag`)\cr whether to store computed values in a temporary caching environment. This will
20		#' speed up calculations in large tables, but should be set to `FALSE` if the same `rtable` layout is
21		#' used for multiple tables with different data. Defaults to `FALSE`.
22		#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
23		#' character vector applies on the column space. You can change the row labels by defining this
24		#' parameter to a named character vector with names corresponding to the split values. It defaults
25		#' to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
26		#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
27		#' label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
28		#' to define row labels. This behavior is not supported as we never need to overload row labels.
29		#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
30		#' This option allows you to add multiple instances of this functions, also in a nested fashion,
31		#' without adding more splits. This split must happen only one time on a single layout.
32		#'
33		#' @return
34		#' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
35		#' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
36		#' in columns, and add it to the table layout.
37		#'
38		#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
39		#' [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
40		#' support to more complex analysis pipelines on the column space. For the same reasons,
41		#' we encourage to read the examples carefully and file issues for cases that differ from
42		#' them.
43		#'
44		#' Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
45		#' row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
46		#' (`do_summarize_row_groups = FALSE`, the default), and to the group label for
47		#' `do_summarize_row_groups = TRUE`.
48		#'
49		#' @seealso [analyze_vars()], [rtables::analyze_colvars()].
50		#'
51		#' @examples
52		#' library(dplyr)
53		#'
54		#' # Data preparation
55		#' adpp <- tern_ex_adpp %>% h_pkparam_sort()
56		#'
57		#' lyt <- basic_table() %>%
58		#' split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
59		#' split_rows_by(
60		#' var = "SEX",
61		#' label_pos = "topleft",
62		#' child_label = "hidden"
63		#' ) %>% # Removes duplicated labels
64		#' analyze_vars_in_cols(vars = "AGE")
65		#' result <- build_table(lyt = lyt, df = adpp)
66		#' result
67		#'
68		#' # By selecting just some statistics and ad-hoc labels
69		#' lyt <- basic_table() %>%
70		#' split_rows_by(var = "ARM", label_pos = "topleft") %>%
71		#' split_rows_by(
72		#' var = "SEX",
73		#' label_pos = "topleft",
74		#' child_labels = "hidden",
75		#' split_fun = drop_split_levels
76		#' ) %>%
77		#' analyze_vars_in_cols(
78		#' vars = "AGE",
79		#' .stats = c("n", "cv", "geom_mean"),
80		#' .labels = c(
81		#' n = "aN",
82		#' cv = "aCV",
83		#' geom_mean = "aGeomMean"
84		#' )
85		#' )
86		#' result <- build_table(lyt = lyt, df = adpp)
87		#' result
88		#'
89		#' # Changing row labels
90		#' lyt <- basic_table() %>%
91		#' analyze_vars_in_cols(
92		#' vars = "AGE",
93		#' row_labels = "some custom label"
94		#' )
95		#' result <- build_table(lyt, df = adpp)
96		#' result
97		#'
98		#' # Pharmacokinetic parameters
99		#' lyt <- basic_table() %>%
100		#' split_rows_by(
101		#' var = "TLG_DISPLAY",
102		#' split_label = "PK Parameter",
103		#' label_pos = "topleft",
104		#' child_label = "hidden"
105		#' ) %>%
106		#' analyze_vars_in_cols(
107		#' vars = "AVAL"
108		#' )
109		#' result <- build_table(lyt, df = adpp)
110		#' result
111		#'
112		#' # Multiple calls (summarize label and analyze underneath)
113		#' lyt <- basic_table() %>%
114		#' split_rows_by(
115		#' var = "TLG_DISPLAY",
116		#' split_label = "PK Parameter",
117		#' label_pos = "topleft"
118		#' ) %>%
119		#' analyze_vars_in_cols(
120		#' vars = "AVAL",
121		#' do_summarize_row_groups = TRUE # does a summarize level
122		#' ) %>%
123		#' split_rows_by("SEX",
124		#' child_label = "hidden",
125		#' label_pos = "topleft"
126		#' ) %>%
127		#' analyze_vars_in_cols(
128		#' vars = "AVAL",
129		#' split_col_vars = FALSE # avoids re-splitting the columns
130		#' )
131		#' result <- build_table(lyt, df = adpp)
132		#' result
133		#'
134		#' @export
analyze_vars_in_cols <- function(lyt,
                                 vars,
                                 ...,
                                 .stats = c(
                                   "n",
                                   "mean",
                                   "sd",
                                   "se",
                                   "cv",
                                   "geom_cv"
                                 ),
                                 .labels = c(
                                   n = "n",
                                   mean = "Mean",
                                   sd = "SD",
                                   se = "SE",
                                   cv = "CV (%)",
                                   geom_cv = "CV % Geometric Mean"
                                 ),
                                 row_labels = NULL,
                                 do_summarize_row_groups = FALSE,
                                 split_col_vars = TRUE,
                                 imp_rule = NULL,
                                 avalcat_var = "AVALCAT1",
                                 cache = FALSE,
                                 .indent_mods = NULL,
                                 na_level = lifecycle::deprecated(),
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .formats = NULL,
                                 .aligns = NULL) {
  extra_args <- list(...)

  # `na_level` is deprecated in favour of `na_str`; honour it with a warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "analyze_vars_in_cols(na_level)", "analyze_vars_in_cols(na_str)")
    na_str <- na_level
  }

  checkmate::assert_string(na_str, na.ok = TRUE, null.ok = TRUE)
  checkmate::assert_character(row_labels, null.ok = TRUE)
  checkmate::assert_int(.indent_mods, null.ok = TRUE)
  checkmate::assert_flag(nested)
  checkmate::assert_flag(split_col_vars)
  checkmate::assert_flag(do_summarize_row_groups)

  # Filtering: resolve requested statistics with their formats and labels
  met_grps <- paste0("analyze_vars", c("_numeric", "_counts"))
  .stats <- get_stats(met_grps, stats_in = .stats)
  formats_v <- get_formats_from_stats(stats = .stats, formats_in = .formats)
  labels_v <- get_labels_from_stats(stats = .stats, labels_in = .labels)
  if ("control" %in% names(extra_args)) {
    labels_v <- labels_v %>% labels_use_control(extra_args[["control"]], .labels)
  }

  # Check for vars in the case that one or more are used: a single variable is
  # repeated once per statistic, otherwise lengths must match.
  if (length(vars) == 1) {
    vars <- rep(vars, length(.stats))
  } else if (length(vars) != length(.stats)) {
    stop(
      "Analyzed variables (vars) does not have the same ",
      "number of elements of specified statistics (.stats)."
    )
  }

  if (split_col_vars) {
    # Checking there is not a previous identical column split
    clyt <- tail(clayout(lyt), 1)[[1]]

    dummy_lyt <- split_cols_by_multivar(
      lyt = basic_table(),
      vars = vars,
      varlabels = labels_v
    )

    # vapply() guarantees a logical vector even when `clyt` is empty
    if (any(vapply(clyt, identical, logical(1), y = get_last_col_split(dummy_lyt)))) {
      stop(
        "Column split called again with the same values. ",
        "This can create many unwanted columns. Please consider adding ",
        "split_col_vars = FALSE to the last call of ",
        deparse(sys.calls()[[sys.nframe() - 1]]), "."
      )
    }

    # Main col split
    lyt <- split_cols_by_multivar(
      lyt = lyt,
      vars = vars,
      varlabels = labels_v
    )
  }

  env <- new.env() # create caching environment

  # Values that do not depend on the cell are computed once here instead of
  # being recomputed inside every cell function call.
  indent_mod <- if (is.null(.indent_mods)) 0L else .indent_mods
  imputable_stats <- c("mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max")

  # Cache key built from the current column split value(s) (with the
  # "._[[i]]_." multivar markers stripped) and the full row split path.
  cache_key <- function(.spl_context) {
    col_part <- gsub(
      "\\._\\[\\[[0-9]+\\]\\]_\\.", "",
      paste(tail(.spl_context$cur_col_split_val, 1)[[1]], collapse = "_")
    )
    paste(col_part, paste(.spl_context$value, collapse = "_"), sep = "_")
  }

  if (do_summarize_row_groups) {
    if (length(unique(vars)) > 1) {
      stop("When using do_summarize_row_groups only one label level var should be inserted.")
    }

    # Function list for do_summarize_row_groups. Slightly different handling of labels
    cfun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, labelstr, .df_row, ...) {
          # Statistic
          var_row_val <- cache_key(.spl_context)
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          if (is.null(imp_rule) || !stat %in% imputable_stats) {
            res <- x_stats[[stat]]
          } else {
            timept <- as.numeric(gsub(".?([0-9\\.]+).", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override only: the imputation rule decides the NA string for this cell
            na_str <- res_imp[["na_str"]]
          }

          # Label check and replacement
          if (length(row_labels) > 1) {
            if (!(labelstr %in% names(row_labels))) {
              stop(
                "Replacing the labels in do_summarize_row_groups needs a named vector ",
                "that contains the split values. In the current split variable ",
                .spl_context$split[nrow(.spl_context)],
                " the labelstr value (split value by default) ", labelstr, " is not in",
                " row_labels names: ", paste(names(row_labels), collapse = ", ")
              )
            }
            lbl <- unlist(row_labels[labelstr])
          } else {
            lbl <- labelstr
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    summarize_row_groups(
      lyt = lyt,
      var = unique(vars),
      cfun = cfun_list,
      na_str = na_str,
      extra_args = extra_args
    )
  } else {
    # Function list for analyze_colvars
    afun_list <- Map(
      function(stat, use_cache, cache_env) {
        function(u, .spl_context, .df_row, ...) {
          # Main statistics
          var_row_val <- cache_key(.spl_context)
          if (use_cache) {
            if (is.null(cache_env[[var_row_val]])) cache_env[[var_row_val]] <- s_summary(u, ...)
            x_stats <- cache_env[[var_row_val]]
          } else {
            x_stats <- s_summary(u, ...)
          }

          if (is.null(imp_rule) || !stat %in% imputable_stats) {
            res <- x_stats[[stat]]
          } else {
            timept <- as.numeric(gsub(".?([0-9\\.]+).", "\\1", tail(.spl_context$value, 1)))
            res_imp <- imputation_rule(
              .df_row, x_stats, stat,
              imp_rule = imp_rule,
              post = grepl("Predose", tail(.spl_context$value, 1)) || timept > 0,
              avalcat_var = avalcat_var
            )
            res <- res_imp[["val"]]
            # Local override only: the imputation rule decides the NA string for this cell
            na_str <- res_imp[["na_str"]]
          }

          # A list result is only acceptable when it holds a single category
          if (is.list(res)) {
            if (length(res) > 1) {
              stop("The analyzed column produced more than one category of results.")
            } else {
              res <- unlist(res)
            }
          }

          # Label from context
          label_from_context <- .spl_context$value[nrow(.spl_context)]

          # Label switcher
          if (is.null(row_labels)) {
            lbl <- label_from_context
          } else {
            if (length(row_labels) > 1) {
              if (!(label_from_context %in% names(row_labels))) {
                stop(
                  "Replacing the row labels in analyze_vars_in_cols needs a named vector ",
                  "that contains the split values. In the current split variable ",
                  .spl_context$split[nrow(.spl_context)],
                  " the split value ", label_from_context, " is not in",
                  " row_labels names: ", paste(names(row_labels), collapse = ", ")
                )
              }
              lbl <- unlist(row_labels[label_from_context])
            } else {
              lbl <- row_labels
            }
          }

          # Cell creation
          rcell(res,
            label = lbl,
            format = formats_v[names(formats_v) == stat][[1]],
            format_na_str = na_str,
            indent_mod = indent_mod,
            align = .aligns
          )
        }
      },
      stat = .stats,
      use_cache = cache,
      cache_env = replicate(length(.stats), env)
    )

    # Main call to rtables
    analyze_colvars(lyt,
      afun = afun_list,
      na_str = na_str,
      nested = nested,
      extra_args = extra_args
    )
  }
}
387
# Helper: returns the last split of the last column-layout element of `lyt`.
get_last_col_split <- function(lyt) {
  last_col_layout <- tail(clayout(lyt), 1)[[1]]
  tail(last_col_layout, 1)[[1]]
}

1		#' Subgroup Treatment Effect Pattern (STEP) Fit for Binary (Response) Outcome
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This fits the Subgroup Treatment Effect Pattern logistic regression models for a binary
6		#' (response) outcome. The treatment arm variable must have exactly 2 levels,
7		#' where the first one is taken as reference and the estimated odds ratios are
8		#' for the comparison of the second level vs. the first one.
9		#'
10		#' The (conditional) logistic regression model which is fit is:
11		#'
12		#' `response ~ arm * poly(biomarker, degree) + covariates + strata(strata)`
13		#'
14		#' where `degree` is specified by `control_step()`.
15		#'
16		#' @inheritParams argument_convention
17		#' @param variables (named `list` of `character`)\cr list of analysis variables:
18		#' needs `response`, `arm`, `biomarker`, and optional `covariates` and `strata`.
19		#' @param control (named `list`)\cr combined control list from [control_step()]
20		#' and [control_logistic()].
21		#'
22		#' @return A matrix of class `step`. The first part of the columns describe the
23		#' subgroup intervals used for the biomarker variable, including where the
24		#' center of the intervals are and their bounds. The second part of the
25		#' columns contain the estimates for the treatment arm comparison.
26		#'
27		#' @note For the default degree 0 the `biomarker` variable is not included in the model.
28		#'
29		#' @seealso [control_step()] and [control_logistic()] for the available
30		#' customization options.
31		#'
32		#' @examples
33		#' # Testing dataset with just two treatment arms.
34		#' library(survival)
35		#' library(dplyr)
36		#'
37		#' adrs_f <- tern_ex_adrs %>%
38		#' filter(
39		#' PARAMCD == "BESRSPI",
40		#' ARM %in% c("B: Placebo", "A: Drug X")
41		#' ) %>%
42		#' mutate(
43		#' # Reorder levels of ARM to have Placebo as reference arm for Odds Ratio calculations.
44		#' ARM = droplevels(forcats::fct_relevel(ARM, "B: Placebo")),
45		#' RSP = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
46		#' SEX = factor(SEX)
47		#' )
48		#'
49		#' variables <- list(
50		#' arm = "ARM",
51		#' biomarker = "BMRKR1",
52		#' covariates = "AGE",
53		#' response = "RSP"
54		#' )
55		#'
56		#' # Fit default STEP models: Here a constant treatment effect is estimated in each subgroup.
57		#' # We use a large enough bandwidth to avoid too small subgroups and linear separation in those.
58		#' step_matrix <- fit_rsp_step(
59		#' variables = variables,
60		#' data = adrs_f,
61		#' control = c(control_logistic(), control_step(bandwidth = 0.5))
62		#' )
63		#' dim(step_matrix)
64		#' head(step_matrix)
65		#'
66		#' # Specify different polynomial degree for the biomarker interaction to use more flexible local
67		#' # models. Or specify different logistic regression options, including confidence level.
68		#' step_matrix2 <- fit_rsp_step(
69		#' variables = variables,
70		#' data = adrs_f,
71		#' control = c(control_logistic(conf_level = 0.9), control_step(bandwidth = 0.6, degree = 1))
72		#' )
73		#'
74		#' # Use a global constant model. This is helpful as a reference for the subgroup models.
75		#' step_matrix3 <- fit_rsp_step(
76		#' variables = variables,
77		#' data = adrs_f,
78		#' control = c(control_logistic(), control_step(bandwidth = NULL, num_points = 2L))
79		#' )
80		#'
81		#' # It is also possible to use strata, i.e. use conditional logistic regression models.
82		#' variables2 <- list(
83		#' arm = "ARM",
84		#' biomarker = "BMRKR1",
85		#' covariates = "AGE",
86		#' response = "RSP",
87		#' strata = c("STRATA1", "STRATA2")
88		#' )
89		#'
90		#' step_matrix4 <- fit_rsp_step(
91		#' variables = variables2,
92		#' data = adrs_f,
93		#' control = c(control_logistic(), control_step(bandwidth = 0.6))
94		#' )
95		#'
96		#' @export
fit_rsp_step <- function(variables,
                         data,
                         control = c(control_step(), control_logistic())) {
  assert_df_with_variables(data, variables)
  checkmate::assert_list(control, names = "named")

  # Only rows with an observed biomarker value take part in the analysis.
  has_biomarker <- !is.na(data[[variables$biomarker]])
  data <- data[has_biomarker, ]

  windows <- h_step_window(x = data[[variables$biomarker]], control = control)
  centers <- windows$interval[, "Interval Center"]
  model_formula <- h_step_rsp_formula(variables = variables, control = control)

  if (is.null(control$bandwidth)) {
    # No bandwidth: a single fit on all data, evaluated at every interval center.
    estimates <- h_step_rsp_est(
      formula = model_formula,
      data = data,
      variables = variables,
      x = centers,
      control = control
    )
  } else {
    # One fit per window; each column of `windows$sel` is passed as that fit's `subset`.
    window_subsets <- as.list(as.data.frame(windows$sel))
    per_window <- mapply(
      FUN = h_step_rsp_est,
      x = centers,
      subset = window_subsets,
      MoreArgs = list(
        formula = model_formula,
        data = data,
        variables = variables,
        control = control
      )
    )
    # mapply() returns one column per window; label the rows and transpose
    # so that each window becomes a row.
    rownames(per_window) <- c("n", "logor", "se", "ci_lower", "ci_upper")
    estimates <- t(per_window)
  }

  structure(
    cbind(windows$interval, estimates),
    class = c("step", "matrix"),
    variables = variables,
    control = control
  )
}

1		#' Multivariate Logistic Regression Table
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Layout-creating function which summarizes a logistic variable regression for binary outcome with
6		#' categorical/continuous covariates in model statement. For each covariate category (if categorical)
7		#' or specified values (if continuous), present degrees of freedom, regression parameter estimate and
8		#' standard error (SE) relative to reference group or category. Report odds ratios for each covariate
9		#' category or specified values and corresponding Wald confidence intervals as default but allow user
10		#' to specify other confidence levels. Report p-value for Wald chi-square test of the null hypothesis
11		#' that covariate has no effect on response in model containing all specified covariates.
12		#' Allow option to include one two-way interaction and present similar output for
13		#' each interaction degree of freedom.
14		#'
15		#' @inheritParams argument_convention
16		#' @param drop_and_remove_str (`character`)\cr string to be dropped and removed.
17		#'
18		#' @return A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
19		#' Adding this function to an `rtable` layout will add a logistic regression variable summary to the table layout.
20		#'
21		#' @note For the formula, the variable names need to be standard `data.frame` column names without
22		#' special characters.
23		#'
24		#' @examples
25		#' library(dplyr)
26		#' library(broom)
27		#'
28		#' adrs_f <- tern_ex_adrs %>%
29		#' filter(PARAMCD == "BESRSPI") %>%
30		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
31		#' mutate(
32		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
33		#' RACE = factor(RACE),
34		#' SEX = factor(SEX)
35		#' )
36		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
37		#' mod1 <- fit_logistic(
38		#' data = adrs_f,
39		#' variables = list(
40		#' response = "Response",
41		#' arm = "ARMCD",
42		#' covariates = c("AGE", "RACE")
43		#' )
44		#' )
45		#' mod2 <- fit_logistic(
46		#' data = adrs_f,
47		#' variables = list(
48		#' response = "Response",
49		#' arm = "ARMCD",
50		#' covariates = c("AGE", "RACE"),
51		#' interaction = "AGE"
52		#' )
53		#' )
54		#'
55		#' df <- tidy(mod1, conf_level = 0.99)
56		#' df2 <- tidy(mod2, conf_level = 0.99)
57		#'
58		#' # flagging empty strings with "_"
59		#' df <- df_explicit_na(df, na_level = "_")
60		#' df2 <- df_explicit_na(df2, na_level = "_")
61		#'
62		#' result1 <- basic_table() %>%
63		#' summarize_logistic(
64		#' conf_level = 0.95,
65		#' drop_and_remove_str = "_"
66		#' ) %>%
67		#' build_table(df = df)
68		#' result1
69		#'
70		#' result2 <- basic_table() %>%
71		#' summarize_logistic(
72		#' conf_level = 0.95,
73		#' drop_and_remove_str = "_"
74		#' ) %>%
75		#' build_table(df = df2)
76		#' result2
77		#'
78		#' @export
79		#' @order 1
summarize_logistic <- function(lyt,
                               conf_level,
                               drop_and_remove_str = "",
                               .indent_mods = NULL) {
  # checks
  checkmate::assert_string(drop_and_remove_str)

  # Split function shared by all row splits
  drop_fun <- drop_and_remove_levels(drop_and_remove_str)

  # Content-row adders, one per flag column of the tidied model data
  add_variable_test <- logistic_summary_by_flag("is_variable_summary")
  add_term_estimates <- logistic_summary_by_flag("is_term_summary", .indent_mods = .indent_mods)
  add_odds_ratios <- logistic_summary_by_flag("is_reference_summary", .indent_mods = .indent_mods)

  # Columns first, then nested row splits, each followed by its content rows
  lyt %>%
    logistic_regression_cols(conf_level = conf_level) %>%
    split_rows_by(var = "variable", labels_var = "variable_label", split_fun = drop_fun) %>%
    add_variable_test() %>%
    split_rows_by(var = "term", labels_var = "term_label", split_fun = drop_fun) %>%
    add_term_estimates() %>%
    split_rows_by(var = "interaction", labels_var = "interaction_label", split_fun = drop_fun) %>%
    split_rows_by(var = "reference", labels_var = "reference_label", split_fun = drop_fun) %>%
    add_odds_ratios()
}
102
103		#' Fit for Logistic Regression
104		#'
105		#' @description `r lifecycle::badge("stable")`
106		#'
107		#' Fit a (conditional) logistic regression model.
108		#'
109		#' @inheritParams argument_convention
110		#' @param data (`data.frame`)\cr the data frame on which the model was fit.
111		#' @param response_definition (`string`)\cr the definition of what an event is in terms of `response`.
112		#' This will be used when fitting the (conditional) logistic regression model on the left hand
113		#' side of the formula.
114		#'
115		#' @return A fitted logistic regression model.
116		#'
117		#' @section Model Specification:
118		#'
119		#' The `variables` list needs to include the following elements:
120		#' * `arm`: Treatment arm variable name.
121		#' * `response`: The response variable name. Usually this is a 0/1 variable.
122		#' * `covariates`: This is either `NULL` (no covariates) or a character vector of covariate variable names.
123		#' * `interaction`: This is either `NULL` (no interaction) or a string of a single covariate variable name already
124		#' included in `covariates`. Then the interaction with the treatment arm is included in the model.
125		#'
126		#' @examples
127		#' library(dplyr)
128		#'
129		#' adrs_f <- tern_ex_adrs %>%
130		#' filter(PARAMCD == "BESRSPI") %>%
131		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
132		#' mutate(
133		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
134		#' RACE = factor(RACE),
135		#' SEX = factor(SEX)
136		#' )
137		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
138		#' mod1 <- fit_logistic(
139		#' data = adrs_f,
140		#' variables = list(
141		#' response = "Response",
142		#' arm = "ARMCD",
143		#' covariates = c("AGE", "RACE")
144		#' )
145		#' )
146		#' mod2 <- fit_logistic(
147		#' data = adrs_f,
148		#' variables = list(
149		#' response = "Response",
150		#' arm = "ARMCD",
151		#' covariates = c("AGE", "RACE"),
152		#' interaction = "AGE"
153		#' )
154		#' )
155		#'
156		#' @export
fit_logistic <- function(data,
                         variables = list(
                           response = "Response",
                           arm = "ARMCD",
                           covariates = NULL,
                           interaction = NULL,
                           strata = NULL
                         ),
                         response_definition = "response") {
  assert_df_with_variables(data, variables)
  checkmate::assert_subset(names(variables), c("response", "arm", "covariates", "interaction", "strata"))
  checkmate::assert_string(response_definition)
  # The definition must mention the literal placeholder "response"
  checkmate::assert_true(grepl("response", response_definition, fixed = TRUE))

  # Substitute every occurrence of the placeholder with the actual response
  # variable name, so definitions that mention `response` more than once
  # (e.g. "response > 0 & response < 3") do not leave a dangling symbol.
  response_definition <- gsub(
    pattern = "response",
    replacement = variables$response,
    x = response_definition,
    fixed = TRUE
  )

  # Build the model formula as a string: response ~ arm [+ covariates] [+ arm:interaction] [+ strata(...)]
  form <- paste0(response_definition, " ~ ", variables$arm)
  if (!is.null(variables$covariates)) {
    form <- paste0(form, " + ", paste(variables$covariates, collapse = " + "))
  }
  if (!is.null(variables$interaction)) {
    # Only a single interaction variable is supported and it must be one of the covariates
    checkmate::assert_string(variables$interaction)
    checkmate::assert_subset(variables$interaction, variables$covariates)
    form <- paste0(form, " + ", variables$arm, ":", variables$interaction)
  }
  if (!is.null(variables$strata)) {
    # Several strata variables are combined into a single interaction factor
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    form <- paste0(form, " + strata(", strata_arg, ")")
  }
  formula <- stats::as.formula(form)

  # Without strata fit an ordinary logistic regression, otherwise a conditional one
  if (is.null(variables$strata)) {
    stats::glm(
      formula = formula,
      data = data,
      family = stats::binomial("logit")
    )
  } else {
    clogit_with_tryCatch(
      formula = formula,
      data = data,
      x = TRUE
    )
  }
}
209
210		#' Custom Tidy Method for Binomial GLM Results
211		#'
212		#' @description `r lifecycle::badge("stable")`
213		#'
214		#' Helper method (for [broom::tidy()]) to prepare a data frame from a `glm` object
215		#' with `binomial` family.
216		#'
217		#' @inheritParams argument_convention
218		#' @param at (`NULL` or `numeric`)\cr optional values for the interaction variable. Otherwise the median is used.
219		#' @param x logistic regression model fitted by [stats::glm()] with "binomial" family.
220		#'
221		#' @return A `data.frame` containing the tidied model.
222		#'
223		#' @method tidy glm
224		#'
225		#' @seealso [h_logistic_regression] for relevant helper functions.
226		#'
227		#' @examples
228		#' library(dplyr)
229		#' library(broom)
230		#'
231		#' adrs_f <- tern_ex_adrs %>%
232		#' filter(PARAMCD == "BESRSPI") %>%
233		#' filter(RACE %in% c("ASIAN", "WHITE", "BLACK OR AFRICAN AMERICAN")) %>%
234		#' mutate(
235		#' Response = case_when(AVALC %in% c("PR", "CR") ~ 1, TRUE ~ 0),
236		#' RACE = factor(RACE),
237		#' SEX = factor(SEX)
238		#' )
239		#' formatters::var_labels(adrs_f) <- c(formatters::var_labels(tern_ex_adrs), Response = "Response")
240		#' mod1 <- fit_logistic(
241		#' data = adrs_f,
242		#' variables = list(
243		#' response = "Response",
244		#' arm = "ARMCD",
245		#' covariates = c("AGE", "RACE")
246		#' )
247		#' )
248		#' mod2 <- fit_logistic(
249		#' data = adrs_f,
250		#' variables = list(
251		#' response = "Response",
252		#' arm = "ARMCD",
253		#' covariates = c("AGE", "RACE"),
254		#' interaction = "AGE"
255		#' )
256		#' )
257		#'
258		#' df <- tidy(mod1, conf_level = 0.99)
259		#' df2 <- tidy(mod2, conf_level = 0.99)
260		#'
261		#' @export
tidy.glm <- function(x, # nolint
                     conf_level = 0.95,
                     at = NULL,
                     ...) {
  checkmate::assert_class(x, "glm")
  checkmate::assert_set_equal(x$family$family, "binomial")

  term_labels <- attr(stats::terms(x), "term.labels")
  data_classes <- attr(x$terms, "dataClasses")

  # Term labels with no entry in `dataClasses` are treated as interaction terms.
  interaction_terms <- term_labels[!term_labels %in% names(data_classes)]

  if (length(interaction_terms) == 0) {
    df <- h_logistic_simple_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level
    )
  } else {
    df <- h_logistic_inter_terms(
      x = term_labels,
      fit_glm = x,
      conf_level = conf_level,
      at = at
    )
  }

  # Convert the identifier columns to factors whose levels follow order of appearance.
  for (col_name in c("variable", "term", "interaction", "reference")) {
    col_values <- df[[col_name]]
    df[[col_name]] <- factor(col_values, levels = unique(col_values))
  }
  df
}
291
292		#' Logistic Regression Multivariate Column Layout Function
293		#'
294		#' @description `r lifecycle::badge("stable")`
295		#'
296		#' Layout-creating function which creates a multivariate column layout summarizing logistic
297		#' regression results. This function is a wrapper for [rtables::split_cols_by_multivar()].
298		#'
299		#' @inheritParams argument_convention
300		#'
301		#' @return A layout object suitable for passing to further layouting functions. Adding this
302		#' function to an `rtable` layout will split the table into columns corresponding to
303		#' statistics `df`, `estimate`, `std_error`, `odds_ratio`, `ci`, and `pvalue`.
304		#'
305		#' @export
logistic_regression_cols <- function(lyt,
                                     conf_level = 0.95) {
  # Column labels keyed by the statistic (variable) each column displays;
  # the CI label embeds the requested confidence level.
  col_labels <- c(
    df = "Degrees of Freedom",
    estimate = "Parameter Estimate",
    std_error = "Standard Error",
    odds_ratio = "Odds Ratio",
    ci = paste("Wald", f_conf_level(conf_level)),
    pvalue = "p-value"
  )
  split_cols_by_multivar(
    lyt = lyt,
    vars = names(col_labels),
    varlabels = col_labels
  )
}
323
324		#' Logistic Regression Summary Table Constructor Function
325		#'
326		#' @description `r lifecycle::badge("stable")`
327		#'
328		#' Constructor for content functions to be used in [`summarize_logistic()`] to summarize
329		#' logistic regression results. This function is a wrapper for [rtables::summarize_row_groups()].
330		#'
331		#' @inheritParams argument_convention
332		#' @param flag_var (`string`)\cr variable name identifying which row should be used in this
333		#' content function.
334		#'
335		#' @return A content function.
336		#'
337		#' @export
logistic_summary_by_flag <- function(flag_var, na_str = default_na_str(), .indent_mods = NULL) {
  checkmate::assert_string(flag_var)

  # The returned closure takes a layout and adds content rows built from one
  # `cfun_by_flag()` content function per statistic column.
  function(lyt) {
    # Display format for each statistic column, keyed by statistic name.
    stat_formats <- list(
      df = "xx.",
      estimate = "xx.xxx",
      std_error = "xx.xxx",
      odds_ratio = ">999.99",
      ci = format_extreme_values_ci(2L),
      pvalue = "x.xxxx | (<0.0001)"
    )
    stat_names <- names(stat_formats)

    cfun_list <- lapply(
      stats::setNames(stat_names, stat_names),
      function(stat) {
        cfun_by_flag(stat, flag_var, format = stat_formats[[stat]], .indent_mods = .indent_mods)
      }
    )

    summarize_row_groups(
      lyt = lyt,
      cfun = cfun_list,
      na_str = na_str
    )
  }
}

1		#' Combination Functions Class
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' `CombinationFunction` is an S4 class which extends standard functions. These are special functions that
6		#' can be combined and negated with the logical operators.
7		#'
8		#' @param e1 (`CombinationFunction`)\cr left hand side of logical operator.
9		#' @param e2 (`CombinationFunction`)\cr right hand side of logical operator.
10		#' @param x (`CombinationFunction`)\cr the function which should be negated.
11		#'
12		#' @return A `CombinationFunction` object. Calling a combined or negated object returns the logical result of applying the corresponding operator to the result(s) of the underlying function(s).
13		#'
14		#' @exportClass CombinationFunction
15		#' @export CombinationFunction
16		#'
17		#' @examples
18		#' higher <- function(a) {
19		#' force(a)
20		#' CombinationFunction(
21		#' function(x) {
22		#' x > a
23		#' }
24		#' )
25		#' }
26		#'
27		#' lower <- function(b) {
28		#' force(b)
29		#' CombinationFunction(
30		#' function(x) {
31		#' x < b
32		#' }
33		#' )
34		#' }
35		#'
36		#' c1 <- higher(5)
37		#' c2 <- lower(10)
38		#' c3 <- higher(5) & lower(10)
39		#' c3(7)
40		#'
41		#' @aliases CombinationFunction-class
42		#' @name combination_function
CombinationFunction <- methods::setClass( # nolint
  # S4 class that extends plain functions; the generator returned by
  # `setClass()` wraps a function into a `CombinationFunction` object.
  Class = "CombinationFunction",
  contains = "function"
)
44
45		#' @describeIn combination_function Logical "AND" combination of `CombinationFunction` functions.
46		#' The resulting object is of the same class, and evaluates the two argument functions. The result
47		#' is then the "AND" of the two individual results.
48		#'
49		#' @export
methods::setMethod(
  f = "&",
  signature = methods::signature(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # Forward all arguments to both operands. `&&` short-circuits, so `e2`
    # is not called when `e1(...)` is FALSE.
    both <- function(...) {
      e1(...) && e2(...)
    }
    CombinationFunction(both)
  }
)
59
60		#' @describeIn combination_function Logical "OR" combination of `CombinationFunction` functions.
61		#' The resulting object is of the same class, and evaluates the two argument functions. The result
62		#' is then the "OR" of the two individual results.
63		#'
64		#' @export
methods::setMethod(
  f = "|",
  signature = methods::signature(e1 = "CombinationFunction", e2 = "CombinationFunction"),
  definition = function(e1, e2) {
    # Forward all arguments to both operands. `||` short-circuits, so `e2`
    # is not called when `e1(...)` is TRUE.
    either <- function(...) {
      e1(...) || e2(...)
    }
    CombinationFunction(either)
  }
)
74
75		#' @describeIn combination_function Logical negation of `CombinationFunction` functions.
76		#' The resulting object is of the same class, and evaluates the original function. The result
77		#' is then the opposite of its result.
78		#'
79		#' @export
methods::setMethod(
  f = "!",
  signature = methods::signature(x = "CombinationFunction"),
  definition = function(x) {
    # Wrap a function that calls the original with the same arguments and
    # negates whatever it returns.
    negated <- function(...) {
      !x(...)
    }
    CombinationFunction(negated)
  }
)

1		#' Helper Function to create a map dataframe that can be used in `trim_levels_to_map` split function.
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper Function to create a map dataframe from the input dataset, which can be used as an argument in the
6		#' `trim_levels_to_map` split function. Depending on the chosen `method`, the map is constructed differently.
7		#'
8		#' @inheritParams argument_convention
9		#' @param abnormal (named `list`)\cr identifying the abnormal range level(s) in `df`. Based on the levels of
10		#' abnormality of the input dataset, it can be something like `list(Low = "LOW LOW", High = "HIGH HIGH")` or
11		#' `abnormal = list(Low = "LOW", High = "HIGH")`.
12		#' @param method (`string`)\cr indicates how the returned map will be constructed. Can be `"default"` or `"range"`.
13		#'
14		#' @return A map `data.frame`.
15		#'
16		#' @note If method is `"default"`, the returned map will only have the abnormal directions that are observed in the
17		#' `df`, and records with all normal values will be excluded to avoid error in creating layout. If method is
18		#' `"range"`, the returned map will be based on the rule that at least one observation with low range > 0
19		#' for low direction and at least one observation with high range is not missing for high direction.
20		#'
21		#' @examples
22		#' adlb <- df_explicit_na(tern_ex_adlb)
23		#'
24		#' h_map_for_count_abnormal(
25		#' df = adlb,
26		#' variables = list(anl = "ANRIND", split_rows = c("LBCAT", "PARAM")),
27		#' abnormal = list(low = c("LOW"), high = c("HIGH")),
28		#' method = "default",
29		#' na_str = "<Missing>"
30		#' )
31		#'
32		#' df <- data.frame(
33		#' USUBJID = c(rep("1", 4), rep("2", 4), rep("3", 4)),
34		#' AVISIT = c(
35		#' rep("WEEK 1", 2),
36		#' rep("WEEK 2", 2),
37		#' rep("WEEK 1", 2),
38		#' rep("WEEK 2", 2),
39		#' rep("WEEK 1", 2),
40		#' rep("WEEK 2", 2)
41		#' ),
42		#' PARAM = rep(c("ALT", "CPR"), 6),
43		#' ANRIND = c(
44		#' "NORMAL", "NORMAL", "LOW",
45		#' "HIGH", "LOW", "LOW", "HIGH", "HIGH", rep("NORMAL", 4)
46		#' ),
47		#' ANRLO = rep(5, 12),
48		#' ANRHI = rep(20, 12)
49		#' )
50		#' df$ANRIND <- factor(df$ANRIND, levels = c("LOW", "HIGH", "NORMAL"))
51		#' h_map_for_count_abnormal(
52		#' df = df,
53		#' variables = list(
54		#' anl = "ANRIND",
55		#' split_rows = c("PARAM"),
56		#' range_low = "ANRLO",
57		#' range_high = "ANRHI"
58		#' ),
59		#' abnormal = list(low = c("LOW"), high = c("HIGH")),
60		#' method = "range",
61		#' na_str = "<Missing>"
62		#' )
63		#'
64		#' @export
h_map_for_count_abnormal <- function(df,
                                     variables = list(
                                       anl = "ANRIND",
                                       split_rows = c("PARAM"),
                                       range_low = "ANRLO",
                                       range_high = "ANRHI"
                                     ),
                                     abnormal = list(low = c("LOW", "LOW LOW"), high = c("HIGH", "HIGH HIGH")),
                                     method = c("default", "range"),
                                     na_level = lifecycle::deprecated(),
                                     na_str = "<Missing>") {
  # `na_level` is deprecated in favour of `na_str`; honour it with a warning.
  if (lifecycle::is_present(na_level)) {
    lifecycle::deprecate_warn("0.9.1", "h_map_for_count_abnormal(na_level)", "h_map_for_count_abnormal(na_str)")
    na_str <- na_level
  }

  method <- match.arg(method)
  checkmate::assert_subset(c("anl", "split_rows"), names(variables))
  checkmate::assert_false(anyNA(df[variables$split_rows]))
  assert_df_with_variables(df,
    variables = list(anl = variables$anl, split_rows = variables$split_rows),
    na_level = na_str
  )
  assert_df_with_factors(df, list(val = variables$anl))
  assert_valid_factor(df[[variables$anl]], any.missing = FALSE)
  assert_list_of_variables(variables)
  checkmate::assert_list(abnormal, types = "character", len = 2)

  # Drop unused levels from df as they are not supposed to be in the final map
  df <- droplevels(df)

  # Whatever level is left after removing the abnormal ones is the normal level.
  normal_value <- setdiff(levels(df[[variables$anl]]), unlist(abnormal))

  # Based on the understanding of clinical data, there should only be one level of normal which is "NORMAL"
  checkmate::assert_vector(normal_value, len = 1)

  # Default method will only have what is observed in the df, and records with all normal values will be excluded to
  # avoid error in layout building.
  if (method == "default") {
    is_abnormal <- df[[variables$anl]] %in% unlist(abnormal)
    df_abnormal <- df[is_abnormal, , drop = FALSE]
    map <- unique(df_abnormal[c(variables$split_rows, variables$anl)])
    map_normal <- unique(map[variables$split_rows])
    map_normal[[variables$anl]] <- normal_value
    map <- rbind(map, map_normal)
  } else if (method == "range") {
    # range method follows the rule that at least one observation with ANRLO > 0 for low
    # direction and at least one observation with ANRHI is not missing for high direction.
    checkmate::assert_subset(c("range_low", "range_high"), names(variables))
    checkmate::assert_subset(c("LOW", "HIGH"), toupper(names(abnormal)))

    assert_df_with_variables(df,
      variables = list(
        range_low = variables$range_low,
        range_high = variables$range_high
      )
    )

    # Cross every `split_rows` combination in `groups` with each abnormal level
    # of one direction, giving one map row per (combination, level) pair.
    cross_with_levels <- function(groups, direction_levels) {
      levels_df <- as.data.frame(direction_levels)
      colnames(levels_df) <- variables$anl
      levels_df <- do.call("rbind", replicate(nrow(groups), levels_df, simplify = FALSE))
      rownames(groups) <- NULL # Just to avoid strange row index in case upstream functions changed
      groups <- groups[rep(seq_len(nrow(groups)), each = length(direction_levels)), , drop = FALSE]
      cbind(groups, levels_df)
    }

    # Rows are selected by direct indexing rather than `subset()`, so columns of
    # `df` that happen to be called "df" or "variables" cannot shadow the lookup.

    # Define low direction of map: lower range limit present and > 0.
    low_limit <- df[[variables$range_low]]
    has_low <- !is.na(low_limit) & low_limit > 0
    map_low <- cross_with_levels(
      unique(df[has_low, variables$split_rows, drop = FALSE]),
      unname(unlist(abnormal[toupper(names(abnormal)) == "LOW"]))
    )

    # Define high direction of map: upper range limit must be neither `NA` nor
    # the explicit missing string. Both conditions have to hold (`&`); with `|`
    # a value equal to `na_str` is still non-NA and would wrongly be kept.
    high_limit <- df[[variables$range_high]]
    has_high <- !is.na(high_limit) & high_limit != na_str
    map_high <- cross_with_levels(
      unique(df[has_high, variables$split_rows, drop = FALSE]),
      unname(unlist(abnormal[toupper(names(abnormal)) == "HIGH"]))
    )

    # Define normal of map
    map_normal <- unique(rbind(map_low, map_high)[variables$split_rows])
    map_normal[variables$anl] <- normal_value

    map <- rbind(map_low, map_high, map_normal)
  }

  # map should be all characters
  map <- data.frame(lapply(map, as.character), stringsAsFactors = FALSE)

  # sort the map final output by split_rows variables; sorting by the last key
  # first and the first key last yields a lexicographic order because `order()`
  # leaves ties in their existing order.
  for (i in rev(seq_along(variables$split_rows))) {
    map <- map[order(map[[i]]), , drop = FALSE]
  }
  map
}

1		#' Number of Patients
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Count the number of unique and non-unique patients in a column (variable).
6		#'
7		#' @inheritParams argument_convention
8		#' @param count_by (`vector`)\cr optional vector of any type to be combined with `x` when counting `nonunique`
9		#' records.
10		#' @param unique_count_suffix (`logical`)\cr should `"(n)"` suffix be added to `unique_count` labels.
11		#' Defaults to `TRUE`.
12		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("summarize_num_patients")`
13		#' to see available statistics for this function.
14		#'
15		#' @name summarize_num_patients
16		#' @order 1
17		NULL
18
19		#' @describeIn summarize_num_patients Statistics function which counts the number of
20		#' unique patients, the corresponding percentage taken with respect to the
21		#' total number of patients, and the number of non-unique patients.
22		#'
23		#' @param x (`character` or `factor`)\cr vector of patient IDs.
24		#'
25		#' @return
26		#' * `s_num_patients()` returns a named `list` of 3 statistics:
27		#' * `unique`: Vector of counts and percentages.
28		#' * `nonunique`: Vector of counts.
29		#' * `unique_count`: Counts.
30		#'
31		#' @examples
32		#' # Use the statistics function to count number of unique and nonunique patients.
33		#' s_num_patients(x = as.character(c(1, 1, 1, 2, 4, NA)), labelstr = "", .N_col = 6L)
34		#' s_num_patients(
35		#' x = as.character(c(1, 1, 1, 2, 4, NA)),
36		#' labelstr = "",
37		#' .N_col = 6L,
38		#' count_by = c(1, 1, 2, 1, 1, 1)
39		#' )
40		#'
41		#' @export
s_num_patients <- function(x, labelstr, .N_col, count_by = NULL, unique_count_suffix = TRUE) { # nolint
  checkmate::assert_string(labelstr)
  checkmate::assert_count(.N_col)
  checkmate::assert_multi_class(x, classes = c("factor", "character"))
  checkmate::assert_flag(unique_count_suffix)

  # Number of distinct IDs in `x`, as counted by `n_available()`.
  n_unique <- n_available(unique(x))

  # Number of records: all entries of `x`, or distinct (`x`, `count_by`)
  # combinations when `count_by` is supplied.
  n_records <- if (is.null(count_by)) {
    n_available(x)
  } else {
    checkmate::assert_vector(count_by, len = length(x))
    n_available(unique(interaction(x, count_by)))
  }

  # Report 0 rather than 0 / 0 = NaN when both count and denominator are zero.
  fraction <- if (n_unique == 0 && .N_col == 0) 0 else n_unique / .N_col

  # Optional "(n)" suffix, separated by a space only when the label is non-empty.
  count_label <- if (unique_count_suffix) {
    paste0(labelstr, if (nzchar(labelstr)) " ", "(n)")
  } else {
    labelstr
  }

  list(
    unique = formatters::with_label(c(n_unique, fraction), labelstr),
    nonunique = formatters::with_label(n_records, labelstr),
    unique_count = formatters::with_label(n_unique, count_label)
  )
}
67
68		#' @describeIn summarize_num_patients Statistics function which counts the number of unique patients
69		#' in a column (variable), the corresponding percentage taken with respect to the total number of
70		#' patients, and the number of non-unique patients in the column.
71		#'
72		#' @param required (`character` or `NULL`)\cr optional name of a variable that is required to be non-missing.
73		#'
74		#' @return
75		#' * `s_num_patients_content()` returns the same values as `s_num_patients()`.
76		#'
77		#' @examples
78		#' # Count number of unique and non-unique patients.
79		#'
80		#' df <- data.frame(
81		#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
82		#' EVENT = as.character(c(10, 15, 10, 17, 8))
83		#' )
84		#' s_num_patients_content(df, .N_col = 5, .var = "USUBJID")
85		#'
86		#' df_by_event <- data.frame(
87		#' USUBJID = as.character(c(1, 2, 1, 4, NA)),
88		#' EVENT = c(10, 15, 10, 17, 8)
89		#' )
90		#' s_num_patients_content(df_by_event, .N_col = 5, .var = "USUBJID", count_by = "EVENT")
91		#'
92		#' @export
s_num_patients_content <- function(df,
                                   labelstr = "",
                                   .N_col, # nolint
                                   .var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE) {
  checkmate::assert_string(.var)
  checkmate::assert_data_frame(df)

  # The ID column is always checked; `count_by` only when supplied.
  needed_vars <- list(id = .var)
  if (!is.null(count_by)) {
    needed_vars$count_by <- count_by
  }
  assert_df_with_variables(df, needed_vars)

  # Keep only rows where the `required` variable is not missing.
  if (!is.null(required)) {
    checkmate::assert_string(required)
    assert_df_with_variables(df, list(required = required))
    has_required <- !is.na(df[[required]])
    df <- df[has_required, , drop = FALSE]
  }

  s_num_patients(
    x = df[[.var]],
    labelstr = labelstr,
    .N_col = .N_col,
    count_by = if (is.null(count_by)) NULL else df[[count_by]],
    unique_count_suffix = unique_count_suffix
  )
}
124
c_num_patients <- make_afun(
  # Formatted wrapper around `s_num_patients_content()` exposing all three
  # statistics with their default formats.
  fun = s_num_patients_content,
  .stats = c("unique", "nonunique", "unique_count"),
  .formats = c(unique = format_count_fraction_fixed_dp, nonunique = "xx", unique_count = "xx")
)
130
131		#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
132		#' and additional format arguments. This function is a wrapper for [rtables::summarize_row_groups()].
133		#'
134		#' @return
135		#' * `summarize_num_patients()` returns a layout object suitable for passing to further layouting functions,
136		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
137		#' the statistics from `s_num_patients_content()` to the table layout.
138		#'
139		#' @export
140		#' @order 3
summarize_num_patients <- function(lyt,
                                   var,
                                   required = NULL,
                                   count_by = NULL,
                                   unique_count_suffix = TRUE,
                                   na_str = default_na_str(),
                                   .stats = NULL,
                                   .formats = NULL,
                                   .labels = c(
                                     unique = "Number of patients with at least one event",
                                     nonunique = "Number of events"
                                   ),
                                   indent_mod = lifecycle::deprecated(),
                                   .indent_mods = 0L,
                                   riskdiff = FALSE,
                                   ...) {
  checkmate::assert_flag(riskdiff)

  # `indent_mod` is deprecated in favour of `.indent_mods`; honour it with a warning.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "summarize_num_patients(indent_mod)", "summarize_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")
  # Drop labels for statistics that were not requested.
  if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats]

  # Arguments forwarded to the statistics function.
  s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...)

  cfun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Select the content function with a plain `if`/`else`. `ifelse()` is meant
  # for vectors and only returns a function unchanged through its scalar fast
  # path, which requires the function to carry no attributes besides `srcref`.
  if (isFALSE(riskdiff)) {
    content_fun <- cfun
    extra_args <- s_args
  } else {
    content_fun <- afun_riskdiff
    extra_args <- list(
      afun = list("s_num_patients_content" = cfun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  summarize_row_groups(
    lyt = lyt,
    var = var,
    cfun = content_fun,
    na_str = na_str,
    extra_args = extra_args,
    indent_mod = .indent_mods
  )
}
196
197		#' @describeIn summarize_num_patients Layout-creating function which can take statistics function arguments
198		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
199		#'
200		#' @return
201		#' * `analyze_num_patients()` returns a layout object suitable for passing to further layouting functions,
202		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
203		#' the statistics from `s_num_patients_content()` to the table layout.
204		#'
205		#' @details In general, functions that start with `analyze*` are expected to
206		#' work like [rtables::analyze()], while functions that start with `summarize*`
207		#' are based upon [rtables::summarize_row_groups()]. The latter provides a
208		#' value for each dividing split in the row and column space, but, because it
209		#' is bound to the fundamental splits, it is repeated by design on every page
210		#' when pagination is involved.
211		#'
212		#' @note As opposed to [summarize_num_patients()], this function does not repeat the produced rows.
213		#'
214		#' @examples
215		#' df <- data.frame(
216		#' USUBJID = as.character(c(1, 2, 1, 4, NA, 6, 6, 8, 9)),
217		#' ARM = c("A", "A", "A", "A", "A", "B", "B", "B", "B"),
218		#' AGE = c(10, 15, 10, 17, 8, 11, 11, 19, 17)
219		#' )
220		#'
221		#' tbl <- basic_table() %>%
222		#' split_cols_by("ARM") %>%
223		#' add_colcounts() %>%
224		#' analyze_num_patients("USUBJID", .stats = c("unique")) %>%
225		#' build_table(df)
226		#'
227		#' tbl
228		#'
229		#' @export
230		#' @order 2
analyze_num_patients <- function(lyt,
                                 vars,
                                 required = NULL,
                                 count_by = NULL,
                                 unique_count_suffix = TRUE,
                                 na_str = default_na_str(),
                                 nested = TRUE,
                                 .stats = NULL,
                                 .formats = NULL,
                                 .labels = c(
                                   unique = "Number of patients with at least one event",
                                   nonunique = "Number of events"
                                 ),
                                 show_labels = c("default", "visible", "hidden"),
                                 indent_mod = lifecycle::deprecated(),
                                 .indent_mods = 0L,
                                 riskdiff = FALSE,
                                 ...) {
  checkmate::assert_flag(riskdiff)

  # `indent_mod` is deprecated in favour of `.indent_mods`; honour it with a warning.
  if (lifecycle::is_present(indent_mod)) {
    lifecycle::deprecate_warn("0.8.2", "analyze_num_patients(indent_mod)", "analyze_num_patients(.indent_mods)")
    .indent_mods <- indent_mod
  }

  if (is.null(.stats)) .stats <- c("unique", "nonunique", "unique_count")
  # Drop labels for statistics that were not requested.
  if (length(.labels) > length(.stats)) .labels <- .labels[names(.labels) %in% .stats]

  # Arguments forwarded to the statistics function.
  s_args <- list(required = required, count_by = count_by, unique_count_suffix = unique_count_suffix, ...)

  afun <- make_afun(
    c_num_patients,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels
  )

  # Select the analysis function with a plain `if`/`else`. `ifelse()` is meant
  # for vectors and only returns a function unchanged through its scalar fast
  # path, which requires the function to carry no attributes besides `srcref`.
  if (isFALSE(riskdiff)) {
    analysis_fun <- afun
    extra_args <- s_args
  } else {
    analysis_fun <- afun_riskdiff
    extra_args <- list(
      afun = list("s_num_patients_content" = afun),
      .stats = .stats,
      .indent_mods = .indent_mods,
      s_args = s_args
    )
  }

  analyze(
    afun = analysis_fun,
    lyt = lyt,
    vars = vars,
    na_str = na_str,
    nested = nested,
    extra_args = extra_args,
    show_labels = show_labels,
    indent_mod = .indent_mods
  )
}

1		#' Create a STEP Graph
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Based on the STEP results, creates a `ggplot` graph showing the estimated HR or OR
6		#' along the continuous biomarker value subgroups.
7		#'
8		#' @param df (`tibble`)\cr result of [tidy.step()].
9		#' @param use_percentile (`flag`)\cr whether to use percentiles for the x axis or actual
10		#' biomarker values.
11		#' @param est (named `list`)\cr `col` and `lty` settings for estimate line.
12		#' @param ci_ribbon (named `list` or `NULL`)\cr `fill` and `alpha` settings for the confidence interval
13		#' ribbon area, or `NULL` to not plot a CI ribbon.
14		#' @param col (`character`)\cr colors.
15		#'
16		#' @return A `ggplot` STEP graph.
17		#'
18		#' @seealso Custom tidy method [tidy.step()].
19		#'
20		#' @examples
21		#' library(nestcolor)
22		#' library(survival)
23		#' lung$sex <- factor(lung$sex)
24		#'
25		#' # Survival example.
26		#' vars <- list(
27		#' time = "time",
28		#' event = "status",
29		#' arm = "sex",
30		#' biomarker = "age"
31		#' )
32		#'
33		#' step_matrix <- fit_survival_step(
34		#' variables = vars,
35		#' data = lung,
36		#' control = c(control_coxph(), control_step(num_points = 10, degree = 2))
37		#' )
38		#' step_data <- broom::tidy(step_matrix)
39		#'
40		#' # Default plot.
41		#' g_step(step_data)
42		#'
43		#' # Add the reference 1 horizontal line.
44		#' library(ggplot2)
45		#' g_step(step_data) +
46		#' ggplot2::geom_hline(ggplot2::aes(yintercept = 1), linetype = 2)
47		#'
48		#' # Use actual values instead of percentiles, different color for estimate and no CI,
49		#' # use log scale for y axis.
50		#' g_step(
51		#' step_data,
52		#' use_percentile = FALSE,
53		#' est = list(col = "blue", lty = 1),
54		#' ci_ribbon = NULL
55		#' ) + scale_y_log10()
56		#'
57		#' # Adding another curve based on additional column.
58		#' step_data$extra <- exp(step_data$`Percentile Center`)
59		#' g_step(step_data) +
60		#' ggplot2::geom_line(ggplot2::aes(y = extra), linetype = 2, color = "green")
61		#'
62		#' # Response example.
63		#' vars <- list(
64		#' response = "status",
65		#' arm = "sex",
66		#' biomarker = "age"
67		#' )
68		#'
69		#' step_matrix <- fit_rsp_step(
70		#' variables = vars,
71		#' data = lung,
72		#' control = c(
73		#' control_logistic(response_definition = "I(response == 2)"),
74		#' control_step()
75		#' )
76		#' )
77		#' step_data <- broom::tidy(step_matrix)
78		#' g_step(step_data)
79		#'
80		#' @export
g_step <- function(df,
                   use_percentile = "Percentile Center" %in% names(df),
                   est = list(col = "blue", lty = 1),
                   ci_ribbon = list(fill = getOption("ggplot2.discrete.colour")[1], alpha = 0.5),
                   col = getOption("ggplot2.discrete.colour")) {
  checkmate::assert_tibble(df)
  checkmate::assert_flag(use_percentile)
  checkmate::assert_character(col, null.ok = TRUE)
  checkmate::assert_list(est, names = "named")
  checkmate::assert_list(ci_ribbon, names = "named", null.ok = TRUE)

  # Copy the chosen x column and the estimate column (named by the `estimate`
  # attribute of `df`) into fixed `x` / `y` columns for plotting.
  x_var <- if (use_percentile) "Percentile Center" else "Interval Center"
  df$x <- df[[x_var]]
  attrs <- attributes(df)
  df$y <- df[[attrs$estimate]]

  # Set legend names. To be modified also at call level
  legend_names <- c("Estimate", "CI 95%")

  # Colour of the estimate line: honour `est$col`, falling back to the
  # previously hard-coded "blue" when the list carries no `col` entry.
  est_col <- if (is.null(est$col)) "blue" else est$col

  p <- ggplot2::ggplot(df, ggplot2::aes(x = .data[["x"]], y = .data[["y"]]))

  if (!is.null(col)) {
    p <- p +
      ggplot2::scale_color_manual(values = col)
  }

  if (!is.null(ci_ribbon)) {
    if (is.null(ci_ribbon$fill)) {
      ci_ribbon$fill <- "lightblue"
    }
    p <- p + ggplot2::geom_ribbon(
      ggplot2::aes(
        ymin = .data[["ci_lower"]], ymax = .data[["ci_upper"]],
        fill = legend_names[2]
      ),
      alpha = ci_ribbon$alpha
    ) +
      ggplot2::scale_fill_manual(
        name = "", values = c("CI 95%" = ci_ribbon$fill)
      )
  }

  # A colour scale may already have been added above when `col` is given;
  # adding another one makes ggplot2 emit a message, which is silenced here.
  suppressMessages(p <- p +
    ggplot2::geom_line(
      ggplot2::aes(y = .data[["y"]], color = legend_names[1]),
      linetype = est$lty
    ) +
    ggplot2::scale_colour_manual(
      name = "", values = c("Estimate" = est_col)
    ))

  p <- p + ggplot2::labs(x = attrs$biomarker, y = attrs$estimate)
  if (use_percentile) {
    p <- p + ggplot2::scale_x_continuous(labels = scales::percent)
  }
  p
}
137
138		#' Custom Tidy Method for STEP Results
139		#'
140		#' @description `r lifecycle::badge("stable")`
141		#'
142		#' Tidy the STEP results into a `tibble` format ready for plotting.
143		#'
144		#' @param x (`step` matrix)\cr results from [fit_survival_step()].
145		#' @param ... not used here.
146		#'
147		#' @return A `tibble` with one row per STEP subgroup. The estimates and CIs are on the HR or OR scale,
148		#' respectively. Additional attributes carry metadata also used for plotting.
149		#'
150		#' @seealso [g_step()] which consumes the result from this function.
151		#'
152		#' @method tidy step
153		#'
154		#' @examples
155		#' library(survival)
156		#' lung$sex <- factor(lung$sex)
157		#' vars <- list(
158		#' time = "time",
159		#' event = "status",
160		#' arm = "sex",
161		#' biomarker = "age"
162		#' )
163		#' step_matrix <- fit_survival_step(
164		#' variables = vars,
165		#' data = lung,
166		#' control = c(control_coxph(), control_step(num_points = 10, degree = 2))
167		#' )
168		#' broom::tidy(step_matrix)
169		#'
170		#' @export
tidy.step <- function(x, ...) { # nolint
  checkmate::assert_class(x, "step")
  dat <- as.data.frame(x)
  nams <- names(dat)

  # A `loghr` column marks survival results; otherwise `logor` is used.
  is_surv <- "loghr" %in% nams
  est_var <- if (is_surv) "loghr" else "logor"
  new_est_var <- if (is_surv) "Hazard Ratio" else "Odds Ratio"
  new_y_vars <- c(new_est_var, c("ci_lower", "ci_upper"))

  # Rename the estimate column, then exponentiate the estimate and CI bounds.
  names(dat)[match(est_var, nams)] <- new_est_var
  dat[, new_y_vars] <- exp(dat[, new_y_vars])

  any_is_na <- any(is.na(dat[, new_y_vars]))
  any_is_very_large <- any(abs(dat[, new_y_vars]) > 1e10, na.rm = TRUE)
  if (any_is_na) {
    warning(paste(
      "Missing values in the point estimate or CI columns,",
      "this will lead to holes in the `g_step()` plot"
    ))
  }
  if (any_is_very_large) {
    warning(paste(
      "Very large absolute values in the point estimate or CI columns,",
      "consider adding `scale_y_log10()` to the `g_step()` result for plotting"
    ))
  }
  if (any_is_na || any_is_very_large) {
    warning("Consider using larger `bandwidth`, less `num_points` in `control_step()` settings for fitting")
  }

  # Attach the metadata attributes alongside the tibble.
  structure(
    tibble::as_tibble(dat),
    estimate = new_est_var,
    biomarker = attr(x, "variables")$biomarker,
    ci = f_conf_level(attr(x, "control")$conf_level)
  )
}

1		#' Encode Categorical Missing Values in a Data Frame
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' This is a helper function to encode missing entries across groups of categorical
6		#' variables in a data frame.
7		#'
8		#' @details Missing entries are those with `NA` or empty strings and will
9		#' be replaced with a specified value. If factor variables include missing
10		#' values, the missing value will be inserted as the last level.
11		#' Similarly, in case character or logical variables should be converted to factors
12		#' with the `char_as_factor` or `logical_as_factor` options, the missing values will
13		#' be set as the last level.
14		#'
15		#' @param data (`data.frame`)\cr data set.
16		#' @param omit_columns (`character`)\cr names of variables from `data` that should
17		#' not be modified by this function.
18		#' @param char_as_factor (`flag`)\cr whether to convert character variables
19		#' in `data` to factors.
20		#' @param logical_as_factor (`flag`)\cr whether to convert logical variables
21		#' in `data` to factors.
22		#' @param na_level (`string`)\cr used to replace all `NA` or empty
23		#' values inside non-`omit_columns` columns.
24		#'
25		#' @return A `data.frame` with the chosen modifications applied.
26		#'
27		#' @seealso [sas_na()] and [explicit_na()] for other missing data helper functions.
28		#'
29		#' @examples
30		#' my_data <- data.frame(
31		#' u = c(TRUE, FALSE, NA, TRUE),
32		#' v = factor(c("A", NA, NA, NA), levels = c("Z", "A")),
33		#' w = c("A", "B", NA, "C"),
34		#' x = c("D", "E", "F", NA),
35		#' y = c("G", "H", "I", ""),
36		#' z = c(1, 2, 3, 4),
37		#' stringsAsFactors = FALSE
38		#' )
39		#'
40		#' # Example 1
41		#' # Encode missing values in all character or factor columns.
42		#' df_explicit_na(my_data)
43		#' # Also convert logical columns to factor columns.
44		#' df_explicit_na(my_data, logical_as_factor = TRUE)
45		#' # Encode missing values in a subset of columns.
46		#' df_explicit_na(my_data, omit_columns = c("x", "y"))
47		#'
48		#' # Example 2
49		#' # Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
50		#' # After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
51		#' # included when generating `rtables`.
52		#' adsl <- tern_ex_adsl
53		#' adsl$SEX[adsl$SEX == "M"] <- NA
54		#' adsl <- df_explicit_na(adsl)
55		#'
56		#' # If you want the `NA` values to be displayed in the table use the `na_level` argument.
57		#' adsl <- tern_ex_adsl
58		#' adsl$SEX[adsl$SEX == "M"] <- NA
59		#' adsl <- df_explicit_na(adsl, na_level = "Missing Values")
60		#'
61		#' # Example 3
62		#' # Numeric variables that have missing values are not altered. This means that any `NA` value in
63		#' # a numeric variable will not be included in the summary statistics, nor will they be included
64		#' # in the denominator value for calculating the percent values.
65		#' adsl <- tern_ex_adsl
66		#' adsl$AGE[adsl$AGE < 30] <- NA
67		#' adsl <- df_explicit_na(adsl)
68		#'
69		#' @export
df_explicit_na <- function(data,
                           omit_columns = NULL,
                           char_as_factor = TRUE,
                           logical_as_factor = FALSE,
                           na_level = "<Missing>") {
  checkmate::assert_character(omit_columns, null.ok = TRUE, min.len = 1, any.missing = FALSE)
  checkmate::assert_data_frame(data)
  checkmate::assert_flag(char_as_factor)
  checkmate::assert_flag(logical_as_factor)
  checkmate::assert_string(na_level)

  # Columns to process: everything, or everything except `omit_columns`.
  cols <- if (is.null(omit_columns)) names(data) else setdiff(names(data), omit_columns)
  if (length(cols) == 0) {
    return(data)
  }

  # Makes sure the selected columns exist in data and names are not duplicated.
  assert_df_with_variables(data, split(cols, cols))

  for (col in cols) {
    values <- data[[col]]
    original_label <- obj_label(values)

    # Decide on factor conversion from the column's original type.
    from_character <- char_as_factor && is.character(values)
    from_logical <- logical_as_factor && is.logical(values)

    # Logical columns go through character so that NA replacement below applies.
    if (from_logical) {
      values <- as.character(values)
    }

    # Only factor and character columns are modified; others stay untouched.
    if (!is.factor(values) && !is.character(values)) {
      next
    }

    # Handle empty strings and NA values.
    values <- explicit_na(sas_na(values), label = na_level)

    # Convert to factor if requested for the original type, with the sorted
    # observed values as levels and `na_level` last when it occurs.
    if (from_character || from_logical) {
      observed <- unique(values)
      ordered_levels <- setdiff(sort(observed), na_level)
      if (na_level %in% observed) {
        ordered_levels <- c(ordered_levels, na_level)
      }
      values <- factor(values, levels = ordered_levels)
    }

    data[, col] <- formatters::with_label(values, label = original_label)
  }

  data
}

1		#' Helper Functions for Subgroup Treatment Effect Pattern (STEP) Calculations
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Helper functions that are used internally for the STEP calculations.
6		#'
7		#' @inheritParams argument_convention
8		#'
9		#' @name h_step
10		#' @include control_step.R
11		NULL
12
13		#' @describeIn h_step creates the windows for STEP, based on the control settings
14		#' provided.
15		#'
16		#' @param x (`numeric`)\cr biomarker value(s) to use (without `NA`).
17		#' @param control (named `list`)\cr output from `control_step()`.
18		#'
19		#' @return
20		#' * `h_step_window()` returns a list containing the window-selection matrix `sel`
21		#' and the interval information matrix `interval`.
22		#'
23		#' @export
h_step_window <- function(x,
                          control = control_step()) {
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  n_points <- control$num_points
  bandwidth <- control$bandwidth

  # `sel`: one row per element of `x`, one column per window (TRUE = inside).
  # `out`: one row per window holding center and lower/upper bounds.
  sel <- matrix(FALSE, length(x), n_points)
  out <- matrix(0, n_points, 3)
  colnames(out) <- paste("Interval", c("Center", "Lower", "Upper"))
  if (control$use_percentile) {
    # Create windows according to percentile cutoffs.
    # Columns 1-3 hold the percentile scale, columns 4-6 the matching
    # quantiles of `x` (these keep the "Interval ..." column names).
    out <- cbind(out, out)
    colnames(out)[1:3] <- paste("Percentile", c("Center", "Lower", "Upper"))
    # Equally spaced grid on [0, 1] without the left end point.
    xs <- seq(0, 1, length.out = n_points + 2)[-1]
    for (i in seq_len(n_points)) {
      # Window bounds are clipped to the valid percentile range [0, 1].
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, 0),
        min(xs[i] + bandwidth, 1)
      )
      out[i, 5:6] <- stats::quantile(x, out[i, 2:3])
      sel[, i] <- x >= out[i, 5] & x <= out[i, 6]
    }
    # Center is the middle point of the percentile window
    # (the last grid point, i.e. the right end point 1, is dropped).
    out[, 1] <- xs[-(n_points + 1)]
    out[, 4] <- stats::quantile(x, out[, 1])
  } else {
    # Create windows according to cutoffs.
    m <- c(min(x), max(x))
    # Equally spaced grid on the observed range without the minimum.
    xs <- seq(m[1], m[2], length.out = n_points + 2)[-1]
    for (i in seq_len(n_points)) {
      # Window bounds are clipped to the observed range of `x`.
      out[i, 2:3] <- c(
        max(xs[i] - bandwidth, m[1]),
        min(xs[i] + bandwidth, m[2])
      )
      sel[, i] <- x >= out[i, 2] & x <= out[i, 3]
    }
    # Center is the same as the point for predicting
    # (the last grid point, i.e. the maximum, is dropped).
    out[, 1] <- xs[-(n_points + 1)]
  }
  list(sel = sel, interval = out)
}
64
65		#' @describeIn h_step calculates the estimated treatment effect estimate
66		#' on the linear predictor scale and corresponding standard error from a STEP `model` fitted
67		#' on `data` given `variables` specification, for a single biomarker value `x`.
68		#' This works for both `coxph` and `glm` models, i.e. for calculating log hazard ratio or log odds
69		#' ratio estimates.
70		#'
71		#' @param model the regression model object.
72		#'
73		#' @return
74		#' * `h_step_trt_effect()` returns a vector with elements `est` and `se`.
75		#'
76		#' @export
h_step_trt_effect <- function(data,
                              model,
                              variables,
                              x) {
  checkmate::assert_multi_class(model, c("coxph", "glm"))
  checkmate::assert_number(x)
  assert_df_with_variables(data, variables)
  checkmate::assert_factor(data[[variables$arm]], n.levels = 2)

  # Two-row prediction data: the first row of `data` duplicated, with the
  # biomarker fixed at `x` and the arm set to each of its two levels in turn.
  newdata <- data[c(1, 1), ]
  newdata[, variables$biomarker] <- x
  newdata[, variables$arm] <- levels(data[[variables$arm]])

  # Rebuild the design matrix of the fitted model (without the response)
  # for these two rows.
  model_terms <- stats::delete.response(stats::terms(model))
  model_frame <- stats::model.frame(model_terms, data = newdata, xlev = model$xlevels)
  mat <- stats::model.matrix(model_terms, data = model_frame, contrasts.arg = model$contrasts)
  coefs <- stats::coef(model)
  # Note: It is important to use the coef subset from matrix, otherwise intercept and
  # strata are included for coxph() models.
  # `drop = FALSE` keeps the matrix shape even for a single coefficient.
  mat <- mat[, names(coefs), drop = FALSE]
  # Row difference: second arm level minus first arm level.
  mat_diff <- diff(mat)
  est <- mat_diff %*% coefs
  # Variance of the linear contrast (matrix products, not modulo).
  var <- mat_diff %*% stats::vcov(model) %*% t(mat_diff)
  se <- sqrt(var)
  c(
    est = est,
    se = se
  )
}
105
106		#' @describeIn h_step builds the model formula used in survival STEP calculations.
107		#'
108		#' @return
109		#' * `h_step_survival_formula()` returns a model formula.
110		#'
111		#' @export
h_step_survival_formula <- function(variables,
                                    control = control_step()) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)

  assert_list_of_variables(variables[c("arm", "biomarker", "event", "time")])

  # Left-hand side: the survival outcome; right-hand side starts with the arm.
  lhs <- paste0("Surv(", variables$time, ", ", variables$event, ")")
  rhs <- variables$arm

  # Arm-by-biomarker interaction through a raw polynomial, only for degree > 0.
  if (control$degree > 0) {
    rhs <- paste0(rhs, " * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  }
  form <- paste0(lhs, " ~ ", rhs)

  # Optional additive covariates.
  covariates <- variables$covariates
  if (!is.null(covariates)) {
    form <- paste(form, "+", paste(covariates, collapse = "+"))
  }

  # Optional stratification term listing all strata variables.
  strata <- variables$strata
  if (!is.null(strata)) {
    form <- paste0(form, " + strata(", paste0(strata, collapse = ", "), ")")
  }

  stats::as.formula(form)
}
129
130		#' @describeIn h_step estimates the model with `formula` built based on
131		#' `variables` in `data` for a given `subset` and `control` parameters for the
132		#' Cox regression.
133		#'
134		#' @param formula (`formula`)\cr the regression model formula.
135		#' @param subset (`logical`)\cr subset vector.
136		#'
137		#' @return
138		#' * `h_step_survival_est()` returns a matrix of number of observations `n`,
139		#' `events`, log hazard ratio estimates `loghr`, standard error `se`,
140		#' and Wald confidence interval bounds `ci_lower` and `ci_upper`. One row is
141		#' included for each biomarker value in `x`.
142		#'
143		#' @export
h_step_survival_est <- function(formula,
                                data,
                                variables,
                                x,
                                subset = rep(TRUE, nrow(data)),
                                control = control_coxph()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `coxph` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Collect (and silence) every warning raised while fitting, so that a single
  # summary warning can be issued afterwards.
  coxph_warnings <- NULL
  collect_warning <- function(w) {
    coxph_warnings <<- c(coxph_warnings, w)
    invokeRestart("muffleWarning")
  }
  fit <- withCallingHandlers(
    survival::coxph(
      formula = formula,
      data = data,
      subset = .subset,
      ties = control$ties
    ),
    warning = collect_warning
  )
  if (!is.null(coxph_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = c(1, 2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- estimates[, "est"]
  se <- estimates[, "se"]

  # Wald confidence interval on the log hazard ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  margin <- q_norm * se
  cbind(
    n = fit$n,
    events = fit$nevent,
    loghr = est,
    se = se,
    ci_lower = est - margin,
    ci_upper = est + margin
  )
}
202
203		#' @describeIn h_step builds the model formula used in response STEP calculations.
204		#'
205		#' @return
206		#' * `h_step_rsp_formula()` returns a model formula.
207		#'
208		#' @export
h_step_rsp_formula <- function(variables,
                               control = c(control_step(), control_logistic())) {
  checkmate::assert_character(variables$covariates, null.ok = TRUE)
  assert_list_of_variables(variables[c("arm", "biomarker", "response")])

  # Substitute the actual response variable name into the response definition
  # template (first literal occurrence of "response").
  response_definition <- sub(
    pattern = "response",
    replacement = variables$response,
    x = control$response_definition,
    fixed = TRUE
  )

  # Each optional piece is an empty string when it does not apply.
  poly_term <- if (control$degree > 0) {
    paste0(" * stats::poly(", variables$biomarker, ", degree = ", control$degree, ", raw = TRUE)")
  } else {
    ""
  }

  covariate_term <- if (!is.null(variables$covariates)) {
    paste0(" + ", paste(variables$covariates, collapse = "+"))
  } else {
    ""
  }

  strata_term <- if (!is.null(variables$strata)) {
    # Several strata variables are combined into a single interaction factor.
    strata_arg <- if (length(variables$strata) > 1) {
      paste0("I(interaction(", paste0(variables$strata, collapse = ", "), "))")
    } else {
      variables$strata
    }
    paste0("+ strata(", strata_arg, ")")
  } else {
    ""
  }

  form <- paste0(response_definition, " ~ ", variables$arm, poly_term, covariate_term, strata_term)
  stats::as.formula(form)
}
236
237		#' @describeIn h_step estimates the model with `formula` built based on
238		#' `variables` in `data` for a given `subset` and `control` parameters for the
239		#' logistic regression.
240		#'
241		#' @param formula (`formula`)\cr the regression model formula.
242		#' @param subset (`logical`)\cr subset vector.
243		#'
244		#' @return
245		#' * `h_step_rsp_est()` returns a matrix of number of observations `n`, log odds
246		#' ratio estimates `logor`, standard error `se`, and Wald confidence interval bounds
247		#' `ci_lower` and `ci_upper`. One row is included for each biomarker value in `x`.
248		#'
249		#' @export
h_step_rsp_est <- function(formula,
                           data,
                           variables,
                           x,
                           subset = rep(TRUE, nrow(data)),
                           control = control_logistic()) {
  checkmate::assert_formula(formula)
  assert_df_with_variables(data, variables)
  checkmate::assert_logical(subset, min.len = 1, any.missing = FALSE)
  checkmate::assert_numeric(x, min.len = 1, any.missing = FALSE)
  checkmate::assert_list(control, names = "named")

  # Note: `subset` in `glm` needs to be an expression referring to `data` variables.
  data$.subset <- subset

  # Collect (and silence) every warning raised while fitting, so that a single
  # summary warning can be issued afterwards.
  fit_warnings <- NULL
  collect_warning <- function(w) {
    fit_warnings <<- c(fit_warnings, w)
    invokeRestart("muffleWarning")
  }
  fit <- withCallingHandlers(
    if (is.null(variables$strata)) {
      # Without strata: ordinary logistic regression.
      stats::glm(
        formula = formula,
        data = data,
        subset = .subset,
        family = stats::binomial("logit")
      )
    } else {
      # With strata: conditional logistic regression.
      # clogit needs coxph and strata imported
      survival::clogit(
        formula = formula,
        data = data,
        subset = .subset
      )
    },
    warning = collect_warning
  )
  if (!is.null(fit_warnings)) {
    warning(paste(
      "Fit warnings occurred, please consider using a simpler model, or",
      "larger `bandwidth`, less `num_points` in `control_step()` settings"
    ))
  }

  # Produce a matrix with one row per `x` and columns `est` and `se`.
  estimates <- t(vapply(
    X = x,
    FUN = h_step_trt_effect,
    FUN.VALUE = c(1, 2),
    data = data,
    model = fit,
    variables = variables
  ))
  est <- estimates[, "est"]
  se <- estimates[, "se"]

  # Wald confidence interval on the log odds ratio scale.
  q_norm <- stats::qnorm((1 + control$conf_level) / 2)
  margin <- q_norm * se
  cbind(
    n = length(fit$y),
    logor = est,
    se = se,
    ci_lower = est - margin,
    ci_upper = est + margin
  )
}

1		#' Convert List of Groups to Data Frame
2		#'
3		#' This converts a list of group levels into a data frame format which is expected by [rtables::add_combo_levels()].
4		#'
5		#' @param groups_list (named `list` of `character`)\cr specifies the new group levels via the names and the
6		#' levels that belong to it in the character vectors that are elements of the list.
7		#'
8		#' @return [tibble::tibble()] in the required format.
9		#'
10		#' @examples
11		#' grade_groups <- list(
12		#' "Any Grade (%)" = c("1", "2", "3", "4", "5"),
13		#' "Grade 3-4 (%)" = c("3", "4"),
14		#' "Grade 5 (%)" = "5"
15		#' )
16		#' groups_list_to_df(grade_groups)
17		#'
18		#' @export
groups_list_to_df <- function(groups_list) {
  # Input must be a named list whose elements are all character vectors.
  checkmate::assert_list(groups_list, names = "named")
  lapply(groups_list, checkmate::assert_character)

  group_labels <- names(groups_list)
  n_groups <- length(groups_list)

  # One row per group: a value name derived from the label via `make_names()`,
  # the label itself, the member levels, and an empty list of extra arguments.
  tibble::tibble(
    valname = make_names(group_labels),
    label = group_labels,
    levelcombo = unname(groups_list),
    exargs = replicate(n = n_groups, expr = list())
  )
}
29
30		#' Reference and Treatment Group Combination
31		#'
32		#' @description `r lifecycle::badge("stable")`
33		#'
34		#' Facilitate the re-combination of groups divided as reference and treatment groups; it helps in arranging groups of
35		#' columns in the `rtables` framework and teal modules.
36		#'
37		#' @param fct (`factor`)\cr the variable with levels which needs to be grouped.
38		#' @param ref (`string`)\cr the reference level(s).
39		#' @param collapse (`string`)\cr a character string to separate `fct` and `ref`.
40		#'
41		#' @return A `list` with first item `ref` (reference) and second item `trt` (treatment).
42		#'
43		#' @examples
44		#' groups <- combine_groups(
45		#' fct = DM$ARM,
46		#' ref = c("B: Placebo")
47		#' )
48		#'
49		#' basic_table() %>%
50		#' split_cols_by_groups("ARM", groups) %>%
51		#' add_colcounts() %>%
52		#' analyze_vars("AGE") %>%
53		#' build_table(DM)
54		#'
55		#' @export
combine_groups <- function(fct,
                           ref = NULL,
                           collapse = "/") {
  checkmate::assert_string(collapse)
  checkmate::assert_character(ref, min.chars = 1, any.missing = FALSE, null.ok = TRUE)
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)
  group_levels <- levels(fct)

  # Without an explicit reference the first level is used; an explicit
  # reference must consist of existing levels only.
  if (!is.null(ref)) {
    checkmate::assert_subset(ref, group_levels)
  } else {
    ref <- group_levels[1]
  }

  # Partition the levels (in level order) into reference and treatment.
  is_ref <- group_levels %in% ref
  groups <- list(
    ref = group_levels[is_ref],
    trt = group_levels[!is_ref]
  )

  # Each group is named by its member levels pasted together with `collapse`.
  group_names <- lapply(groups, paste, collapse = collapse)
  stats::setNames(groups, nm = group_names)
}
78
79		#' Split Columns by Groups of Levels
80		#'
81		#' @description `r lifecycle::badge("stable")`
82		#'
83		#' @inheritParams argument_convention
84		#' @inheritParams groups_list_to_df
85		#' @param ... additional arguments to [rtables::split_cols_by()] in order. For instance, to
86		#' control formats (`format`), add a joint column for all groups (`incl_all`).
87		#'
88		#' @return A layout object suitable for passing to further layouting functions. Adding
89		#' this function to an `rtable` layout will add a column split including the given
90		#' groups to the table layout.
91		#'
92		#' @seealso [rtables::split_cols_by()]
93		#'
94		#' @examples
95		#' # 1 - Basic use
96		#'
97		#' # Without group combination `split_cols_by_groups` is
98		#' # equivalent to [rtables::split_cols_by()].
99		#' basic_table() %>%
100		#' split_cols_by_groups("ARM") %>%
101		#' add_colcounts() %>%
102		#' analyze("AGE") %>%
103		#' build_table(DM)
104		#'
105		#' # Add a reference column.
106		#' basic_table() %>%
107		#' split_cols_by_groups("ARM", ref_group = "B: Placebo") %>%
108		#' add_colcounts() %>%
109		#' analyze(
110		#' "AGE",
111		#' afun = function(x, .ref_group, .in_ref_col) {
112		#' if (.in_ref_col) {
113		#' in_rows("Diff Mean" = rcell(NULL))
114		#' } else {
115		#' in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
116		#' }
117		#' }
118		#' ) %>%
119		#' build_table(DM)
120		#'
121		#' # 2 - Adding group specification
122		#'
123		#' # Manual preparation of the groups.
124		#' groups <- list(
125		#' "Arms A+B" = c("A: Drug X", "B: Placebo"),
126		#' "Arms A+C" = c("A: Drug X", "C: Combination")
127		#' )
128		#'
129		#' # Use of split_cols_by_groups without reference column.
130		#' basic_table() %>%
131		#' split_cols_by_groups("ARM", groups) %>%
132		#' add_colcounts() %>%
133		#' analyze("AGE") %>%
134		#' build_table(DM)
135		#'
136		#' # Including differentiated output in the reference column.
137		#' basic_table() %>%
138		#' split_cols_by_groups("ARM", groups_list = groups, ref_group = "Arms A+B") %>%
139		#' analyze(
140		#' "AGE",
141		#' afun = function(x, .ref_group, .in_ref_col) {
142		#' if (.in_ref_col) {
143		#' in_rows("Diff. of Averages" = rcell(NULL))
144		#' } else {
145		#' in_rows("Diff. of Averages" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
146		#' }
147		#' }
148		#' ) %>%
149		#' build_table(DM)
150		#'
151		#' # 3 - Binary list dividing factor levels into reference and treatment
152		#'
153		#' # `combine_groups` defines reference and treatment.
154		#' groups <- combine_groups(
155		#' fct = DM$ARM,
156		#' ref = c("A: Drug X", "B: Placebo")
157		#' )
158		#' groups
159		#'
160		#' # Use group definition without reference column.
161		#' basic_table() %>%
162		#' split_cols_by_groups("ARM", groups_list = groups) %>%
163		#' add_colcounts() %>%
164		#' analyze("AGE") %>%
165		#' build_table(DM)
166		#'
167		#' # Use group definition with reference column (first item of groups).
168		#' basic_table() %>%
169		#' split_cols_by_groups("ARM", groups, ref_group = names(groups)[1]) %>%
170		#' add_colcounts() %>%
171		#' analyze(
172		#' "AGE",
173		#' afun = function(x, .ref_group, .in_ref_col) {
174		#' if (.in_ref_col) {
175		#' in_rows("Diff Mean" = rcell(NULL))
176		#' } else {
177		#' in_rows("Diff Mean" = rcell(mean(x) - mean(.ref_group), format = "xx.xx"))
178		#' }
179		#' }
180		#' ) %>%
181		#' build_table(DM)
182		#'
183		#' @export
split_cols_by_groups <- function(lyt,
                                 var,
                                 groups_list = NULL,
                                 ref_group = NULL,
                                 ...) {
  # No grouping requested: plain column split on `var`.
  if (is.null(groups_list)) {
    return(
      split_cols_by(
        lyt = lyt,
        var = var,
        ref_group = ref_group,
        ...
      )
    )
  }

  groups_df <- groups_list_to_df(groups_list)

  # The reference group is given by its label but must be passed on as the
  # corresponding value name.
  ref_valname <- if (is.null(ref_group)) {
    NULL
  } else {
    groups_df$valname[groups_df$label == ref_group]
  }

  # Split on the combined levels only (original levels are not kept).
  split_cols_by(
    lyt = lyt,
    var = var,
    split_fun = add_combo_levels(groups_df, keep_levels = groups_df$valname),
    ref_group = ref_valname,
    ...
  )
}
210
211		#' Combine Counts
212		#'
213		#' Simplifies the estimation of column counts, especially when group combination is required.
214		#'
215		#' @inheritParams combine_groups
216		#' @inheritParams groups_list_to_df
217		#'
218		#' @return A `vector` of column counts.
219		#'
220		#' @seealso [combine_groups()]
221		#'
222		#' @examples
223		#' ref <- c("A: Drug X", "B: Placebo")
224		#' groups <- combine_groups(fct = DM$ARM, ref = ref)
225		#'
226		#' col_counts <- combine_counts(
227		#' fct = DM$ARM,
228		#' groups_list = groups
229		#' )
230		#'
231		#' basic_table() %>%
232		#' split_cols_by_groups("ARM", groups) %>%
233		#' add_colcounts() %>%
234		#' analyze_vars("AGE") %>%
235		#' build_table(DM, col_counts = col_counts)
236		#'
237		#' ref <- "A: Drug X"
238		#' groups <- combine_groups(fct = DM$ARM, ref = ref)
239		#' col_counts <- combine_counts(
240		#' fct = DM$ARM,
241		#' groups_list = groups
242		#' )
243		#'
244		#' basic_table() %>%
245		#' split_cols_by_groups("ARM", groups) %>%
246		#' add_colcounts() %>%
247		#' analyze_vars("AGE") %>%
248		#' build_table(DM, col_counts = col_counts)
249		#'
250		#' @export
combine_counts <- function(fct, groups_list = NULL) {
  checkmate::assert_multi_class(fct, classes = c("factor", "character"))

  fct <- as_factor_keep_attributes(fct)

  # Tabulate once; the per-level counts are reused for every group below.
  level_counts <- table(fct)

  if (is.null(groups_list)) {
    # No grouping: one count per level, named by level.
    stats::setNames(as.numeric(level_counts), nm = dimnames(level_counts)[[1]])
  } else {
    # Grouping: sum the counts of the levels belonging to each group.
    vapply(
      X = groups_list,
      FUN = function(x) sum(level_counts[x]),
      FUN.VALUE = 1
    )
  }
}

1		#' Count Patients with Marked Laboratory Abnormalities
2		#'
3		#' @description `r lifecycle::badge("stable")`
4		#'
5		#' Primary analysis variable `.var` indicates whether single, replicated or last marked laboratory
6		#' abnormality was observed (`factor`). Additional analysis variables are `id` (`character` or `factor`)
7		#' and `direction` (`factor`) indicating the direction of the abnormality. Denominator is number of
8		#' patients with at least one valid measurement during the analysis.
9		#' * For `Single, not last` and `Last or replicated`: Numerator is number of patients
10		#' with `Single, not last` and `Last or replicated` levels, respectively.
11		#' * For `Any`: Numerator is the number of patients with either single or
12		#' replicated marked abnormalities.
13		#'
14		#' @inheritParams argument_convention
15		#' @param category (`list`)\cr with different marked category names for single
16		#' and last or replicated.
17		#' @param .stats (`character`)\cr statistics to select for the table. Run `get_stats("abnormal_by_marked")`
18		#' to see available statistics for this function.
19		#'
20		#' @note `Single, not last` and `Last or replicated` levels are mutually exclusive. If a patient has
21		#' abnormalities that meet both the `Single, not last` and `Last or replicated` criteria, then the
22		#' patient will be counted only under the `Last or replicated` category.
23		#'
24		#' @name abnormal_by_marked
25		#' @order 1
26		NULL
27
28		#' @describeIn abnormal_by_marked Statistics function for patients with marked lab abnormalities.
29		#'
30		#' @return
31		#' * `s_count_abnormal_by_marked()` returns statistic `count_fraction` with `Single, not last`,
32		#' `Last or replicated`, and `Any` results.
33		#'
34		#' @keywords internal
s_count_abnormal_by_marked <- function(df,
                                       .var = "AVALCAT1",
                                       .spl_context,
                                       category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                       variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir")) {
  checkmate::assert_string(.var)
  checkmate::assert_list(variables)
  checkmate::assert_list(category)
  checkmate::assert_subset(names(category), c("single", "last_replicated"))
  checkmate::assert_subset(names(variables), c("id", "param", "direction"))
  # At most one distinct direction value is allowed in the data passed in.
  checkmate::assert_vector(unique(df[[variables$direction]]), max.len = 1)

  assert_df_with_variables(df, c(aval = .var, variables))
  checkmate::assert_multi_class(df[[.var]], classes = c("factor", "character"))
  checkmate::assert_multi_class(df[[variables$id]], classes = c("factor", "character"))

  id_var <- variables[["id"]]

  # Denominator: unique subjects of the parameter-level split that fall into
  # the current column.
  # Patients in the denominator have at least one post-baseline visit.
  param_row <- .spl_context[.spl_context$split == variables[["param"]], ]
  parent_ids <- param_row$full_parent_df[[1]][[id_var]]
  # Some subjects may have a record for high and low directions but
  # should be counted only once.
  denom <- length(unique(parent_ids[param_row$cur_col_subset[[1]]]))

  # Empty denominator: all counts and fractions are reported as zero.
  if (denom == 0) {
    return(list(count_fraction = list(
      "Single, not last" = c(0, 0),
      "Last or replicated" = c(0, 0),
      "Any Abnormality" = c(0, 0)
    )))
  }

  # Unique subject ids having at least one record in the given categories.
  ids_in <- function(categories) {
    unique(df[df[[.var]] %in% categories, id_var, drop = TRUE])
  }
  # Pair a count with its fraction of the denominator.
  with_fraction <- function(n) c(n, n / denom)

  ids_last_replicated <- ids_in(category[["last_replicated"]])
  # Subjects who have both single and last/replicated abnormalities are counted in only the last/replicated group.
  ids_single <- setdiff(ids_in(category[["single"]]), ids_last_replicated)

  n_single <- length(ids_single)
  n_last_replicated <- length(ids_last_replicated)

  list(count_fraction = list(
    "Single, not last" = with_fraction(n_single),
    "Last or replicated" = with_fraction(n_last_replicated),
    "Any Abnormality" = with_fraction(n_single + n_last_replicated)
  ))
}
87
88		#' @describeIn abnormal_by_marked Formatted analysis function which is used as `afun`
89		#' in `count_abnormal_by_marked()`.
90		#'
91		#' @return
92		#' * `a_count_abnormal_by_marked()` returns the corresponding list with formatted [rtables::CellValue()].
93		#'
94		#' @keywords internal
# Wraps the statistics function into an analysis function and attaches the
# format used for its `count_fraction` statistic.
a_count_abnormal_by_marked <- make_afun(
  fun = s_count_abnormal_by_marked,
  .formats = c(count_fraction = format_count_fraction)
)
99
100		#' @describeIn abnormal_by_marked Layout-creating function which can take statistics function arguments
101		#' and additional format arguments. This function is a wrapper for [rtables::analyze()].
102		#'
103		#' @return
104		#' * `count_abnormal_by_marked()` returns a layout object suitable for passing to further layouting functions,
105		#' or to [rtables::build_table()]. Adding this function to an `rtable` layout will add formatted rows containing
106		#' the statistics from `s_count_abnormal_by_marked()` to the table layout.
107		#'
108		#' @examples
109		#' library(dplyr)
110		#'
111		#' df <- data.frame(
112		#' USUBJID = as.character(c(rep(1, 5), rep(2, 5), rep(1, 5), rep(2, 5))),
113		#' ARMCD = factor(c(rep("ARM A", 5), rep("ARM B", 5), rep("ARM A", 5), rep("ARM B", 5))),
114		#' ANRIND = factor(c(
115		#' "NORMAL", "HIGH", "HIGH", "HIGH HIGH", "HIGH",
116		#' "HIGH", "HIGH", "HIGH HIGH", "NORMAL", "HIGH HIGH", "NORMAL", "LOW", "LOW", "LOW LOW", "LOW",
117		#' "LOW", "LOW", "LOW LOW", "NORMAL", "LOW LOW"
118		#' )),
119		#' ONTRTFL = rep(c("", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), 2),
120		#' PARAMCD = factor(c(rep("CRP", 10), rep("ALT", 10))),
121		#' AVALCAT1 = factor(rep(c("", "", "", "SINGLE", "REPLICATED", "", "", "LAST", "", "SINGLE"), 2)),
122		#' stringsAsFactors = FALSE
123		#' )
124		#'
125		#' df <- df %>%
126		#' mutate(abn_dir = factor(
127		#' case_when(
128		#' ANRIND == "LOW LOW" ~ "Low",
129		#' ANRIND == "HIGH HIGH" ~ "High",
130		#' TRUE ~ ""
131		#' ),
132		#' levels = c("Low", "High")
133		#' ))
134		#'
135		#' # Select only post-baseline records.
136		#' df <- df %>% filter(ONTRTFL == "Y")
137		#' df_crp <- df %>%
138		#' filter(PARAMCD == "CRP") %>%
139		#' droplevels()
140		#' full_parent_df <- list(df_crp, "not_needed")
141		#' cur_col_subset <- list(rep(TRUE, nrow(df_crp)), "not_needed")
142		#' spl_context <- data.frame(
143		#' split = c("PARAMCD", "GRADE_DIR"),
144		#' full_parent_df = I(full_parent_df),
145		#' cur_col_subset = I(cur_col_subset)
146		#' )
147		#'
148		#' map <- unique(
149		#' df[df$abn_dir %in% c("Low", "High") & df$AVALCAT1 != "", c("PARAMCD", "abn_dir")]
150		#' ) %>%
151		#' lapply(as.character) %>%
152		#' as.data.frame() %>%
153		#' arrange(PARAMCD, abn_dir)
154		#'
155		#' basic_table() %>%
156		#' split_cols_by("ARMCD") %>%
157		#' split_rows_by("PARAMCD") %>%
158		#' summarize_num_patients(
159		#' var = "USUBJID",
160		#' .stats = "unique_count"
161		#' ) %>%
162		#' split_rows_by(
163		#' "abn_dir",
164		#' split_fun = trim_levels_to_map(map)
165		#' ) %>%
166		#' count_abnormal_by_marked(
167		#' var = "AVALCAT1",
168		#' variables = list(
169		#' id = "USUBJID",
170		#' param = "PARAMCD",
171		#' direction = "abn_dir"
172		#' )
173		#' ) %>%
174		#' build_table(df = df)
175		#'
176		#' basic_table() %>%
177		#' split_cols_by("ARMCD") %>%
178		#' split_rows_by("PARAMCD") %>%
179		#' summarize_num_patients(
180		#' var = "USUBJID",
181		#' .stats = "unique_count"
182		#' ) %>%
183		#' split_rows_by(
184		#' "abn_dir",
185		#' split_fun = trim_levels_in_group("abn_dir")
186		#' ) %>%
187		#' count_abnormal_by_marked(
188		#' var = "AVALCAT1",
189		#' variables = list(
190		#' id = "USUBJID",
191		#' param = "PARAMCD",
192		#' direction = "abn_dir"
193		#' )
194		#' ) %>%
195		#' build_table(df = df)
196		#'
197		#' @export
198		#' @order 2
count_abnormal_by_marked <- function(lyt,
                                     var,
                                     category = list(single = "SINGLE", last_replicated = c("LAST", "REPLICATED")),
                                     variables = list(id = "USUBJID", param = "PARAM", direction = "abn_dir"),
                                     na_str = default_na_str(),
                                     nested = TRUE,
                                     ...,
                                     .stats = NULL,
                                     .formats = NULL,
                                     .labels = NULL,
                                     .indent_mods = NULL) {
  checkmate::assert_string(var)

  # Arguments forwarded to the statistics function through `analyze()`.
  stat_args <- list(category = category, variables = variables, ...)

  # Customize the analysis function with the user's selections; the
  # `count_fraction` statistic is ungrouped.
  custom_afun <- make_afun(
    a_count_abnormal_by_marked,
    .stats = .stats,
    .formats = .formats,
    .labels = .labels,
    .indent_mods = .indent_mods,
    .ungroup_stats = "count_fraction"
  )

  # Add the analysis to the layout and return the extended layout.
  analyze(
    lyt = lyt,
    vars = var,
    afun = custom_afun,
    na_str = na_str,
    nested = nested,
    show_labels = "hidden",
    extra_args = stat_args
  )
}