Proportion Difference — prop_diff

Usage

d_proportion_diff(conf_level, method, long = FALSE)

prop_diff_wald(rsp, grp, conf_level = 0.95, correct = FALSE)

prop_diff_ha(rsp, grp, conf_level)

prop_diff_nc(rsp, grp, conf_level, correct = FALSE)

prop_diff_cmh(rsp, grp, strata, conf_level = 0.95)

s_proportion_diff(
  df,
  .var,
  .ref_group,
  .in_ref_col,
  variables = list(strata = NULL),
  conf_level = 0.95,
  method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc")
)

a_proportion_diff(
  df,
  .var,
  .ref_group,
  .in_ref_col,
  variables = list(strata = NULL),
  conf_level = 0.95,
  method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc")
)

estimate_proportion_diff(
  lyt,
  vars,
  ...,
  var_labels = vars,
  show_labels = "hidden",
  table_names = vars,
  .stats = NULL,
  .formats = NULL,
  .labels = NULL,
  .indent_mods = NULL
)

Arguments

conf_level: (proportion)
confidence level of the interval.
method: (string)
the method used for the confidence interval estimation.
long: (logical)
whether a long or a short (default) description is required.
rsp: (logical)
whether each subject is a responder or not.
grp: (factor)
vector assigning each observation to one of two groups (e.g. reference and treatment group).
correct: (logical)
include the continuity correction.
strata: (factor)
with one level per stratum and same length as rsp.
df: (data frame)
data set containing all analysis variables.
.var: (string)
single variable name that is passed by rtables when requested by a statistics function.
.ref_group: (data frame or vector)
the data corresponding to the reference group.
.in_ref_col: (logical)
TRUE when working with the reference level, FALSE otherwise.
variables: (named list of string)
list of additional analysis variables.
lyt: (layout)
input layout where analyses will be added to.
vars: (character)
variable names for the primary analysis variable to be iterated over.
...: arguments passed to s_proportion_diff().
var_labels: (character)
labels for the analysis variables (defaults to vars).
show_labels: (string)
label visibility: one of "default", "visible" and "hidden".
table_names: (character)
this can be customized when the same vars are analyzed multiple times, to avoid warnings from rtables.
.stats: (character)
statistics to select for the table.
.formats: (named character or list)
formats for the statistics.
.labels: (named character)
labels for the statistics (without indent).
.indent_mods: (named integer)
indent modifiers for the labels.

Value

String describing the analysis.

Functions

d_proportion_diff(): This is an auxiliary function that describes the analysis in s_proportion_diff.
prop_diff_wald(): The Wald interval follows the usual textbook definition for a single proportion confidence interval using the normal approximation. It is possible to include a continuity correction for Wald's interval.
prop_diff_ha(): Anderson-Hauck confidence interval.
prop_diff_nc(): Newcombe confidence interval. It is based on the Wilson score confidence interval for a single binomial proportion.
prop_diff_cmh(): Calculates the weighted difference. This is defined as the difference in response rates between the experimental treatment group and the control treatment group, adjusted for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared test, use stats::mantelhaen.test().
s_proportion_diff(): Statistics function estimating the difference in terms of responder proportion.
a_proportion_diff(): Formatted Analysis function which can be further customized by calling rtables::make_afun() on it. It is used as afun in rtables::analyze().
estimate_proportion_diff(): Adds a descriptive analyze layer to rtables pipelines. The analysis is applied to a data frame and returns the estimates in rcells. The ellipsis (...) conveys arguments to s_proportion_diff(), for instance conf_level = 0.90 or method = "ha" to select the confidence level and the interval method.

Examples

# Wald confidence interval
set.seed(2)
rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
grp <- factor(c(rep("A", 10), rep("B", 10)))
prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#> $diff
#> [1] 0
#> 
#> $diff_ci
#> [1] -0.4382613  0.4382613
#> 

# Anderson-Hauck confidence interval
## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#> $diff
#> [1] 0.25
#> 
#> $diff_ci
#> [1] -0.9195011  1.0000000
#> 

## Edge case: Same proportion of response in A and B.
rsp <- c(TRUE, FALSE, TRUE, FALSE)
grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#> $diff
#> [1] 0
#> 
#> $diff_ci
#> [1] -0.8451161  0.8451161
#> 

# Newcombe confidence interval

set.seed(1)
rsp <- c(
  sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
  sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
)
grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
table(rsp, grp)
#>        grp
#> rsp      B  A
#>   FALSE 20 10
#>   TRUE  20 30
prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#> $diff
#> [1] 0.25
#> 
#> $diff_ci
#> [1] 0.07193388 0.40725819
#> 

# Cochran-Mantel-Haenszel confidence interval

set.seed(2)
rsp <- sample(c(TRUE, FALSE), 100, TRUE)
grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
grp <- factor(grp, levels = c("Placebo", "Treatment"))
strata_data <- data.frame(
  "f1" = sample(c("a", "b"), 100, TRUE),
  "f2" = sample(c("x", "y", "z"), 100, TRUE),
  stringsAsFactors = TRUE
)

prop_diff_cmh(
  rsp = rsp, grp = grp, strata = interaction(strata_data),
  conf_level = 0.90
)
#> $prop
#>   Placebo Treatment 
#> 0.5331117 0.3954251 
#> 
#> $prop_ci
#> $prop_ci$Placebo
#> [1] 0.4306536 0.6355698
#> 
#> $prop_ci$Treatment
#> [1] 0.2890735 0.5017768
#> 
#> 
#> $diff
#> [1] -0.1376866
#> 
#> $diff_ci
#> [1] -0.285363076  0.009989872
#> 

# Summary

## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
dta <- data.frame(
  rsp = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE),
  grp = factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
)

s_proportion_diff(
  df = subset(dta, grp == "A"),
  .var = "rsp",
  .ref_group = subset(dta, grp == "B"),
  .in_ref_col = FALSE,
  conf_level = 0.90,
  method = "ha"
)
#> $diff
#> [1] 25
#> attr(,"label")
#> [1] "Difference in Response rate (%)"
#> 
#> $diff_ci
#> [1] -91.95011 100.00000
#> attr(,"label")
#> [1] "90% CI (Anderson-Hauck)"
#> 

a_proportion_diff(
  df = subset(dta, grp == "A"),
  .var = "rsp",
  .ref_group = subset(dta, grp == "B"),
  .in_ref_col = FALSE,
  conf_level = 0.90,
  method = "ha"
)
#> RowsVerticalSection (in_rows) object print method:
#> ----------------------------
#>   row_name formatted_cell indent_mod                       row_label
#> 1     diff           25.0          0 Difference in Response rate (%)
#> 2  diff_ci (-92.0, 100.0)          1         90% CI (Anderson-Hauck)

l <- basic_table() %>%
  split_cols_by(var = "grp", ref_group = "B") %>%
  estimate_proportion_diff(
    vars = "rsp",
    conf_level = 0.90,
    method = "ha"
  )

build_table(l, df = dta)
#>                                   B         A       
#> ————————————————————————————————————————————————————
#> Difference in Response rate (%)            25.0     
#>   90% CI (Anderson-Hauck)             (-92.0, 100.0)