Usage
d_proportion_diff(conf_level, method, long = FALSE)
prop_diff_wald(rsp, grp, conf_level = 0.95, correct = FALSE)
prop_diff_ha(rsp, grp, conf_level)
prop_diff_nc(rsp, grp, conf_level, correct = FALSE)
prop_diff_cmh(rsp, grp, strata, conf_level = 0.95)
prop_diff_strat_nc(
rsp,
grp,
strata,
weights_method = c("cmh", "wilson_h"),
conf_level = 0.95,
correct = FALSE
)
s_proportion_diff(
df,
.var,
.ref_group,
.in_ref_col,
variables = list(strata = NULL),
conf_level = 0.95,
method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe",
"strat_newcombecc"),
weights_method = "cmh"
)
a_proportion_diff(
df,
.var,
.ref_group,
.in_ref_col,
variables = list(strata = NULL),
conf_level = 0.95,
method = c("waldcc", "wald", "cmh", "ha", "newcombe", "newcombecc", "strat_newcombe",
"strat_newcombecc"),
weights_method = "cmh"
)
estimate_proportion_diff(
lyt,
vars,
...,
var_labels = vars,
show_labels = "hidden",
table_names = vars,
.stats = NULL,
.formats = NULL,
.labels = NULL,
.indent_mods = NULL
)
Arguments
- conf_level
(
proportion
)
confidence level of the interval.- method
(
string
)
the method used for the confidence interval estimation.- long
(
logical
)
Whether a long or a short (default) description is required.- rsp
(
logical
)
whether each subject is a responder or not.- grp
(
factor
)
vector assigning observations to one out of two groups (e.g. reference and treatment group).- correct
(
logical
)
whether to include the continuity correction. For further information, see for example stats::prop.test()
.- strata
(
factor
)
stratification variable with one level per stratum and the same length as rsp
.- weights_method
(
string
)
it can be one of c("cmh", "wilson_h")
and directs the way weights are estimated.- df
(
data frame
)
data set containing all analysis variables.- .var
(
string
)
single variable name that is passed by rtables
when requested by a statistics function.- .ref_group
(
data frame
orvector
)
the data corresponding to the reference group.- .in_ref_col
(
logical
)
TRUE when working with the reference level, FALSE
otherwise.- variables
(named
list
ofstring
)
list of additional analysis variables.- lyt
(
layout
)
input layout where analyses will be added to.- vars
(
character
)
variable names for the primary analysis variable to be iterated over.- ...
arguments passed to
s_proportion_diff()
.- var_labels
(character) labels for the analysis variables; defaults to vars.
- show_labels
label visibility: one of "default", "visible" and "hidden".
- table_names
(
character
)
this can be customized in case the same vars
are analyzed multiple times, to avoid warnings from rtables
.- .stats
(
character
)
statistics to select for the table.- .formats
(named
character
orlist
)
formats for the statistics.- .labels
(named
character
)
labels for the statistics (without indent).- .indent_mods
(named
integer
)
indent modifiers for the labels.
Functions
d_proportion_diff()
: This is an auxiliary function that describes the analysis in s_proportion_diff()
.prop_diff_wald()
: The Wald interval for the difference in proportions follows the usual textbook definition based on the normal approximation, analogous to the single-proportion confidence interval. It is possible to include a continuity correction for Wald's interval. prop_diff_ha()
: Anderson-Hauck confidence interval.prop_diff_nc()
: Newcombe confidence interval. It is based on the Wilson score confidence interval for a single binomial proportion.prop_diff_cmh()
: Calculates the weighted difference. This is defined as the difference in response rates between the experimental treatment group and the control treatment group, adjusted for stratification factors by applying Cochran-Mantel-Haenszel (CMH) weights. For the CMH chi-squared test, use stats::mantelhaen.test()
.prop_diff_strat_nc()
: Calculates the stratified Newcombe confidence interval and difference in response rates between the experimental treatment group and the control treatment group, adjusted for stratification factors. This implementation closely follows the one proposed by Yan and Su (2010). Weights can be estimated from the heuristic proposed in prop_strat_wilson()
or from CMH-derived weights (see prop_diff_cmh()
).s_proportion_diff()
: Statistics function estimating the difference in terms of responder proportion.a_proportion_diff()
: Formatted Analysis function which can be further customized by calling rtables::make_afun()
on it. It is used as afun
in rtables::analyze()
.estimate_proportion_diff()
: Adds a descriptive analyze layer to rtables
pipelines. The analysis is applied to a data frame
and returns the estimates in rcells
. The ellipsis (...
) conveys arguments to s_proportion_diff()
, for instance na.rm = FALSE
if missing data should be accounted for.
References
Yan X, Su XG (2010). “Stratified Wilson and Newcombe Confidence Intervals for Multiple Binomial Proportions.” Stat. Biopharm. Res., 2(3), 329–335.
Examples
# Wald confidence interval
set.seed(2)
rsp <- sample(c(TRUE, FALSE), replace = TRUE, size = 20)
grp <- factor(c(rep("A", 10), rep("B", 10)))
prop_diff_wald(rsp = rsp, grp = grp, conf_level = 0.95, correct = FALSE)
#> $diff
#> [1] 0
#>
#> $diff_ci
#> [1] -0.4382613 0.4382613
#>
# Anderson-Hauck confidence interval
## "Mid" case: 3/4 respond in group A, 1/2 respond in group B.
rsp <- c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE)
grp <- factor(c("A", "B", "A", "B", "A", "A"), levels = c("B", "A"))
prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.90)
#> $diff
#> [1] 0.25
#>
#> $diff_ci
#> [1] -0.9195011 1.0000000
#>
## Edge case: Same proportion of response in A and B.
rsp <- c(TRUE, FALSE, TRUE, FALSE)
grp <- factor(c("A", "A", "B", "B"), levels = c("A", "B"))
prop_diff_ha(rsp = rsp, grp = grp, conf_level = 0.6)
#> $diff
#> [1] 0
#>
#> $diff_ci
#> [1] -0.8451161 0.8451161
#>
# Newcombe confidence interval
set.seed(1)
rsp <- c(
sample(c(TRUE, FALSE), size = 40, prob = c(3 / 4, 1 / 4), replace = TRUE),
sample(c(TRUE, FALSE), size = 40, prob = c(1 / 2, 1 / 2), replace = TRUE)
)
grp <- factor(rep(c("A", "B"), each = 40), levels = c("B", "A"))
table(rsp, grp)
#> grp
#> rsp B A
#> FALSE 20 10
#> TRUE 20 30
prop_diff_nc(rsp = rsp, grp = grp, conf_level = 0.9)
#> $diff
#> [1] 0.25
#>
#> $diff_ci
#> [1] 0.07193388 0.40725819
#>
# Cochran-Mantel-Haenszel confidence interval
set.seed(2)
rsp <- sample(c(TRUE, FALSE), 100, TRUE)
grp <- sample(c("Placebo", "Treatment"), 100, TRUE)
grp <- factor(grp, levels = c("Placebo", "Treatment"))
strata_data <- data.frame(
"f1" = sample(c("a", "b"), 100, TRUE),
"f2" = sample(c("x", "y", "z"), 100, TRUE),
stringsAsFactors = TRUE
)
prop_diff_cmh(
rsp = rsp, grp = grp, strata = interaction(strata_data),
conf_level = 0.90
)
#> $prop
#> Placebo Treatment
#> 0.5331117 0.3954251
#>
#> $prop_ci
#> $prop_ci$Placebo
#> [1] 0.4306536 0.6355698
#>
#> $prop_ci$Treatment
#> [1] 0.2890735 0.5017768
#>
#>
#> $diff
#> [1] -0.1376866
#>
#> $diff_ci
#> [1] -0.285363076 0.009989872
#>
#> $weights
#> a.x b.x a.y b.y a.z b.z
#> 0.1148388 0.2131696 0.1148388 0.2131696 0.1767914 0.1671918
#>
#> $n1
#> a.x b.x a.y b.y a.z b.z
#> 4 11 8 11 13 11
#>
#> $n2
#> a.x b.x a.y b.y a.z b.z
#> 8 9 4 9 6 6
#>
# Stratified Newcombe confidence interval
set.seed(2)
data_set <- data.frame(
"rsp" = sample(c(TRUE, FALSE), 100, TRUE),
"f1" = sample(c("a", "b"), 100, TRUE),
"f2" = sample(c("x", "y", "z"), 100, TRUE),
"grp" = sample(c("Placebo", "Treatment"), 100, TRUE),
stringsAsFactors = TRUE
)
prop_diff_strat_nc(
rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
weights_method = "cmh",
conf_level = 0.90
)
#> $diff
#> [1] -0.05777672
#>
#> $diff_ci
#> lower upper
#> -0.2236537 0.1119331
#>
prop_diff_strat_nc(
rsp = data_set$rsp, grp = data_set$grp, strata = interaction(data_set[2:3]),
weights_method = "wilson_h",
conf_level = 0.90
)
#> $diff
#> [1] -0.07771884
#>
#> $diff_ci
#> lower upper
#> -0.2540844 0.1027720
#>
# Summary
## Simulated data: 100 rows with a random response, two groups and two stratification factors.
nex <- 100 # Number of example rows
dta <- data.frame(
"rsp" = sample(c(TRUE, FALSE), nex, TRUE),
"grp" = sample(c("A", "B"), nex, TRUE),
"f1" = sample(c("a1", "a2"), nex, TRUE),
"f2" = sample(c("x", "y", "z"), nex, TRUE),
stringsAsFactors = TRUE
)
s_proportion_diff(
df = subset(dta, grp == "A"),
.var = "rsp",
.ref_group = subset(dta, grp == "B"),
.in_ref_col = FALSE,
conf_level = 0.90,
method = "ha"
)
#> $diff
#> [1] -0.1204336
#> attr(,"label")
#> [1] "Difference in Response rate (%)"
#>
#> $diff_ci
#> [1] -17.82763 17.58677
#> attr(,"label")
#> [1] "90% CI (Anderson-Hauck)"
#>
# CMH example with strata
s_proportion_diff(
df = subset(dta, grp == "A"),
.var = "rsp",
.ref_group = subset(dta, grp == "B"),
.in_ref_col = FALSE,
variables = list(strata = c("f1", "f2")),
conf_level = 0.90,
method = "cmh"
)
#> $diff
#> [1] -0.1045856
#> attr(,"label")
#> [1] "Difference in Response rate (%)"
#>
#> $diff_ci
#> [1] -15.98426 15.77509
#> attr(,"label")
#> [1] "90% CI (CMH, without correction)"
#>
a_proportion_diff(
df = subset(dta, grp == "A"),
.var = "rsp",
.ref_group = subset(dta, grp == "B"),
.in_ref_col = FALSE,
conf_level = 0.90,
method = "ha"
)
#> RowsVerticalSection (in_rows) object print method:
#> ----------------------------
#> row_name formatted_cell indent_mod row_label
#> 1 diff -0.1 0 Difference in Response rate (%)
#> 2 diff_ci (-17.8, 17.6) 1 90% CI (Anderson-Hauck)
l <- basic_table() %>%
split_cols_by(var = "grp", ref_group = "B") %>%
estimate_proportion_diff(
vars = "rsp",
conf_level = 0.90,
method = "ha"
)
build_table(l, df = dta)
#> B A
#> ———————————————————————————————————————————————————
#> Difference in Response rate (%) -0.1
#> 90% CI (Anderson-Hauck) (-17.8, 17.6)