The goal of {epitabulate} is to facilitate producing tables for descriptive epidemiological analysis. This is a product of the R4EPIs project; learn more at https://r4epi.github.io/sitrep/
You can install {epitabulate} from CRAN:
install.packages("epitabulate")You can also install the in-development version from GitHub using the {remotes} package (but there’s no guarantee that it will be stable):
# install.packages("remotes")
remotes::install_github("R4EPI/epitabulate") Here is an example of the tables produced by {epitabulate} from a randomly generated dataset.
library(dplyr)
#>
#> Attache Paket: 'dplyr'
#> Die folgenden Objekte sind maskiert von 'package:stats':
#>
#> filter, lag
#> Die folgenden Objekte sind maskiert von 'package:base':
#>
#> intersect, setdiff, setequal, union
library(epitabulate)
# define how many people to simulate
n <- 1000
# generate a fake dataset
linelist <- data.frame(
sex = sample(c("male", "female"), n, replace = TRUE),
age_group = sample(c("<5", "5-14", "15-29", "30-44", "45-59", "60+"), n, replace = TRUE,
prob = c(0.1, 0.15, 0.25, 0.2, 0.15, 0.15)),
fever = sample(c("yes", "no"), n, replace = TRUE, prob = c(0.3, 0.7)),
death = sample(c("yes", "no"), n, replace = TRUE, prob = c(0.05, 0.95)),
observation_time = round(runif(n, min = 1, max = 365), 1) # days under observation
)
# turn yes/no variables into logical (TRUE/FALSE) variables
linelist <- linelist |>
mutate(
fever = grepl("yes", fever),
death = grepl("yes", death)
)There are three functions that will provide quick statistics for different rates based on binomial estimates of proportions from binom::binom.wilson()
attack_rate(10, 50)
#> cases population ar lower upper
#> 1 10 50 20 11.24375 33.03711
case_fatality_rate(2, 50)
#> deaths population cfr lower upper
#> 1 2 50 4 1.103888 13.46009
mortality_rate(40, 50000)
#> deaths population mortality per 10 000 lower upper
#> 1 40 50000 8 5.87591 10.89109In addition, it’s possible to rapidly calculate Case fatality rate from a linelist, stratified by different groups (e.g. gender):
case_fatality_rate_df(linelist,
deaths = death,
group = sex,
add_total = TRUE,
mergeCI = TRUE)
#> # A tibble: 4 × 5
#> sex deaths population cfr ci
#> <fct> <int> <int> <dbl> <chr>
#> 1 female 30 483 6.21 (4.39-8.73)
#> 2 male 19 517 3.68 (2.37-5.67)
#> 3 (Missing) 0 0 NaN (NaN-NaN)
#> 4 Total 49 1000 4.9 (3.73-6.42)It is also possible to add these proportions on to {gtsummary} tabulations
cfr <- linelist |>
select(death) |>
gtsummary::tbl_summary(
statistic = everything() ~ "{N}",
label = death ~ "All participants"
) %>%
add_cfr(deaths_var = "death")
#> There was a warning for variable "death"
#> ! cfr by strata is not currently available, ignoring `by` argumentIt is possible to add counts to {gtsummary} tbl_uvregression
## Odds ratios
or <- gtsummary::tbl_uvregression(linelist,
method = glm,
y = death,
include = fever,
method.args = list(family = binomial),
exponentiate = TRUE,
hide_n = TRUE) |>
add_crosstabs(wide = TRUE)#> tbl_id1 variable var_label var_type row_type header_row N_obs N_event N
#> 1 1 fever fever dichotomous label FALSE 1000 49 1000
#> coefficients_type coefficients_label label term var_class var_nlevels
#> 1 logistic OR fever feverTRUE logical 2
#> contrasts contrasts_type n_obs n_event_FALSE n_nonevent_FALSE
#> 1 contr.treatment treatment 302 20 282
#> n_event_TRUE n_nonevent_TRUE estimate std.error statistic nevent ci
#> 1 29 669 1.636097 0.299205 1.645405 49 0.90, 2.92
#> conf.low conf.high p.value n_event_NA n_nonevent_NA reference_row
#> 1 0.898395 2.924016 0.09988628 NA NA FALSE
## Risk ratios
rr <- gtsummary::tbl_uvregression(linelist,
method = MASS::glm.nb,
y = death,
include = fever,
exponentiate = TRUE,
hide_n = TRUE) |>
add_crosstabs(wide = TRUE)
#> There was a warning constructing the model for variable "fever". See message
#> below.
#> ! Iterationsgrenze erreicht and Iterationsgrenze erreicht#> tbl_id1 variable var_label var_type row_type header_row N_obs N_event N
#> 1 1 fever fever dichotomous label FALSE 1000 49 1000
#> coefficients_type coefficients_label label term var_class var_nlevels
#> 1 poisson RR fever feverTRUE logical 2
#> contrasts contrasts_type exposure n_obs_FALSE n_event_FALSE n_obs_TRUE
#> 1 contr.treatment treatment 302 302 20 698
#> n_event_TRUE estimate std.error statistic nevent ci conf.low
#> 1 29 1.593971 0.2906669 1.603996 49 0.89, 2.80 0.8892179
#> conf.high p.value n_obs_NA n_event_NA reference_row
#> 1 2.80055 0.1087149 NA NA FALSE
## Incidence rate ratios
irr <- gtsummary::tbl_uvregression(linelist,
method = glm,
y = death,
include = fever,
method.args = list(family = poisson,
offset = log(observation_time)),
exponentiate = TRUE,
hide_n = TRUE) |>
add_crosstabs(wide = TRUE)#> tbl_id1 variable var_label var_type row_type header_row N_obs N_event N
#> 1 1 fever fever dichotomous label FALSE 1000 49 1000
#> coefficients_type coefficients_label label term var_class var_nlevels
#> 1 poisson IRR fever feverTRUE logical 2
#> contrasts contrasts_type n_obs exposure_FALSE n_event_FALSE
#> 1 contr.treatment treatment 302 55206.9 20
#> exposure_TRUE n_event_TRUE estimate std.error statistic nevent ci
#> 1 128410.7 29 1.604131 0.2906557 1.625918 49 0.89, 2.82
#> conf.low conf.high p.value exposure_NA n_event_NA reference_row
#> 1 0.8948995 2.818357 0.1039672 NA NA FALSEIt is also possible to run stratified analysis to produce Cochran Mantel-haenszel estimates.
cmh <- tbl_cmh(data = linelist,
case = death,
exposure = fever,
strata = age_group,
measure = "OR") |>
add_crosstabs()#> # A tibble: 21 × 10
#> `**Strata**` `**Characteristic**` `**Case (n)**` `**Control (n)**` `**OR**`
#> <chr> <chr> <chr> <chr> <chr>
#> 1 Crude fever <NA> <NA> <NA>
#> 2 <NA> FALSE 29 669 <NA>
#> 3 <NA> TRUE 20 282 1.64
#> 4 <5 fever <NA> <NA> <NA>
#> 5 <NA> FALSE 2 67 <NA>
#> 6 <NA> TRUE 1 23 1.46
#> 7 15-29 fever <NA> <NA> <NA>
#> 8 <NA> FALSE 3 163 <NA>
#> 9 <NA> TRUE 2 91 1.19
#> 10 30-44 fever <NA> <NA> <NA>
#> # ℹ 11 more rows
#> # ℹ 5 more variables: `**95% CI**` <chr>, `**p-value**` <chr>,
#> # `**CMH estimate**` <chr>, `**95% CI**` <chr>, `**p-value**` <chr>