Tabulate counts and proportions
tab_linelist(
x,
...,
strata = NULL,
keep = TRUE,
drop = NULL,
na.rm = TRUE,
prop_total = FALSE,
row_total = FALSE,
col_total = FALSE,
wide = TRUE,
transpose = NULL,
digits = 1,
pretty = TRUE
)
tab_survey(
x,
...,
strata = NULL,
keep = TRUE,
drop = NULL,
na.rm = TRUE,
prop_total = FALSE,
row_total = FALSE,
col_total = FALSE,
wide = TRUE,
transpose = NULL,
digits = 1,
method = "logit",
deff = FALSE,
pretty = TRUE
)
a data.frame() or tbl_svy object
categorical variables to tabulate
a stratifier to split the data
a character vector specifying which values to retain in the
tabulation. Defaults to TRUE, which keeps all the values.
a character vector specifying which values to drop in the
tabulation. Defaults to NULL, which keeps all values.
When TRUE (default), missing (NA) values present in the tabulated
variables will be removed from the data set with a warning, causing a change
in denominator for the tabulations. Setting this to FALSE creates an
explicit missing value called "(Missing)".
if TRUE and strata is not NULL, then the totals of the
rows will be reported as proportions of the total data set, otherwise, they
will be proportions within the stratum (default).
create a new column with the total counts for each row of stratified data.
create a new row with the total counts for each column of stratified data.
if TRUE (default) and strata is defined, then the results are
presented in a wide table with each stratification counts and estimates in
separate columns. If FALSE, then the data will be presented in a long
format where the counts and estimates are presented in single columns. This
has no effect if strata is not defined.
if wide = TRUE, then this will transpose the columns to
the rows, which is useful when you stratify by age group. Default is
NULL, which will not transpose anything. You have three options for
transpose:
transpose = "variable": uses the variable column, (dropping values if strata exists).
Use this if you know that your values are all identical or at least
identifiable by the variable name.
transpose = "value" : uses the value column, (dropping variables if strata exists).
Use this if your values are important and the variable names are
generic placeholders.
transpose = "both" : combines the variable and value columns.
Use this if both the variables and values are important.
(survey only) if pretty = FALSE, this indicates the number
of digits used for proportion and CI
(survey only) if TRUE (default), the proportion and CI are merged into a single column
(survey only) a method from survey::svyciprop() to calculate
the confidence interval. Defaults to "logit".
a logical indicating if the design effect should be reported.
Defaults to FALSE.
a tibble::tibble() with a column for variables, a column for values,
and counts and proportions. If strata is not NULL and wide = TRUE,
then there will be separate columns for each strata for the counts and
proportions. Survey data will report confidence intervals.
# Linelist examples: run only when both optional packages can be loaded.
# `&&` is the scalar, short-circuiting operator intended for `if ()`
# conditions; it also matches the guard used by the survey examples.
have_packages <- require("matchmaker") && require("epidict")
#> Loading required package: matchmaker
#> Loading required package: epidict
if (have_packages) {
  withAutoprint({
    # Simulating linelist data
    linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF")
    measles_dict <- epidict::msf_dict("Measles", compact = FALSE)

    # Cleaning linelist data: recode option codes to option names using the
    # dictionary, matched per variable on the data element short name
    linelist_clean <- matchmaker::match_df(
      x = linelist,
      dictionary = measles_dict,
      from = "option_code",
      to = "option_name",
      by = "data_element_shortname",
      order = "option_order_in_set"
    )

    # get a descriptive table by sex
    tab_linelist(linelist_clean, sex)

    # describe pregnancy statistics, but remove missing data from the tally
    tab_linelist(linelist_clean, trimester, na.rm = TRUE)

    # describe by symptom; transpose = "value" spreads the values
    # (here Yes/No) across the columns, one row per variable
    tab_linelist(linelist_clean,
      cough, nasal_discharge, severe_oral_lesions,
      transpose = "value"
    )

    # describe pregnancy statistics, stratifying by vitamin A prescription
    # and adding a column with the total count of each row
    tab_linelist(linelist_clean, trimester, sex,
      strata = prescribed_vitamin_a,
      na.rm = TRUE, row_total = TRUE
    )
  })
}
#> > linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF")
#> > measles_dict <- epidict::msf_dict("Measles", compact = FALSE)
#> > linelist_clean <- matchmaker::match_df(x = linelist, dictionary = measles_dict,
#> + from = "option_code", to = "option_name", by = "data_element_shortname", order = "option_order_in_set")
#> > tab_linelist(linelist_clean, sex)
#> # A tibble: 3 × 4
#> variable value n proportion
#> <chr> <chr> <int> <dbl>
#> 1 sex Male 318 31.8
#> 2 sex Female 354 35.4
#> 3 sex Unknown/unspecified 328 32.8
#> > tab_linelist(linelist_clean, trimester, na.rm = TRUE)
#> Warning: Removing 911 missing values
#> # A tibble: 3 × 4
#> variable value n proportion
#> <chr> <chr> <int> <dbl>
#> 1 trimester 1st trimester 35 39.3
#> 2 trimester 2nd trimester 20 22.5
#> 3 trimester 3rd trimester 34 38.2
#> > tab_linelist(linelist_clean, cough, nasal_discharge, severe_oral_lesions,
#> + transpose = "value")
#> # A tibble: 3 × 5
#> variable `Yes n` `Yes proportion` `No n` `No proportion`
#> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 cough 545 54.5 455 45.5
#> 2 nasal_discharge 507 50.7 493 49.3
#> 3 severe_oral_lesions 483 48.3 517 51.7
#> > tab_linelist(linelist_clean, trimester, sex, strata = prescribed_vitamin_a,
#> + na.rm = TRUE, row_total = TRUE)
#> Warning: Removing 911 missing values
#> # A tibble: 6 × 7
#> variable value `Yes n` `Yes proportion` `No n` No propo…¹ Total
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 trimester 1st trimester 22 44 13 33.3 35
#> 2 trimester 2nd trimester 11 22 9 23.1 20
#> 3 trimester 3rd trimester 17 34 17 43.6 34
#> 4 sex Male 138 28.8 180 34.5 318
#> 5 sex Female 174 36.3 180 34.5 354
#> 6 sex Unknown/unspecified 167 34.9 161 30.9 328
#> # … with abbreviated variable name ¹`No proportion`
# Survey examples: run only when both survey and srvyr can be loaded.
have_survey_packages <- require("survey") && require("srvyr")
#> Loading required package: survey
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#>
#> Attaching package: ‘survey’
#> The following object is masked from ‘package:graphics’:
#>
#> dotchart
#> Loading required package: srvyr
#>
#> Attaching package: ‘srvyr’
#> The following object is masked from ‘package:stats’:
#>
#> filter
if (have_survey_packages) {
withAutoprint({
# load the example data sets that provide `apistrat`
# (presumably shipped with the survey package)
data(api)
# stratified sample: strata are given by stype, sampling weights by pw
surv <- apistrat %>%
as_survey_design(strata = stype, weights = pw)
# tabulate awards within each stratum, adding row/column totals and the
# design effect
s <- surv %>%
tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE, deff = TRUE)
s
# making things pretty
s %>%
# wrap all "n" variables in parentheses (note space before n).
epikit::augment_redundant(" (n)" = " n") %>%
# relabel all columns containing "ci" to "% (95% CI)" and all columns
# containing "deff" to "Design Effect"
epikit::rename_redundant(
"% (95% CI)" = ci,
"Design Effect" = deff
)
# long data: one row per value and stratum instead of one set of columns
# per stratum
surv %>%
tab_survey(awards, strata = stype, wide = FALSE)
# tabulate binary variables, keeping only the "Yes" rows
surv %>%
tab_survey(yr.rnd, sch.wide, awards, keep = "Yes")
# stratify the binary variables
surv %>%
tab_survey(yr.rnd, sch.wide, awards,
strata = stype,
keep = "Yes"
)
# invert the tabulation: drop "Yes" so that only the "No" rows remain
surv %>%
tab_survey(yr.rnd, sch.wide, awards,
strata = stype,
drop = "Yes",
deff = TRUE,
row_total = TRUE
)
})
}
#> > data(api)
#> > surv <- apistrat %>% as_survey_design(strata = stype, weights = pw)
#> > s <- surv %>% tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE,
#> + deff = TRUE)
#> > s
#> # A tibble: 3 × 12
#> variable value `E n` `E ci` E def…¹ `H n` `H ci` H def…² `M n` `M ci` M def…³
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 awards No 1194. 27.0% … 0.768 513. 68.0%… 0.388 529. 52.0%… 0.534
#> 2 awards Yes 3227. 73.0% … 0.319 242. 32.0%… 0.308 489. 48.0%… 0.319
#> 3 awards Total 4421. NA (NA… NA 755. NA (N… NA 1018. NA (N… NA
#> # … with 1 more variable: `Total n` <dbl>, and abbreviated variable names
#> # ¹`E deff`, ²`H deff`, ³`M deff`
#> > s %>% epikit::augment_redundant(` (n)` = " n") %>% epikit::rename_redundant(`% (95% CI)` = ci,
#> + `Design Effect` = deff)
#> # A tibble: 3 × 12
#> variable value `E (n)` % (95…¹ Desig…² `H (n)` % (95…³ Desig…⁴ `M (n)` % (95…⁵
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 awards No 1194. 27.0% … 0.768 513. 68.0% … 0.388 529. 52.0% …
#> 2 awards Yes 3227. 73.0% … 0.319 242. 32.0% … 0.308 489. 48.0% …
#> 3 awards Total 4421. NA (NA… NA 755. NA (NA… NA 1018. NA (NA…
#> # … with 2 more variables: `Design Effect` <dbl>, `Total (n)` <dbl>, and
#> # abbreviated variable names ¹`% (95% CI)`, ²`Design Effect`, ³`% (95% CI)`,
#> # ⁴`Design Effect`, ⁵`% (95% CI)`
#> > surv %>% tab_survey(awards, strata = stype, wide = FALSE)
#> # A tibble: 6 × 5
#> # Groups: value [2]
#> variable value stype n ci
#> <chr> <chr> <fct> <dbl> <chr>
#> 1 awards No E 1194. 27.0% (19.1--36.7)
#> 2 awards No H 513. 68.0% (53.5--79.7)
#> 3 awards No M 529. 52.0% (37.9--65.8)
#> 4 awards Yes E 3227. 73.0% (63.3--80.9)
#> 5 awards Yes H 242. 32.0% (20.3--46.5)
#> 6 awards Yes M 489. 48.0% (34.2--62.1)
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, keep = "Yes")
#> # A tibble: 3 × 4
#> variable value n ci
#> <chr> <chr> <dbl> <chr>
#> 1 yr.rnd Yes 852. 13.7% (9.1--20.3)
#> 2 sch.wide Yes 5128. 82.8% (77.4--87.1)
#> 3 awards Yes 3958. 63.9% (56.8--70.5)
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, keep = "Yes")
#> # A tibble: 3 × 8
#> variable value `E n` `E ci` `H n` `H ci` `M n` `M ci`
#> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr>
#> 1 yr.rnd Yes 796. 18.0% (11.6--26.9) 15.1 2.0% (0.3--13.7) 40.7 4.0% (…
#> 2 sch.wide Yes 4023. 91.0% (83.4--95.3) 393. 52.0% (37.9--65.8) 713. 70.0% …
#> 3 awards Yes 3227. 73.0% (63.3--80.9) 242. 32.0% (20.3--46.5) 489. 48.0% …
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, drop = "Yes",
#> + deff = TRUE, row_total = TRUE)
#> # A tibble: 3 × 12
#> variable value `E n` `E ci` E def…¹ `H n` `H ci` H def…² `M n` `M ci` M def…³
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 yr.rnd No 3625. 82.0% … 0.103 740. 98.0%… 0.0402 977. 96.0%… 0.0658
#> 2 sch.wide No 398. 9.0% (… 1.37 362. 48.0%… 0.704 305. 30.0%… 0.811
#> 3 awards No 1194. 27.0% … 0.768 513. 68.0%… 0.388 529. 52.0%… 0.534
#> # … with 1 more variable: `Total n` <dbl>, and abbreviated variable names
#> # ¹`E deff`, ²`H deff`, ³`M deff`