Tabulate counts and proportions
tab_linelist(
x,
...,
strata = NULL,
keep = TRUE,
drop = NULL,
na.rm = TRUE,
prop_total = FALSE,
row_total = FALSE,
col_total = FALSE,
wide = TRUE,
transpose = NULL,
digits = 1,
pretty = TRUE
)
tab_survey(
x,
...,
strata = NULL,
keep = TRUE,
drop = NULL,
na.rm = TRUE,
prop_total = FALSE,
row_total = FALSE,
col_total = FALSE,
wide = TRUE,
transpose = NULL,
digits = 1,
method = "logit",
deff = FALSE,
pretty = TRUE
)
a data.frame()
or tbl_svy object
categorical variables to tabulate
a stratifier to split the data
a character vector specifying which values to retain in the
tabulation. Defaults to TRUE
, which keeps all the values.
a character vector specifying which values to drop in the
tabulation. Defaults to NULL
, which keeps all values.
When TRUE
(default), missing (NA) values present in the tabulated variables
will be removed from the data set with a warning, causing a change in
denominator for the tabulations. Setting this to FALSE
creates an
explicit missing value called "(Missing)".
if TRUE
and strata
is not NULL
, then the totals of the
rows will be reported as proportions of the total data set, otherwise, they
will be proportions within the stratum (default).
create a new column with the total counts for each row of stratified data.
create a new row with the total counts for each column of stratified data.
if TRUE
(default) and strata is defined, then the results are
presented in a wide table with each stratification counts and estimates in
separate columns. If FALSE
, then the data will be presented in a long
format where the counts and estimates are presented in single columns. This
has no effect if strata is not defined.
if wide = TRUE
, then this will transpose the columns to
the rows, which is useful when you stratify by age group. Default is
NULL
, which will not transpose anything. You have three options for
transpose:
transpose = "variable"
: uses the variable column (dropping values if strata exists).
Use this if you know that your values are all identical or at least
identifiable by the variable name.
transpose = "value"
: uses the value column (dropping variables if strata exists).
Use this if your values are important and the variable names are
generic placeholders.
transpose = "both"
: combines the variable and value columns.
Use this if both the variables and values are important.
(survey only) if pretty = FALSE
, this indicates the number
of digits used for proportion and CI
(survey only) if TRUE
(default), the proportion and CI are merged into a single column
(survey only) a method from survey::svyciprop()
to calculate
the confidence interval. Defaults to "logit".
(survey only) a logical indicating if the design effect should be reported.
Defaults to FALSE
.
a tibble::tibble()
with a column for variables, a column for values,
and counts and proportions. If strata
is not NULL
and wide = TRUE
,
then there will be separate columns for each stratum for the counts and
proportions. Survey data will report confidence intervals.
# Run the linelist examples only when both optional packages can be loaded.
# `&&` is the scalar, short-circuiting operator meant for `if ()` conditions;
# `&` is element-wise and would still try to load epidict after matchmaker
# has already failed to load.
have_packages <- require("matchmaker") && require("epidict")
#> Loading required package: matchmaker
#> Loading required package: epidict
if (have_packages) {
  # withAutoprint() echoes each expression before its result, so the
  # expressions below are kept exactly as they appear in the printed output.
  withAutoprint({
    # Simulating linelist data
    linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF")
    measles_dict <- epidict::msf_dict("Measles", compact = FALSE)
    # Cleaning linelist data
    linelist_clean <- matchmaker::match_df(
      x = linelist,
      dictionary = measles_dict,
      from = "option_code",
      to = "option_name",
      by = "data_element_shortname",
      order = "option_order_in_set"
    )
    # get a descriptive table by sex
    tab_linelist(linelist_clean, sex)
    # describe pregnancy statistics, but remove missing data from the tally
    tab_linelist(linelist_clean, trimester, na.rm = TRUE)
    # describe by symptom
    tab_linelist(linelist_clean,
      cough, nasal_discharge, severe_oral_lesions,
      transpose = "value"
    )
    # describe pregnancy statistics, stratifying by vitamin A prescription
    tab_linelist(linelist_clean, trimester, sex,
      strata = prescribed_vitamin_a,
      na.rm = TRUE, row_total = TRUE
    )
  })
}
#> > linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF")
#> > measles_dict <- epidict::msf_dict("Measles", compact = FALSE)
#> > linelist_clean <- matchmaker::match_df(x = linelist, dictionary = measles_dict,
#> + from = "option_code", to = "option_name", by = "data_element_shortname", order = "option_order_in_set")
#> > tab_linelist(linelist_clean, sex)
#> # A tibble: 3 × 4
#> variable value n proportion
#> <chr> <chr> <int> <dbl>
#> 1 sex Male 318 31.8
#> 2 sex Female 354 35.4
#> 3 sex Unknown/unspecified 328 32.8
#> > tab_linelist(linelist_clean, trimester, na.rm = TRUE)
#> Warning: Removing 911 missing values
#> # A tibble: 3 × 4
#> variable value n proportion
#> <chr> <chr> <int> <dbl>
#> 1 trimester 1st trimester 35 39.3
#> 2 trimester 2nd trimester 20 22.5
#> 3 trimester 3rd trimester 34 38.2
#> > tab_linelist(linelist_clean, cough, nasal_discharge, severe_oral_lesions,
#> + transpose = "value")
#> # A tibble: 3 × 5
#> variable `Yes n` `Yes proportion` `No n` `No proportion`
#> <fct> <dbl> <dbl> <dbl> <dbl>
#> 1 cough 545 54.5 455 45.5
#> 2 nasal_discharge 507 50.7 493 49.3
#> 3 severe_oral_lesions 483 48.3 517 51.7
#> > tab_linelist(linelist_clean, trimester, sex, strata = prescribed_vitamin_a,
#> + na.rm = TRUE, row_total = TRUE)
#> Warning: Removing 911 missing values
#> # A tibble: 6 × 7
#> variable value `Yes n` `Yes proportion` `No n` No propo…¹ Total
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 trimester 1st trimester 22 44 13 33.3 35
#> 2 trimester 2nd trimester 11 22 9 23.1 20
#> 3 trimester 3rd trimester 17 34 17 43.6 34
#> 4 sex Male 138 28.8 180 34.5 318
#> 5 sex Female 174 36.3 180 34.5 354
#> 6 sex Unknown/unspecified 167 34.9 161 30.9 328
#> # … with abbreviated variable name ¹`No proportion`
# Run the survey examples only when both survey and srvyr can be loaded.
have_survey_packages <- require("survey") && require("srvyr")
#> Loading required package: survey
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#>
#> Attaching package: ‘survey’
#> The following object is masked from ‘package:graphics’:
#>
#> dotchart
#> Loading required package: srvyr
#>
#> Attaching package: ‘srvyr’
#> The following object is masked from ‘package:stats’:
#>
#> filter
if (have_survey_packages) {
withAutoprint({
# load the `api` example data; `apistrat` used below is presumably one of the
# data sets this call provides (from the survey package) -- confirm
data(api)
# stratified sample
surv <- apistrat %>%
as_survey_design(strata = stype, weights = pw)
# tabulate `awards` by school type, adding row/column totals and the design
# effect; keep the result so it can be printed and relabelled below
s <- surv %>%
tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE, deff = TRUE)
s
# making things pretty
s %>%
# wrap the "n" suffix of all count columns in parentheses (note space before n).
epikit::augment_redundant(" (n)" = " n") %>%
# relabel all columns containing "ci" to "% (95% CI)" and all columns
# containing "deff" to "Design Effect"
epikit::rename_redundant(
"% (95% CI)" = ci,
"Design Effect" = deff
)
# long data
surv %>%
tab_survey(awards, strata = stype, wide = FALSE)
# tabulate binary variables, keeping only the "Yes" values
surv %>%
tab_survey(yr.rnd, sch.wide, awards, keep = "Yes")
# stratify the binary variables
surv %>%
tab_survey(yr.rnd, sch.wide, awards,
strata = stype,
keep = "Yes"
)
# invert the tabulation: drop the "Yes" values instead of keeping them
surv %>%
tab_survey(yr.rnd, sch.wide, awards,
strata = stype,
drop = "Yes",
deff = TRUE,
row_total = TRUE
)
})
}
#> > data(api)
#> > surv <- apistrat %>% as_survey_design(strata = stype, weights = pw)
#> > s <- surv %>% tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE,
#> + deff = TRUE)
#> > s
#> # A tibble: 3 × 12
#> variable value `E n` `E ci` E def…¹ `H n` `H ci` H def…² `M n` `M ci` M def…³
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 awards No 1194. 27.0% … 0.768 513. 68.0%… 0.388 529. 52.0%… 0.534
#> 2 awards Yes 3227. 73.0% … 0.319 242. 32.0%… 0.308 489. 48.0%… 0.319
#> 3 awards Total 4421. NA (NA… NA 755. NA (N… NA 1018. NA (N… NA
#> # … with 1 more variable: `Total n` <dbl>, and abbreviated variable names
#> # ¹`E deff`, ²`H deff`, ³`M deff`
#> > s %>% epikit::augment_redundant(` (n)` = " n") %>% epikit::rename_redundant(`% (95% CI)` = ci,
#> + `Design Effect` = deff)
#> # A tibble: 3 × 12
#> variable value `E (n)` % (95…¹ Desig…² `H (n)` % (95…³ Desig…⁴ `M (n)` % (95…⁵
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 awards No 1194. 27.0% … 0.768 513. 68.0% … 0.388 529. 52.0% …
#> 2 awards Yes 3227. 73.0% … 0.319 242. 32.0% … 0.308 489. 48.0% …
#> 3 awards Total 4421. NA (NA… NA 755. NA (NA… NA 1018. NA (NA…
#> # … with 2 more variables: `Design Effect` <dbl>, `Total (n)` <dbl>, and
#> # abbreviated variable names ¹`% (95% CI)`, ²`Design Effect`, ³`% (95% CI)`,
#> # ⁴`Design Effect`, ⁵`% (95% CI)`
#> > surv %>% tab_survey(awards, strata = stype, wide = FALSE)
#> # A tibble: 6 × 5
#> # Groups: value [2]
#> variable value stype n ci
#> <chr> <chr> <fct> <dbl> <chr>
#> 1 awards No E 1194. 27.0% (19.1--36.7)
#> 2 awards No H 513. 68.0% (53.5--79.7)
#> 3 awards No M 529. 52.0% (37.9--65.8)
#> 4 awards Yes E 3227. 73.0% (63.3--80.9)
#> 5 awards Yes H 242. 32.0% (20.3--46.5)
#> 6 awards Yes M 489. 48.0% (34.2--62.1)
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, keep = "Yes")
#> # A tibble: 3 × 4
#> variable value n ci
#> <chr> <chr> <dbl> <chr>
#> 1 yr.rnd Yes 852. 13.7% (9.1--20.3)
#> 2 sch.wide Yes 5128. 82.8% (77.4--87.1)
#> 3 awards Yes 3958. 63.9% (56.8--70.5)
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, keep = "Yes")
#> # A tibble: 3 × 8
#> variable value `E n` `E ci` `H n` `H ci` `M n` `M ci`
#> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr>
#> 1 yr.rnd Yes 796. 18.0% (11.6--26.9) 15.1 2.0% (0.3--13.7) 40.7 4.0% (…
#> 2 sch.wide Yes 4023. 91.0% (83.4--95.3) 393. 52.0% (37.9--65.8) 713. 70.0% …
#> 3 awards Yes 3227. 73.0% (63.3--80.9) 242. 32.0% (20.3--46.5) 489. 48.0% …
#> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, drop = "Yes",
#> + deff = TRUE, row_total = TRUE)
#> # A tibble: 3 × 12
#> variable value `E n` `E ci` E def…¹ `H n` `H ci` H def…² `M n` `M ci` M def…³
#> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl>
#> 1 yr.rnd No 3625. 82.0% … 0.103 740. 98.0%… 0.0402 977. 96.0%… 0.0658
#> 2 sch.wide No 398. 9.0% (… 1.37 362. 48.0%… 0.704 305. 30.0%… 0.811
#> 3 awards No 1194. 27.0% … 0.768 513. 68.0%… 0.388 529. 52.0%… 0.534
#> # … with 1 more variable: `Total n` <dbl>, and abbreviated variable names
#> # ¹`E deff`, ²`H deff`, ³`M deff`