Tabulate counts and proportions

tab_linelist(
  x,
  ...,
  strata = NULL,
  keep = TRUE,
  drop = NULL,
  na.rm = TRUE,
  prop_total = FALSE,
  row_total = FALSE,
  col_total = FALSE,
  wide = TRUE,
  transpose = NULL,
  digits = 1,
  pretty = TRUE
)

tab_survey(
  x,
  ...,
  strata = NULL,
  keep = TRUE,
  drop = NULL,
  na.rm = TRUE,
  prop_total = FALSE,
  row_total = FALSE,
  col_total = FALSE,
  wide = TRUE,
  transpose = NULL,
  digits = 1,
  method = "logit",
  deff = FALSE,
  pretty = TRUE
)

Arguments

x

a data.frame() or tbl_svy object

...

categorical variables to tabulate

strata

a stratifier to split the data

keep

a character vector specifying which values to retain in the tabulation. Defaults to TRUE, which keeps all the values.

drop

a character vector specifying which values to drop in the tabulation. Defaults to NULL, which keeps all values.

na.rm

When TRUE (default), missing (NA) values present in the tabulated variables will be removed from the data set with a warning, causing a change in denominator for the tabulations. Setting this to FALSE creates an explicit missing value called "(Missing)".

prop_total

if TRUE and strata is not NULL, then the totals of the rows will be reported as proportions of the total data set, otherwise, they will be proportions within the stratum (default).

row_total

create a new column with the total counts for each row of stratified data.

col_total

create a new row with the total counts for each column of stratified data.

wide

if TRUE (default) and strata is defined, then the results are presented in a wide table with each stratification counts and estimates in separate columns. If FALSE, then the data will be presented in a long format where the counts and estimates are presented in single columns. This has no effect if strata is not defined.

transpose

if wide = TRUE, then this will transpose the columns to the rows, which is useful when you stratify by age group. Default is NULL, which will not transpose anything. You have three options for transpose:

  • transpose = "variable": uses the variable column, (dropping values if strata exists). Use this if you know that your values are all identical or at least identifiable by the variable name.

  • transpose = "value" : uses the value column, (dropping variables if strata exists). Use this if your values are important and the variable names are generic placeholders.

  • transpose = "both" : combines the variable and value columns. Use this if both the variables and values are important.

digits

(survey only) if pretty = FALSE, this indicates the number of digits used for proportion and CI

pretty

(survey only) if TRUE, default, the proportion and CI are merged

method

(survey only) a method from survey::svyciprop() to calculate the confidence interval. Defaults to "logit".

deff

(survey only) a logical indicating if the design effect should be reported. Defaults to FALSE.

Value

a tibble::tibble() with a column for variables, a column for values, and counts and proportions. If strata is not NULL and wide = TRUE, then there will be separate columns for each strata for the counts and proportions. Survey data will report confidence intervals.

Examples

have_packages <- require("matchmaker") & require("epidict")
#> Loading required package: matchmaker
#> Loading required package: epidict
if (have_packages) { withAutoprint({
  # Simulating linelist data
  linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF")
  measles_dict <- epidict::msf_dict("Measles", compact = FALSE)

  # Cleaning linelist data
  linelist_clean <- matchmaker::match_df(
    x = linelist,
    dictionary = measles_dict,
    from = "option_code",
    to = "option_name",
    by = "data_element_shortname",
    order = "option_order_in_set"
  )

  # get a descriptive table by sex
  tab_linelist(linelist_clean, sex)

  # describe pregnancy statistics, but remove missing data from the tally
  tab_linelist(linelist_clean, trimester, na.rm = TRUE)

  # describe by symptom
  tab_linelist(linelist_clean, cough, nasal_discharge, severe_oral_lesions,
    transpose = "value"
  )

  # describe pregnancy statistics, stratifying by vitamin A prescription
  tab_linelist(linelist_clean, trimester, sex,
    strata = prescribed_vitamin_a,
    na.rm = TRUE, row_total = TRUE
  )
}) }
#> > linelist <- epidict::gen_data("Measles", numcases = 1000, org = "MSF") #> > measles_dict <- epidict::msf_dict("Measles", compact = FALSE) #> > linelist_clean <- matchmaker::match_df(x = linelist, dictionary = measles_dict, #> + from = "option_code", to = "option_name", by = "data_element_shortname", order = "option_order_in_set") #> > tab_linelist(linelist_clean, sex) #> # A tibble: 3 x 4 #> variable value n proportion #> <chr> <chr> <dbl> <dbl> #> 1 sex Male 337 33.7 #> 2 sex Female 326 32.6 #> 3 sex Unknown/unspecified 337 33.7 #> > tab_linelist(linelist_clean, trimester, na.rm = TRUE)
#> Warning: Removing 919 missing values
#> # A tibble: 3 x 4 #> variable value n proportion #> <chr> <chr> <dbl> <dbl> #> 1 trimester 1st trimester 21 25.9 #> 2 trimester 2nd trimester 28 34.6 #> 3 trimester 3rd trimester 32 39.5 #> > tab_linelist(linelist_clean, cough, nasal_discharge, severe_oral_lesions, #> + transpose = "value") #> # A tibble: 3 x 5 #> variable `Yes n` `Yes proportion` `No n` `No proportion` #> <fct> <dbl> <dbl> <dbl> <dbl> #> 1 cough 511 51.1 489 48.9 #> 2 nasal_discharge 488 48.8 512 51.2 #> 3 severe_oral_lesions 509 50.9 491 49.1 #> > tab_linelist(linelist_clean, trimester, sex, strata = prescribed_vitamin_a, #> + na.rm = TRUE, row_total = TRUE)
#> Warning: Removing 919 missing values
#> # A tibble: 6 x 7 #> variable value `Yes n` `Yes proportion` `No n` `No proportion` Total #> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 trimester 1st trimester 11 25 10 27.0 21 #> 2 trimester 2nd trimester 14 31.8 14 37.8 28 #> 3 trimester 3rd trimester 19 43.2 13 35.1 32 #> 4 sex Male 176 34.1 161 33.3 337 #> 5 sex Female 171 33.1 155 32.0 326 #> 6 sex Unknown/unspe… 169 32.8 168 34.7 337
have_survey_packages <- require("survey") && require("srvyr")
#> Loading required package: survey
#> Loading required package: grid
#> Loading required package: Matrix
#> Loading required package: survival
#> #> Attaching package: ‘survey’
#> The following object is masked from ‘package:graphics’: #> #> dotchart
#> Loading required package: srvyr
#> #> Attaching package: ‘srvyr’
#> The following object is masked from ‘package:stats’: #> #> filter
if (have_survey_packages) { withAutoprint({
  data(api)

  # stratified sample
  surv <- apistrat %>%
    as_survey_design(strata = stype, weights = pw)

  s <- surv %>%
    tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE, deff = TRUE)
  s

  # making things pretty
  s %>%
    # wrap all "n" variables in parentheses (note space before n).
    epikit::augment_redundant(" (n)" = " n") %>%
    # relabel all columns containing "ci" to "% (95% CI)"
    epikit::rename_redundant(
      "% (95% CI)" = ci,
      "Design Effect" = deff
    )

  # long data
  surv %>%
    tab_survey(awards, strata = stype, wide = FALSE)

  # tabulate binary variables
  surv %>%
    tab_survey(yr.rnd, sch.wide, awards, keep = "Yes")

  # stratify the binary variables
  surv %>%
    tab_survey(yr.rnd, sch.wide, awards,
      strata = stype,
      keep = "Yes"
    )

  # invert the tabulation
  surv %>%
    tab_survey(yr.rnd, sch.wide, awards,
      strata = stype,
      drop = "Yes",
      deff = TRUE,
      row_total = TRUE
    )
}) }
#> > data(api) #> > surv <- apistrat %>% as_survey_design(strata = stype, weights = pw) #> > s <- surv %>% tab_survey(awards, strata = stype, col_total = TRUE, row_total = TRUE, #> + deff = TRUE) #> > s #> # A tibble: 3 x 12 #> variable value `E n` `E ci` `E deff` `H n` `H ci` `H deff` `M n` `M ci` #> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> #> 1 awards No 1194. 27.0%… 0.768 513. 68.0%… 0.388 529. 52.0%… #> 2 awards Yes 3227. 73.0%… 0.319 242. 32.0%… 0.308 489. 48.0%… #> 3 awards Total 4421. NA (N… NA 755. NA (N… NA 1018. NA (N… #> # … with 2 more variables: `M deff` <dbl>, `Total n` <dbl> #> > s %>% epikit::augment_redundant(` (n)` = " n") %>% epikit::rename_redundant(`% (95% CI)` = ci, #> + `Design Effect` = deff) #> # A tibble: 3 x 12 #> variable value `E (n)` `% (95% CI)` `Design Effect` `H (n)` `% (95% CI)` #> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> #> 1 awards No 1194. 27.0% (19.1… 0.768 513. 68.0% (53.5… #> 2 awards Yes 3227. 73.0% (63.3… 0.319 242. 32.0% (20.3… #> 3 awards Total 4421. NA (NA--NA) NA 755. NA (NA--NA) #> # … with 5 more variables: `Design Effect` <dbl>, `M (n)` <dbl>, `% (95% #> # CI)` <chr>, `Design Effect` <dbl>, `Total (n)` <dbl> #> > surv %>% tab_survey(awards, strata = stype, wide = FALSE) #> # A tibble: 6 x 5 #> variable value stype n ci #> <chr> <chr> <fct> <dbl> <chr> #> 1 awards No E 1194. 27.0% (19.1--36.7) #> 2 awards No H 513. 68.0% (53.5--79.7) #> 3 awards No M 529. 52.0% (37.9--65.8) #> 4 awards Yes E 3227. 73.0% (63.3--80.9) #> 5 awards Yes H 242. 32.0% (20.3--46.5) #> 6 awards Yes M 489. 48.0% (34.2--62.1) #> > surv %>% tab_survey(yr.rnd, sch.wide, awards, keep = "Yes") #> # A tibble: 3 x 4 #> variable value n ci #> <chr> <chr> <dbl> <chr> #> 1 yr.rnd Yes 852. 13.7% (9.1--20.3) #> 2 sch.wide Yes 5128. 82.8% (77.4--87.1) #> 3 awards Yes 3958. 
63.9% (56.8--70.5) #> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, keep = "Yes") #> # A tibble: 3 x 8 #> variable value `E n` `E ci` `H n` `H ci` `M n` `M ci` #> <chr> <chr> <dbl> <chr> <dbl> <chr> <dbl> <chr> #> 1 yr.rnd Yes 796. 18.0% (11.6--2… 15.1 2.0% (0.3--13… 40.7 4.0% (1.0--15… #> 2 sch.wide Yes 4023. 91.0% (83.4--9… 393. 52.0% (37.9--… 713. 70.0% (55.5--… #> 3 awards Yes 3227. 73.0% (63.3--8… 242. 32.0% (20.3--… 489. 48.0% (34.2--… #> > surv %>% tab_survey(yr.rnd, sch.wide, awards, strata = stype, drop = "Yes", #> + deff = TRUE, row_total = TRUE) #> # A tibble: 3 x 12 #> variable value `E n` `E ci` `E deff` `H n` `H ci` `H deff` `M n` `M ci` #> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> #> 1 yr.rnd No 3625. 82.0%… 0.103 740. 98.0%… 0.0402 977. 96.0%… #> 2 sch.wide No 398. 9.0% … 1.37 362. 48.0%… 0.704 305. 30.0%… #> 3 awards No 1194. 27.0%… 0.768 513. 68.0%… 0.388 529. 52.0%… #> # … with 2 more variables: `M deff` <dbl>, `Total n` <dbl>