Creates survey weights by dividing the stratified population counts from the source population by the corresponding surveyed counts in the sample population.

add_weights_strata(
  x,
  p,
  ...,
  population = population,
  surv_weight = "surv_weight",
  surv_weight_ID = "surv_weight_ID"
)

Arguments

x

a data frame of survey data

p

a data frame containing population data for the groups defined by the columns in ...

...

shared grouping columns across both x and p. These are used to match the weights to the correct subset of the population.

population

the column in p that defines the population numbers

surv_weight

the name of the new column to store the weights. Defaults to "surv_weight".

surv_weight_ID

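the name of the new ID column to be created, which identifies the stratum of each row (the grouping values joined by an underscore, e.g. "0-10_Male"). Defaults to "surv_weight_ID".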

Author

Zhian N. Kamvar, Alex Spina, Lukas Richter

Examples


# define a fake dataset of survey data
# including household and individual information
x <- data.frame(stringsAsFactors=FALSE,
         cluster = c("Village A", "Village A", "Village A", "Village A",
                     "Village A", "Village B", "Village B", "Village B"),
    household_id = c(1, 1, 1, 1, 2, 2, 2, 2),
     eligibile_n = c(6, 6, 6, 6, 6, 3, 3, 3),
      surveyed_n = c(4, 4, 4, 4, 4, 3, 3, 3),
   individual_id = c(1, 2, 3, 4, 4, 1, 2, 3),
         age_grp = c("0-10", "20-30", "30-40", "50-60", "50-60", "20-30",
                     "50-60", "30-40"),
             sex = c("Male", "Female", "Male", "Female", "Female", "Male",
                     "Female", "Female"),
         outcome = c("Y", "Y", "N", "N", "N", "N", "N", "Y")
)

# define a fake population data set
# including age group, sex, counts and proportions
p <- epikit::gen_population(total = 10000,
  groups = c("0-10", "10-20", "20-30", "30-40", "40-50", "50-60"),
  proportions = c(0.1, 0.2, 0.3, 0.4, 0.2, 0.1))
#> Warning: Given proportions (or counts) is not the same as
#> groups multiplied by strata length, they will be repeated to match
#> Warning: The sum of given proportions is more than 5% away from 100% 
#> If the intention was to provide proportions by strata then ignore this message

# make sure the column names match the survey dataset
p <- dplyr::rename(p, age_grp = groups, sex = strata, population = n)

# add weights to a stratified simple random sample
# weight based on age group and sex
add_weights_strata(x, p = p, age_grp, sex, population = population)
#>     cluster household_id eligibile_n surveyed_n individual_id age_grp    sex
#> 1 Village A            1           6          4             1    0-10   Male
#> 2 Village A            1           6          4             2   20-30 Female
#> 3 Village A            1           6          4             3   30-40   Male
#> 4 Village A            1           6          4             4   50-60 Female
#> 5 Village A            2           6          4             4   50-60 Female
#> 6 Village B            2           3          3             1   20-30   Male
#> 7 Village B            2           3          3             2   50-60 Female
#> 8 Village B            2           3          3             3   30-40 Female
#>   outcome surv_weight_ID surv_weight
#> 1       Y      0-10_Male   1000.0000
#> 2       Y   20-30_Female   3000.0000
#> 3       N     30-40_Male   4000.0000
#> 4       N   50-60_Female    333.3333
#> 5       N   50-60_Female    333.3333
#> 6       N     20-30_Male   3000.0000
#> 7       N   50-60_Female    333.3333
#> 8       Y   30-40_Female   4000.0000