Based on a dictionary generator like msf_dict() or msf_dict_survey(), this function generates a randomized data set from the values defined in the dictionary. The randomized data set should mimic an Excel export from DHIS2 for outbreaks and a Kobo export for surveys.

gen_data(
  dictionary,
  varnames = "data_element_shortname",
  numcases = 300,
  org = "MSF"
)

Arguments

dictionary

Specify which dictionary you would like to use.

varnames

Specify the name of the column that contains the variable names. If the dictionary is a survey, varnames needs to be "column_name".

numcases

Specify the number of cases you want (default is 300).

org

The organization the dictionary belongs to. Currently, only MSF exists. In the future, dictionaries from WHO and other organizations may become available.

Value

A data frame with cases in rows and variables in columns. The number of columns will vary from dictionary to dictionary, so please use the dictionary functions to generate a corresponding dictionary.

Examples

if (require("dplyr") & require("matchmaker")) {
  withAutoprint({
    # You will often want to use MSF dictionaries to translate codes to human-
    # readable variables. Here, we generate a data set of 20 cases:
    dat <- gen_data(
      dictionary = "Cholera",
      varnames = "data_element_shortname",
      numcases = 20,
      org = "MSF"
    )
    print(dat)

    # We want the expanded dictionary, so we will select `compact = FALSE`
    dict <- msf_dict(disease = "Cholera", long = TRUE, compact = FALSE, tibble = TRUE)
    print(dict)

    # Now we can use matchmaker to filter the data:
    dat_clean <- matchmaker::match_df(dat, dict,
      from = "option_code",
      to = "option_name",
      by = "data_element_shortname",
      order = "option_order_in_set"
    )
    print(dat_clean)
  })
}
#> Loading required package: dplyr
#>
#> Attaching package: ‘dplyr’
#> The following objects are masked from ‘package:stats’:
#>
#>     filter, lag
#> The following objects are masked from ‘package:base’:
#>
#>     intersect, setdiff, setequal, union
#> Loading required package: matchmaker
#> > dat <- gen_data(dictionary = "Cholera", varnames = "data_element_shortname", #> + numcases = 20, org = "MSF") #> > print(dat) #> # A tibble: 20 x 45 #> trimester exit_status treatment_facil… dehydration_lev… comments_on_lab… #> <fct> <fct> <lgl> <fct> <lgl> #> 1 NA DOA NA NO NA #> 2 NA AD NA UN NA #> 3 NA AD NA UN NA #> 4 NA LA NA SO NA #> 5 NA AD NA SE NA #> 6 NA DD NA NO NA #> 7 NA TR NA SO NA #> 8 NA LA NA SO NA #> 9 2 DD NA SE NA #> 10 NA DOA NA SO NA #> 11 NA DH NA NO NA #> 12 NA TR NA NO NA #> 13 NA DOA NA SE NA #> 14 NA AD NA SO NA #> 15 NA TR NA NO NA #> 16 NA DH NA UN NA #> 17 NA TR NA NO NA #> 18 3 DD NA UN NA #> 19 3 AD NA UN NA #> 20 NA DD NA SE NA #> # … with 40 more variables: fluids_treatment_plan <fct>, time_to_death <fct>, #> # malaria_rdt_at_admission <fct>, previously_vaccinated <fct>, #> # cholera_pcr_result <fct>, iv_fluids_received_litres <int>, #> # patient_origin_free_text <chr>, previous_vaccine_doses_received <fct>, #> # age_years <int>, arrival_date_in_area_if_3m <date>, age_days <int>, #> # msf_involvement <fct>, date_lab_sample_taken <date>, readmission <fct>, #> # age_months <int>, date_of_last_vaccination <date>, case_number <chr>, #> # sex <fct>, patient_origin <chr>, prescribed_zinc_supplement <fct>, #> # delivery_event <fct>, pregnancy_outcome_at_exit <fct>, date_of_exit <date>, #> # date_of_consultation_admission <date>, oedema <fct>, event_file_type <fct>, #> # residential_status_brief <fct>, pregnant <fct>, cholera_rdt_result <fct>, #> # dehydration_severity_during_stay <fct>, cholera_culture_result <fct>, #> # foetus_alive_at_admission <fct>, treatment_facility_site <int>, #> # prescribed_antibiotics <fct>, cholera_referred_to <fct>, #> # ors_consumed_litres <int>, treatment_location <chr>, #> # cholera_referred_from <fct>, cholera_treatment_facility_type <fct>, #> # date_of_onset <date> #> > dict <- msf_dict(disease = "Cholera", long = TRUE, compact = FALSE, tibble = TRUE) #> > print(dict) #> # A tibble: 182 x 11 #> 
data_element_uid data_element_na… data_element_sh… data_element_de… #> <chr> <chr> <chr> <chr> #> 1 FF7d81Zy0yQ egen_013_pregna… trimester If pregnant, tr… #> 2 FF7d81Zy0yQ egen_013_pregna… trimester If pregnant, tr… #> 3 FF7d81Zy0yQ egen_013_pregna… trimester If pregnant, tr… #> 4 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 5 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 6 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 7 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 8 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 9 ADfNqpCL5kf egen_015_exit_s… exit_status Final status of… #> 10 wjCDTwXmtix egen_064_treatm… treatment_facil… Name of facilit… #> # … with 172 more rows, and 7 more variables: data_element_valuetype <chr>, #> # data_element_formname <chr>, used_optionset_uid <chr>, option_code <chr>, #> # option_name <chr>, option_uid <chr>, option_order_in_set <dbl> #> > dat_clean <- matchmaker::match_df(dat, dict, from = "option_code", to = "option_name", #> + by = "data_element_shortname", order = "option_order_in_set") #> > print(dat_clean) #> # A tibble: 20 x 45 #> trimester exit_status treatment_facil… dehydration_lev… comments_on_lab… #> <fct> <fct> <lgl> <fct> <lgl> #> 1 NA Dead on ar… NA None NA #> 2 NA Transferre… NA Unknown NA #> 3 NA Transferre… NA Unknown NA #> 4 NA Left again… NA Some NA #> 5 NA Transferre… NA Severe NA #> 6 NA Dead in fa… NA None NA #> 7 NA Transferre… NA Some NA #> 8 NA Left again… NA Some NA #> 9 2nd trim… Dead in fa… NA Severe NA #> 10 NA Dead on ar… NA Some NA #> 11 NA Discharged… NA None NA #> 12 NA Transferre… NA None NA #> 13 NA Dead on ar… NA Severe NA #> 14 NA Transferre… NA Some NA #> 15 NA Transferre… NA None NA #> 16 NA Discharged… NA Unknown NA #> 17 NA Transferre… NA None NA #> 18 3rd trim… Dead in fa… NA Unknown NA #> 19 3rd trim… Transferre… NA Unknown NA #> 20 NA Dead in fa… NA Severe NA #> # … with 40 more variables: fluids_treatment_plan <fct>, 
time_to_death <fct>, #> # malaria_rdt_at_admission <fct>, previously_vaccinated <fct>, #> # cholera_pcr_result <fct>, iv_fluids_received_litres <int>, #> # patient_origin_free_text <chr>, previous_vaccine_doses_received <fct>, #> # age_years <int>, arrival_date_in_area_if_3m <date>, age_days <int>, #> # msf_involvement <fct>, date_lab_sample_taken <date>, readmission <fct>, #> # age_months <int>, date_of_last_vaccination <date>, case_number <chr>, #> # sex <fct>, patient_origin <chr>, prescribed_zinc_supplement <fct>, #> # delivery_event <fct>, pregnancy_outcome_at_exit <fct>, date_of_exit <date>, #> # date_of_consultation_admission <date>, oedema <fct>, event_file_type <fct>, #> # residential_status_brief <fct>, pregnant <fct>, cholera_rdt_result <fct>, #> # dehydration_severity_during_stay <fct>, cholera_culture_result <fct>, #> # foetus_alive_at_admission <fct>, treatment_facility_site <chr>, #> # prescribed_antibiotics <fct>, cholera_referred_to <fct>, #> # ors_consumed_litres <int>, treatment_location <chr>, #> # cholera_referred_from <fct>, cholera_treatment_facility_type <fct>, #> # date_of_onset <date>