R/find_date_cause.R
find_date_cause.Rd
This function will find the first date in an ordered series of columns that is either before or after a cutoff date, inclusive.
find_date_cause(
x,
...,
period_start = NULL,
period_end = NULL,
datecol = "start_date",
datereason = "start_date_reason",
na_fill = "start"
)
find_start_date(
x,
...,
period_start = NULL,
period_end = NULL,
datecol = "start_date",
datereason = "start_date_reason"
)
find_end_date(
x,
...,
period_start = NULL,
period_end = NULL,
datecol = "end_date",
datereason = "end_date_reason"
)
constrain_dates(i, period_start, period_end, boundary = "both")
assert_positive_timespan(x, date_start, date_end)
a data frame
an ordered series of date columns (i.e. the most important date to be considered first).
for the find_ functions, this should be the
name of a column in x
that contains the start/end of the recall period.
For constrain_dates, this should be a vector of dates.
the name of the new column to contain the dates
the name of the column to contain the name of the column from which the date came.
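one of either "start" (the default) or "end", indicating how dates outside of the recall period are filled in the new column. As the examples below show, with "start" dates before the period are replaced with the period start and dates after the period are set to NA; with "end" dates after the period are replaced with the period end and dates before the period are set to NA.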
a vector of dates
one of "both", "start", or "end". Dates outside of the boundary will be set to NA.
column name of a date vector
d <- data.frame(
s1 = c(as.Date("2013-01-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
s2 = c(as.Date("2013-02-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
s3 = c(as.Date("2013-01-10") - 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
ps = as.Date("2012-12-31"),
pe = as.Date("2013-01-09")
)
print(dd <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe))
#> start_date start_date_reason s1 s2 s3 ps
#> 1 2013-01-01 s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31
#> 2 2013-01-02 s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31
#> 3 2013-01-03 s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31
#> 4 2013-01-04 s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31
#> 5 2013-01-05 s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31
#> 6 2013-01-06 s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31
#> 7 2013-01-07 s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31
#> 8 2013-01-08 s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31
#> 9 2013-01-09 s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31
#> 10 2013-01-01 s3 2013-01-10 2013-02-10 2013-01-01 2012-12-31
#> 11 2012-12-31 s3 2013-01-11 2013-02-11 2012-12-31 2012-12-31
#> 12 2012-12-31 s1 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#> 13 <NA> 2014-01-01 2014-01-01 2014-01-01 2012-12-31
#> pe
#> 1 2013-01-09
#> 2 2013-01-09
#> 3 2013-01-09
#> 4 2013-01-09
#> 5 2013-01-09
#> 6 2013-01-09
#> 7 2013-01-09
#> 8 2013-01-09
#> 9 2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09
print(bb <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe,
na_fill = "end",
datecol = "enddate",
datereason = "endcause"))
#> enddate endcause s1 s2 s3 ps pe
#> 1 2013-01-01 s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31 2013-01-09
#> 2 2013-01-02 s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31 2013-01-09
#> 3 2013-01-03 s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31 2013-01-09
#> 4 2013-01-04 s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31 2013-01-09
#> 5 2013-01-05 s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31 2013-01-09
#> 6 2013-01-06 s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31 2013-01-09
#> 7 2013-01-07 s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31 2013-01-09
#> 8 2013-01-08 s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31 2013-01-09
#> 9 2013-01-09 s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31 2013-01-09
#> 10 2013-01-09 s1 2013-01-10 2013-02-10 2013-01-01 2012-12-31 2013-01-09
#> 11 2013-01-09 s1 2013-01-11 2013-02-11 2012-12-31 2012-12-31 2013-01-09
#> 12 <NA> 2012-01-01 2012-01-01 2012-01-01 2012-12-31 2013-01-09
#> 13 2013-01-09 s1 2014-01-01 2014-01-01 2014-01-01 2012-12-31 2013-01-09
find_date_cause(d, s3, s2, s1, period_start = ps, period_end = pe)
#> s1 s2 start_date start_date_reason s3 ps
#> 1 2013-01-01 2013-02-01 2013-01-01 s1 2013-01-10 2012-12-31
#> 2 2013-01-02 2013-02-02 2013-01-09 s3 2013-01-09 2012-12-31
#> 3 2013-01-03 2013-02-03 2013-01-08 s3 2013-01-08 2012-12-31
#> 4 2013-01-04 2013-02-04 2013-01-07 s3 2013-01-07 2012-12-31
#> 5 2013-01-05 2013-02-05 2013-01-06 s3 2013-01-06 2012-12-31
#> 6 2013-01-06 2013-02-06 2013-01-05 s3 2013-01-05 2012-12-31
#> 7 2013-01-07 2013-02-07 2013-01-04 s3 2013-01-04 2012-12-31
#> 8 2013-01-08 2013-02-08 2013-01-03 s3 2013-01-03 2012-12-31
#> 9 2013-01-09 2013-02-09 2013-01-02 s3 2013-01-02 2012-12-31
#> 10 2013-01-10 2013-02-10 2013-01-01 s3 2013-01-01 2012-12-31
#> 11 2013-01-11 2013-02-11 2012-12-31 s3 2012-12-31 2012-12-31
#> 12 2012-01-01 2012-01-01 2012-12-31 s3 2012-01-01 2012-12-31
#> 13 2014-01-01 2014-01-01 <NA> 2014-01-01 2012-12-31
#> pe
#> 1 2013-01-09
#> 2 2013-01-09
#> 3 2013-01-09
#> 4 2013-01-09
#> 5 2013-01-09
#> 6 2013-01-09
#> 7 2013-01-09
#> 8 2013-01-09
#> 9 2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09
# works
assert_positive_timespan(dd, start_date, pe)
# returns a warning because the last date isn't later than the start_date
assert_positive_timespan(dd, start_date, s2)
#> Warning: 2 rows had negative timespans
#> start_date start_date_reason s1 s2 s3 ps
#> 12 2012-12-31 s1 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#> NA <NA> <NA> <NA> <NA> <NA> <NA>
#> pe
#> 12 2013-01-09
#> NA <NA>
with(d, constrain_dates(s1, ps, pe))
#> [1] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" "2013-01-05"
#> [6] "2013-01-06" "2013-01-07" "2013-01-08" "2013-01-09" NA
#> [11] NA NA NA
with(d, constrain_dates(s2, ps, pe))
#> [1] NA NA NA NA NA NA NA NA NA NA NA NA NA
with(d, constrain_dates(s3, ps, pe))
#> [1] NA "2013-01-09" "2013-01-08" "2013-01-07" "2013-01-06"
#> [6] "2013-01-05" "2013-01-04" "2013-01-03" "2013-01-02" "2013-01-01"
#> [11] "2012-12-31" NA NA