This function will find the first date in an ordered series of columns that is either before or after a cutoff date, inclusive.

find_date_cause(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "start_date",
  datereason = "start_date_reason",
  na_fill = "start"
)

find_start_date(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "start_date",
  datereason = "start_date_reason"
)

find_end_date(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "end_date",
  datereason = "end_date_reason"
)

constrain_dates(i, period_start, period_end, boundary = "both")

assert_positive_timespan(x, date_start, date_end)

Arguments

x

a data frame

...

an ordered series of date columns (i.e. the most important date to be considered first).

period_start, period_end

for the find_ functions, this should be the name of a column in x that contains the start/end of the recall period. For constrain_dates, this should be a vector of dates.

datecol

the name of the new column to contain the dates

datereason

the name of the column to contain the name of the column from which the date came.

na_fill

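one of either "start" (the default) or "end", indicating which boundary of the recall period is used to fill in dates that fall outside of it. As the examples below show, "start" replaces dates earlier than period_start with period_start (rows with only later dates become NA), while "end" replaces dates later than period_end with period_end (rows with only earlier dates become NA).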

i

a vector of dates

boundary

one of "both", "start", or "end". Dates outside of the boundary will be set to NA.

date_start, date_end

column name of a date vector

Examples

d <- data.frame(
  s1 = c(as.Date("2013-01-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  s2 = c(as.Date("2013-02-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  s3 = c(as.Date("2013-01-10") - 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  ps = as.Date("2012-12-31"),
  pe = as.Date("2013-01-09")
)
print(dd <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe))
#>    start_date start_date_reason         s1         s2         s3         ps
#> 1  2013-01-01                s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31
#> 2  2013-01-02                s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31
#> 3  2013-01-03                s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31
#> 4  2013-01-04                s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31
#> 5  2013-01-05                s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31
#> 6  2013-01-06                s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31
#> 7  2013-01-07                s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31
#> 8  2013-01-08                s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31
#> 9  2013-01-09                s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31
#> 10 2013-01-01                s3 2013-01-10 2013-02-10 2013-01-01 2012-12-31
#> 11 2012-12-31                s3 2013-01-11 2013-02-11 2012-12-31 2012-12-31
#> 12 2012-12-31                s1 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#> 13       <NA>                   2014-01-01 2014-01-01 2014-01-01 2012-12-31
#>            pe
#> 1  2013-01-09
#> 2  2013-01-09
#> 3  2013-01-09
#> 4  2013-01-09
#> 5  2013-01-09
#> 6  2013-01-09
#> 7  2013-01-09
#> 8  2013-01-09
#> 9  2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09
print(bb <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe,
                            na_fill = "end", 
                            datecol = "enddate",
                            datereason = "endcause"))
#>       enddate endcause         s1         s2         s3         ps         pe
#> 1  2013-01-01       s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31 2013-01-09
#> 2  2013-01-02       s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31 2013-01-09
#> 3  2013-01-03       s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31 2013-01-09
#> 4  2013-01-04       s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31 2013-01-09
#> 5  2013-01-05       s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31 2013-01-09
#> 6  2013-01-06       s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31 2013-01-09
#> 7  2013-01-07       s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31 2013-01-09
#> 8  2013-01-08       s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31 2013-01-09
#> 9  2013-01-09       s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31 2013-01-09
#> 10 2013-01-09       s1 2013-01-10 2013-02-10 2013-01-01 2012-12-31 2013-01-09
#> 11 2013-01-09       s1 2013-01-11 2013-02-11 2012-12-31 2012-12-31 2013-01-09
#> 12       <NA>          2012-01-01 2012-01-01 2012-01-01 2012-12-31 2013-01-09
#> 13 2013-01-09       s1 2014-01-01 2014-01-01 2014-01-01 2012-12-31 2013-01-09
find_date_cause(d, s3, s2, s1, period_start = ps, period_end = pe)
#>            s1         s2 start_date start_date_reason         s3         ps
#> 1  2013-01-01 2013-02-01 2013-01-01                s1 2013-01-10 2012-12-31
#> 2  2013-01-02 2013-02-02 2013-01-09                s3 2013-01-09 2012-12-31
#> 3  2013-01-03 2013-02-03 2013-01-08                s3 2013-01-08 2012-12-31
#> 4  2013-01-04 2013-02-04 2013-01-07                s3 2013-01-07 2012-12-31
#> 5  2013-01-05 2013-02-05 2013-01-06                s3 2013-01-06 2012-12-31
#> 6  2013-01-06 2013-02-06 2013-01-05                s3 2013-01-05 2012-12-31
#> 7  2013-01-07 2013-02-07 2013-01-04                s3 2013-01-04 2012-12-31
#> 8  2013-01-08 2013-02-08 2013-01-03                s3 2013-01-03 2012-12-31
#> 9  2013-01-09 2013-02-09 2013-01-02                s3 2013-01-02 2012-12-31
#> 10 2013-01-10 2013-02-10 2013-01-01                s3 2013-01-01 2012-12-31
#> 11 2013-01-11 2013-02-11 2012-12-31                s3 2012-12-31 2012-12-31
#> 12 2012-01-01 2012-01-01 2012-12-31                s3 2012-01-01 2012-12-31
#> 13 2014-01-01 2014-01-01       <NA>                   2014-01-01 2012-12-31
#>            pe
#> 1  2013-01-09
#> 2  2013-01-09
#> 3  2013-01-09
#> 4  2013-01-09
#> 5  2013-01-09
#> 6  2013-01-09
#> 7  2013-01-09
#> 8  2013-01-09
#> 9  2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09

# works
assert_positive_timespan(dd, start_date, pe)

# emits a warning because s2 is not later than start_date in every row
assert_positive_timespan(dd, start_date, s2)
#> Warning: 2 rows had negative timespans
#>    start_date start_date_reason         s1         s2         s3         ps
#> 12 2012-12-31                s1 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#> NA       <NA>              <NA>       <NA>       <NA>       <NA>       <NA>
#>            pe
#> 12 2013-01-09
#> NA       <NA>


with(d, constrain_dates(s1, ps, pe))
#>  [1] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" "2013-01-05"
#>  [6] "2013-01-06" "2013-01-07" "2013-01-08" "2013-01-09" NA          
#> [11] NA           NA           NA          
with(d, constrain_dates(s2, ps, pe))
#>  [1] NA NA NA NA NA NA NA NA NA NA NA NA NA
with(d, constrain_dates(s3, ps, pe))
#>  [1] NA           "2013-01-09" "2013-01-08" "2013-01-07" "2013-01-06"
#>  [6] "2013-01-05" "2013-01-04" "2013-01-03" "2013-01-02" "2013-01-01"
#> [11] "2012-12-31" NA           NA