This function will find the first date in an ordered series of columns that falls within a specified period. If no dates from the provided columns fall within the period, it falls back to the period boundary (start or end, as selected by na_fill).

find_date_cause(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "start_date",
  datereason = "start_date_reason",
  na_fill = "start"
)

find_start_date(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "start_date",
  datereason = "start_date_reason"
)

find_end_date(
  x,
  ...,
  period_start = NULL,
  period_end = NULL,
  datecol = "end_date",
  datereason = "end_date_reason"
)

constrain_dates(i, period_start, period_end, boundary = "both")

assert_positive_timespan(x, date_start, date_end)

Arguments

x

a data frame

...

an ordered series of date columns (i.e. the most important date to be considered first). Earlier columns take precedence in case of ties.

period_start, period_end

for the find_ functions, this should be the name of a column in x that contains the start/end of the recall period. For constrain_dates, this should be a vector of dates.

datecol

the name of the new column to contain the dates

datereason

the name of the new column recording where each date came from: the name of the source column, or "period_start"/"period_end" when the period boundary was used as a fallback.

na_fill

one of "start", "end", or NULL. If "start" or "end", NA values in the result will be replaced with the corresponding period boundary. If NULL, NAs are left as-is.

i

a vector of dates

boundary

one of "both", "start", or "end". Dates outside of the boundary will be set to NA.

date_start, date_end

the names of the columns in x that hold the start and end dates of the timespan to check

Examples

d <- data.frame(
  s1 = c(as.Date("2013-01-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  s2 = c(as.Date("2013-02-01") + 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  s3 = c(as.Date("2013-01-10") - 0:10, as.Date(c("2012-01-01", "2014-01-01"))),
  ps = as.Date("2012-12-31"),
  pe = as.Date("2013-01-09")
)
print(dd <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe))
#> Warning: Row 1: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 2: ignored date(s) after period_end while selecting in-period date 2013-01-02
#> Warning: Row 3: ignored date(s) after period_end while selecting in-period date 2013-01-03
#> Warning: Row 4: ignored date(s) after period_end while selecting in-period date 2013-01-04
#> Warning: Row 5: ignored date(s) after period_end while selecting in-period date 2013-01-05
#> Warning: Row 6: ignored date(s) after period_end while selecting in-period date 2013-01-06
#> Warning: Row 7: ignored date(s) after period_end while selecting in-period date 2013-01-07
#> Warning: Row 8: ignored date(s) after period_end while selecting in-period date 2013-01-08
#> Warning: Row 9: ignored date(s) after period_end while selecting in-period date 2013-01-09
#> Warning: Row 10: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 11: ignored date(s) after period_end while selecting in-period date 2012-12-31
#> Warning: 1 row(s) had valid dates after period_end but were filled with period_start: rows 13
#>    start_date start_date_reason         s1         s2         s3         ps
#> 1  2013-01-01                s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31
#> 2  2013-01-02                s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31
#> 3  2013-01-03                s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31
#> 4  2013-01-04                s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31
#> 5  2013-01-05                s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31
#> 6  2013-01-06                s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31
#> 7  2013-01-07                s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31
#> 8  2013-01-08                s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31
#> 9  2013-01-09                s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31
#> 10 2013-01-01                s3 2013-01-10 2013-02-10 2013-01-01 2012-12-31
#> 11 2012-12-31                s3 2013-01-11 2013-02-11 2012-12-31 2012-12-31
#> 12 2012-12-31      period_start 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#> 13 2012-12-31      period_start 2014-01-01 2014-01-01 2014-01-01 2012-12-31
#>            pe
#> 1  2013-01-09
#> 2  2013-01-09
#> 3  2013-01-09
#> 4  2013-01-09
#> 5  2013-01-09
#> 6  2013-01-09
#> 7  2013-01-09
#> 8  2013-01-09
#> 9  2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09
print(bb <- find_date_cause(d, s1, s2, s3, period_start = ps, period_end = pe,
                            na_fill = "end",
                            datecol = "enddate",
                            datereason = "endcause"))
#> Warning: Row 1: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 2: ignored date(s) after period_end while selecting in-period date 2013-01-02
#> Warning: Row 3: ignored date(s) after period_end while selecting in-period date 2013-01-03
#> Warning: Row 4: ignored date(s) after period_end while selecting in-period date 2013-01-04
#> Warning: Row 5: ignored date(s) after period_end while selecting in-period date 2013-01-05
#> Warning: Row 6: ignored date(s) after period_end while selecting in-period date 2013-01-06
#> Warning: Row 7: ignored date(s) after period_end while selecting in-period date 2013-01-07
#> Warning: Row 8: ignored date(s) after period_end while selecting in-period date 2013-01-08
#> Warning: Row 9: ignored date(s) after period_end while selecting in-period date 2013-01-09
#> Warning: Row 10: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 11: ignored date(s) after period_end while selecting in-period date 2012-12-31
#> Warning: 1 row(s) had valid dates before period_start but were filled with period_end: rows 12
#>       enddate   endcause         s1         s2         s3         ps         pe
#> 1  2013-01-01         s1 2013-01-01 2013-02-01 2013-01-10 2012-12-31 2013-01-09
#> 2  2013-01-02         s1 2013-01-02 2013-02-02 2013-01-09 2012-12-31 2013-01-09
#> 3  2013-01-03         s1 2013-01-03 2013-02-03 2013-01-08 2012-12-31 2013-01-09
#> 4  2013-01-04         s1 2013-01-04 2013-02-04 2013-01-07 2012-12-31 2013-01-09
#> 5  2013-01-05         s1 2013-01-05 2013-02-05 2013-01-06 2012-12-31 2013-01-09
#> 6  2013-01-06         s1 2013-01-06 2013-02-06 2013-01-05 2012-12-31 2013-01-09
#> 7  2013-01-07         s1 2013-01-07 2013-02-07 2013-01-04 2012-12-31 2013-01-09
#> 8  2013-01-08         s1 2013-01-08 2013-02-08 2013-01-03 2012-12-31 2013-01-09
#> 9  2013-01-09         s1 2013-01-09 2013-02-09 2013-01-02 2012-12-31 2013-01-09
#> 10 2013-01-01         s3 2013-01-10 2013-02-10 2013-01-01 2012-12-31 2013-01-09
#> 11 2012-12-31         s3 2013-01-11 2013-02-11 2012-12-31 2012-12-31 2013-01-09
#> 12 2013-01-09 period_end 2012-01-01 2012-01-01 2012-01-01 2012-12-31 2013-01-09
#> 13 2013-01-09 period_end 2014-01-01 2014-01-01 2014-01-01 2012-12-31 2013-01-09
find_date_cause(d, s3, s2, s1, period_start = ps, period_end = pe)
#> Warning: Row 1: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 2: ignored date(s) after period_end while selecting in-period date 2013-01-09
#> Warning: Row 3: ignored date(s) after period_end while selecting in-period date 2013-01-08
#> Warning: Row 4: ignored date(s) after period_end while selecting in-period date 2013-01-07
#> Warning: Row 5: ignored date(s) after period_end while selecting in-period date 2013-01-06
#> Warning: Row 6: ignored date(s) after period_end while selecting in-period date 2013-01-05
#> Warning: Row 7: ignored date(s) after period_end while selecting in-period date 2013-01-04
#> Warning: Row 8: ignored date(s) after period_end while selecting in-period date 2013-01-03
#> Warning: Row 9: ignored date(s) after period_end while selecting in-period date 2013-01-02
#> Warning: Row 10: ignored date(s) after period_end while selecting in-period date 2013-01-01
#> Warning: Row 11: ignored date(s) after period_end while selecting in-period date 2012-12-31
#> Warning: 1 row(s) had valid dates after period_end but were filled with period_start: rows 13
#>            s1         s2 start_date start_date_reason         s3         ps
#> 1  2013-01-01 2013-02-01 2013-01-01                s1 2013-01-10 2012-12-31
#> 2  2013-01-02 2013-02-02 2013-01-09                s3 2013-01-09 2012-12-31
#> 3  2013-01-03 2013-02-03 2013-01-08                s3 2013-01-08 2012-12-31
#> 4  2013-01-04 2013-02-04 2013-01-07                s3 2013-01-07 2012-12-31
#> 5  2013-01-05 2013-02-05 2013-01-06                s3 2013-01-06 2012-12-31
#> 6  2013-01-06 2013-02-06 2013-01-05                s3 2013-01-05 2012-12-31
#> 7  2013-01-07 2013-02-07 2013-01-04                s3 2013-01-04 2012-12-31
#> 8  2013-01-08 2013-02-08 2013-01-03                s3 2013-01-03 2012-12-31
#> 9  2013-01-09 2013-02-09 2013-01-02                s3 2013-01-02 2012-12-31
#> 10 2013-01-10 2013-02-10 2013-01-01                s3 2013-01-01 2012-12-31
#> 11 2013-01-11 2013-02-11 2012-12-31                s3 2012-12-31 2012-12-31
#> 12 2012-01-01 2012-01-01 2012-12-31      period_start 2012-01-01 2012-12-31
#> 13 2014-01-01 2014-01-01 2012-12-31      period_start 2014-01-01 2012-12-31
#>            pe
#> 1  2013-01-09
#> 2  2013-01-09
#> 3  2013-01-09
#> 4  2013-01-09
#> 5  2013-01-09
#> 6  2013-01-09
#> 7  2013-01-09
#> 8  2013-01-09
#> 9  2013-01-09
#> 10 2013-01-09
#> 11 2013-01-09
#> 12 2013-01-09
#> 13 2013-01-09

# works
assert_positive_timespan(dd, start_date, pe)

# gives a warning because s2 is earlier than start_date in row 12
assert_positive_timespan(dd, start_date, s2)
#> Warning: 1 rows had negative timespans
#>    start_date start_date_reason         s1         s2         s3         ps
#> 12 2012-12-31      period_start 2012-01-01 2012-01-01 2012-01-01 2012-12-31
#>            pe
#> 12 2013-01-09


with(d, constrain_dates(s1, ps, pe))
#>  [1] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" "2013-01-05"
#>  [6] "2013-01-06" "2013-01-07" "2013-01-08" "2013-01-09" NA          
#> [11] NA           NA           NA          
with(d, constrain_dates(s2, ps, pe))
#>  [1] NA NA NA NA NA NA NA NA NA NA NA NA NA
with(d, constrain_dates(s3, ps, pe))
#>  [1] NA           "2013-01-09" "2013-01-08" "2013-01-07" "2013-01-06"
#>  [6] "2013-01-05" "2013-01-04" "2013-01-03" "2013-01-02" "2013-01-01"
#> [11] "2012-12-31" NA           NA