I am conducting an interrupted time series analysis. The time series is the monthly incidence of new users of a given medication from January 2015 to December 2024 in France. I want to assess the impact of three different regulatory interventions that were implemented on September 2021, June 2022, and June 2023 to facilitate prescription of this medication (intervention 1-3 on the figure below).
In the code below, I implemented the method proposed by Schaffer et al. (i.e. build an ARIMA model using the pre-intervention period and forecast the post-intervention period) to assess the impact of each intervention successively. However, in Schaffer et al. the time series had a single intervention during the study period.
My questions:
- Is this strategy reasonable? E.g. is it a problem that the pre-intervention period of later interventions include the previous interventions? I am rather surprised that the forecast trends are downwards (figure below), so I wonder if there is something wrong with the analysis.
- Additionally, the first Covid-19 lockdown (from 2020-03-17 to 2020-05-11 in France) also had an impact. While assessing the impact of Covid-19 lockdown is not my goal, should a take extra measure to avoid bias in forcasts? Is there something available in R? This was discussed here, but without precise method. Here is a guide using
skforecastin Python.
This question is different from this one where a given intervention is introduced and removed at specific intervals throughout the time series.
library(forecast)
x <- data.frame(
date = seq(as.Date("2015-01-01"), as.Date("2024-12-01"), by = "month"),
prev = c(3.2, 2.7, 3.1, 3, 2.6, 2.2, 1.6, 1.8, 3.1, 3, 2.7, 2.7, 3,
2.9, 3.3, 2.9, 3, 2.5, 1.5, 2, 3, 2.9, 2.9, 2.7, 3.4, 2.8, 3.8,
3.2, 3.2, 2.6, 1.7, 1.9, 3.5, 3, 3.3, 2.8, 3.6, 3, 3.7, 3, 3.2,
2.8, 1.9, 2.1, 3.4, 3.3, 3.5, 2.9, 4, 3.4, 3.9, 3.5, 3.6, 2.8,
2.2, 2.4, 3.7, 3.9, 3.7, 3.4, 4.3, 3.9, 3.3, 2, 2.4, 2.8, 2.4,
2.4, 4, 3.9, 4, 4.1, 4.5, 4.5, 5.3, 4.7, 4.6, 4.5, 3, 3, 5.4,
5.4, 5.2, 5.2, 5.8, 5.3, 7.2, 6, 6.3, 5.7, 3.9, 4.6, 7.3, 6.7,
6.9, 6.2, 8, 6.9, 8.6, 7, 7.1, 7.3, 5.4, 5.7, 9, 8.8, 9.3, 8.2,
9.5, 10, 10.5, 10.3, 9.6, 9.6, 8.2, 7.5, 11.6, 12.5, 12, 11.3)
)
x$prev <- ts(x$prev, start = 2015, frequency = 12)
# to stabilize the variance using Box-Cox transformation
lambda <- BoxCox.lambda(x$prev)
# intervention 1 ----------------------------------------------------------
# variables for ITS analysis
x$step1 <- x$date >= as.Date("2021-09-13")
x$ramp1 <- cumsum(x$step1)
xreg1 <- as.matrix(x[c("step1", "ramp1")])
# automatically identify p, d, q, and P, D, Q parameters
(fit1 <- auto.arima(
x$prev,
stepwise = FALSE,
trace = TRUE,
approximation = FALSE,
xreg = xreg1,
lambda = lambda
))
# Series: x$prev
# Regression with ARIMA(1,0,0)(2,1,2)[12] errors
# Box Cox transformation: lambda= 0.2460429
#
# Coefficients:
# ar1 sar1 sar2 sma1 sma2 drift step ramp
# 0.5903 0.4695 -0.2460 -1.3244 0.4385 0.0082 0.1703 0.0316
# s.e. 0.0821 0.9263 0.2544 0.7366 0.9953 0.0012 0.0980 0.0041
#
# sigma^2 = 0.0153: log likelihood = 66.42
# AIC=-114.84 AICc=-113 BIC=-90.7
checkresiduals(fit1)
params1 <- arimaorder(fit1)
# model data excluding post-intervention period
# and forecast the counterfactual up to December 2025
pred1 <-
x$prev |>
window(end = c(2021, 8)) |>
Arima(
order = params1[1:3],
seasonal = list(order = params1[4:6], period = params1[7]),
lambda = lambda
) |>
forecast(h = 4 * 12 - 8)
# re-run for intervention 2 -----------------------------------------------
x$step2 <- x$date >= as.Date("2022-06-02")
x$ramp2 <- cumsum(x$step2)
xreg2 <- as.matrix(x[c("step2", "ramp2")])
(fit2 <- auto.arima(
x$prev,
stepwise = FALSE,
trace = TRUE,
approximation = FALSE,
xreg = xreg2,
lambda = lambda
))
# Series: x$prev
# Regression with ARIMA(1,0,0)(2,1,2)[12] errors
# Box Cox transformation: lambda= 0.2460429
#
# Coefficients:
# ar1 sar1 sar2 sma1 sma2 drift step ramp
# 0.7215 0.5493 -0.3077 -1.3180 0.4354 0.0111 0.0665 0.0388
# s.e. 0.0742 0.7063 0.2458 0.6138 0.7753 0.0016 0.1252 0.0080
#
# sigma^2 = 0.01679: log likelihood = 61.55
# AIC=-105.09 AICc=-103.26 BIC=-80.96
checkresiduals(fit2)
params2 <- arimaorder(fit2)
pred2 <-
x$prev |>
window(end = c(2022, 5)) |>
Arima(
order = params2[1:3],
seasonal = list(order = params2[4:6], period = params2[7]),
lambda = lambda
) |>
forecast(h = 3 * 12 - 5)
# re-run for intervention 3 -----------------------------------------------
# variables for ITS analysis
x$step3 <- x$date >= as.Date("2023-06-29")
x$ramp3 <- cumsum(x$step3)
xreg3 <- as.matrix(x[c("step3", "ramp3")])
# automatically identify p, d, q, and P, D, Q parameters
(fit3 <- auto.arima(
x$prev,
stepwise = FALSE,
trace = TRUE,
approximation = FALSE,
xreg = xreg3,
lambda = lambda
))
# Series: x$prev
# Regression with ARIMA(4,1,0)(0,1,1)[12] errors
# Box Cox transformation: lambda= 0.2460429
#
# Coefficients:
# ar1 ar2 ar3 ar4 sma1 step3 ramp3
# -0.3484 -0.2845 -0.2100 -0.2100 -0.8645 0.0106 0.0246
# s.e. 0.0948 0.0994 0.0989 0.1002 0.1844 0.1182 0.0177
#
# sigma^2 = 0.01756: log likelihood = 59.96
# AIC=-103.92 AICc=-102.45 BIC=-82.53
checkresiduals(fit3)
params3 <- arimaorder(fit3)
pred3 <-
x$prev |>
window(end = c(2023, 5)) |>
Arima(
order = params3[1:3],
seasonal = list(order = params3[4:6], period = params3[7]),
lambda = lambda
) |>
forecast(h = 2 * 12 - 5)
# plot --------------------------------------------------------------------
plot(
ts.union(x$prev, pred1$mean, pred2$mean, pred3$mean),
plot.type = "single",
ylab = "Prevalence",
col = 1:4
)
abline(
v = c(2021 + 8 / 12, 2022 + 5 / 12, 2023 + 5 / 12),
col = 2:4,
lty = 3
)

