Flexible Forecasting ED Visits

Screenshot

Introduction

A 10-year forecast on emergency department visits was conducted to support expansion planning. The forecasts were used to assist with scoping the architectural design and staffing models.

Data

Monthly ED visits at the hospital from 1997 to 2015 was sourced for this analysis. Another data source exists on ED visits from the Hawaii Health Information Corporation; however, the ED statistics from that set does not include multiple visits, left without being seen and observational stays.

visits <- readxl::read_excel("input/visits.xlsx", sheet=5)

rn <- as.numeric(1997:2015)
Years <- rn[1]:rn[length(rn)]

visits <- melt(visits, variable.name=c("month"), value.name="visits")

setDT(visits)[, ':='(
  year  = rep(Years, times=12),
  month = rep(1:12, each=length(Years))
)]

visits <- visits[order(year, month)]

Dependent Variable

In the ED setting, a visit is defined as a patient visit for a condition where a delay of several hours would not increase the likelihood of an adverse outcome. ED visits during this time frame varied substantially from year-to-year. Fitting a global, linear trend to the entire data would most likely not provide a good fit.

Annual Visits
visits_annual <- visits[, .(visits = sum(visits)), by=year]

ggplot(visits_annual[year<2016,], aes(x=year, y=visits)) +
  geom_point(aes(size=visits, colour=visits)) +
  geom_line() +
  geom_text_repel(aes(label=round(visits, 1))) +
  scale_colour_gradientn(colours=c("blue", "red")) +
  scale_y_continuous(limit=c(0, 15000)) +
  labs(title="", subtitle="", x="Year", y="ED Visits") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.key        = element_rect(colour=NA, fill=NA),
        legend.background = element_rect(fill="#FAFAFA"))

center

Monthly Visits
visits_monthly <- visits[, .(visits = sum(visits)), by=.(year, month)]

list.months <- c("1"="Jan", "2"="Feb", "3"="Mar", "4"="Apr", "5"="May", "6"="Jun", 
                 "7"="Jul", "8"="Aug", "9"="Sep", "10"="Oct", "11"="Nov", "12"="Dec")

visits_monthly[, pp_month := factor(month)]
visits_monthly[, pp_month :=plyr::revalue(pp_month, list.months)]

ggplot(visits_monthly, aes(pp_month, year)) +
  geom_tile(aes(fill=visits), colour="white") +
  geom_text(aes(label=comma(visits))) +
  scale_fill_gradient(low="white", high="red") +
  labs(title="", subtitle="", x="", y="") +
  scale_x_discrete(expand=c(0, 0)) +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.key        = element_rect(colour=NA, fill=NA),
        legend.background = element_rect(fill="#FAFAFA"))

center

Trend
visits_trend <- visits[order(year, month)]

visits_trend[, ':='(
  visits = visits,
  visits_ma03 = rollmean(visits, k=3, fill=NA),
  visits_ma06 = rollmean(visits, k=6, fill=NA),
  visits_ma12 = rollmean(visits, k=12, fill=NA)
  )]

visits_trend <- melt(visits_trend, 
                     id.vars=c("year", "month"), 
                     measure.vars=c("visits", "visits_ma03", "visits_ma06", "visits_ma12"))

visits_trend[, date := as.Date(paste(year, month, 1, sep="-"))]

ggplot(visits_trend, aes(x=date, y=value, colour=variable)) +
  geom_line() +
  # scale_y_continuous(limit=c(0, 15000)) +
  labs(title="", subtitle="", x="Year", y="ED Visits") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.key        = element_rect(colour=NA, fill=NA),
        legend.background = element_rect(fill="#FAFAFA"))

center

Model 1

The data was fit using a generalized additive mixed model (GAMM), which is a representation of the generalized additive model as a mixed effects model via the equivalence between random effects and smoothed splines. Unlike ordinary least squares regression, GAMMs can be used to model correlated responses such as the dependence structure of longitudinal data.

The smoothed splines performed well to fit both the year and month terms. The autocorrelation plots, however, reaffirmed the autocorrelated residual structure of the data.

visits[, date := as.Date(paste(year, month, 1, sep="-"))]

visits[, time := as.numeric(date)/1000]

ctrl <- list(niterEM=0, msVerbose=F, optimMethod="L-BFGS-B")

m0 <- gamm(visits~s(month, bs="cc", k=12) + s(time), 
           control=ctrl, data=visits)
summary(m0$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## visits ~ s(month, bs = "cc", k = 12) + s(time)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  869.281      3.544   245.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df      F p-value    
## s(month) 8.862 10.000  13.55  <2e-16 ***
## s(time)  7.337  7.337 400.22  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.931   
##   Scale est. = 2851.4    n = 228

Fitted GAMM

par(bg=NA)
vis.gam(m0$gam, n.grid=50, theta=35, phi=32, zlab="",
        ticktype="detailed", color="topo", main="")

center

Fitted Terms

par(bg=NA) 
layout(matrix(1:2, ncol=2))
plot(m0$gam, scale = 0)

center

layout(1)

ACF

# estimate total MA terms needed
par(bg=NA) 
acf(resid(m0$lme, type="normalized"), lag.max=36, main="")

center

PACF

# estimate total AR terms needed
par(bg=NA) 
pacf(resid(m0$lme, type="normalized"), plot=TRUE, main="")

center

Model 2

An AR(1) term was added to the model to remove the remaining residual due to autocorrelation as evident from the PACF plot. The likelihood ratio test confirmed that the updated model better fit the longitudinal data. The final model explained 93% of the variance.

m1 <- gamm(visits~s(month, bs="cc", k=12) + s(time), 
           control=ctrl, data=visits, correlation=corARMA(form=~1|year, p=1))

summary(m1$gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## visits ~ s(month, bs = "cc", k = 12) + s(time)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  869.527      5.214   166.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df      F p-value    
## s(month) 9.435  10.00  15.11  <2e-16 ***
## s(time)  6.170   6.17 226.51  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.93   
##   Scale est. = 2991.5    n = 228
anova(m0$lme, m1$lme)
##        Model df      AIC      BIC   logLik   Test  L.Ratio p-value
## m0$lme     1  5 2530.980 2548.126 -1260.49                        
## m1$lme     2  6 2508.421 2528.997 -1248.21 1 vs 2 24.55918  <.0001

Fitted GAMM

par(bg=NA) 
vis.gam(m1$gam, n.grid=50, theta=35, phi=32, zlab="",
        ticktype="detailed", color="topo", main="")

center

Fitted Terms

par(bg=NA) 
layout(matrix(1:2, ncol=2))
plot(m1$gam, scale = 0)

center

layout(1)

ACF

# estimate total MA terms needed
par(bg=NA) 
acf(resid(m1$lme, type="normalized"), plot=TRUE, main="")

center

PACF

# estimate total AR terms needed
par(bg=NA) 
pacf(resid(m1$lme, type="normalized"), plot=TRUE, main="")

center

Results

Fit v. Actual

ts_len <- seq(1, nrow(visits), length.out=228)
pdat <- with(visits, data.frame(time=time[ts_len], date=date[ts_len], month=month[ts_len]))
pdat <- setDT(pdat)[order(time)]

p <- predict(m1$gam, newdata=visits, type="terms", se.fit=TRUE)

pdat <- transform(pdat, p=p$fit[,2], se=p$se.fit[,2])

p1 <- ggplot(pdat, aes(x=date, y=p)) +
  geom_line() +
  labs(title="Fitted Trend", subtitle="", x="Year", y="Fitted") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.position   = "none")

p2 <- ggplot(visits, aes(x=date, y=visits, colour=visits)) +
  geom_line() +
  # scale_y_continuous(limit=c(0, 15000)) +
  labs(title="ED Visits (actuals)", subtitle="", x="Year", y="ED Visits") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.position   = "none")

layout <- matrix(c(2, 1), 2, 1, byrow=TRUE)
multiplot(p1, p2, layout=layout)

center

Monthly Forecast

## annual
pdat <- setDT(expand.grid(year=c(2016:2020), month=c(1:12)))
pdat[, date := as.Date(paste(year, month, 1, sep="-"), format="%Y-%m-%d")]
pdat[, time := as.numeric(date)/1000]

pred <- predict(m1$gam, newdata=pdat, se.fit=TRUE)
crit <- qt(0.975, df=df.residual(m1$gam)) # ~95% interval critical t

pdat <- transform(pdat, fitted=pred$fit, se=pred$se.fit, fYear=as.factor(year))
pdat <- transform(pdat, upper=fitted+(crit*se), lower=fitted-(crit*se))

ggplot(pdat, aes(x=date, y=fitted)) +
  geom_ribbon(mapping=aes(ymin=lower, ymax=upper), alpha=0.2) + # confidence band
  geom_line() +
  labs(title="", subtitle="", x="Year", y="ED Visits") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.background = element_rect(fill="#FAFAFA"))

center

Annual Forecast

pdat2 <- pdat[, .(fitted = sum(fitted)), by=year]

ggplot(pdat2, aes(x=year, y=fitted)) +
  geom_point(aes(size=fitted, colour=fitted)) +
  geom_line() +
  geom_text_repel(aes(label=round(fitted, 1))) +
  scale_colour_gradientn(colours=c("blue", "red")) +
  scale_y_continuous(limit=c(14000, 17000)) +
  labs(title="", subtitle="", x="Year", y="ED Visits") +
  theme_bw() +
  theme(plot.background   = element_rect(fill="#FAFAFA", color="#FAFAFA"),
        panel.background  = element_rect(fill='#FAFAFA'),
        panel.border      = element_blank(),
        panel.grid.major  = element_blank(),
        panel.grid.minor  = element_blank(),
        legend.key        = element_rect(colour=NA, fill=NA),
        legend.background = element_rect(fill="#FAFAFA"))

center


Other Projects


© 2017. All rights reserved.

Powered by Hydejack v6.6.0