Dynamic Factor Analysis

Eli Holmes

2023-05-03

The MARSS_tmb() function allows you to fit DFAs with the same form as MARSS(x, form="dfa"). This has a diagonal \(\mathbf{Q}\) with 1 on the diagonal and a stochastic \(\mathbf{x}_1\) with mean 0 and variance of 5 (diagonal variance-covariance matrix). There are only 3 options allowed for \(\mathbf{R}\): diagonal and equal, diagonal and unequal, and unconstrained.

Example data

library(MARSS)
data(lakeWAplankton, package = "MARSS")
phytoplankton <- c("Cryptomonas", "Diatoms", "Greens", "Unicells", "Other.algae")
dat <- as.data.frame(lakeWAplanktonTrans) |>
  subset(Year >= 1980 & Year <= 1989) |>
  subset(select=phytoplankton) |>
  t() |>
  MARSS::zscore()

Fit models without covariates

mod.list <- list(R='unconstrained', m=1, tinitx=1)

Fit with MARSS with EM or optim and BFGS.

m1 <- MARSS(dat, model=mod.list, form='dfa', z.score=FALSE, silent = TRUE)
m2 <- MARSS(dat, model=mod.list, form='dfa', z.score=FALSE, silent = TRUE, method="BFGS")

Fit with TMB.

library(marssTMB)
m3 <- dfaTMB(dat, model=list(m=1, R='unconstrained'))
m4 <- MARSS_tmb(dat, model=mod.list)
m5 <- MARSS_tmb(dat, model=mod.list, control=list(fun.opt="optim"))

Log likelihoods

name logLik
MARSS-EM -772.4017
MARSS-BFGS -772.4011
dfaTMB-nlminb -772.4011
MARSS_tmb-nlminb -772.3091
MARSS_tmb-optim-BFGS -875.1003

Compare parameter estimates

Add example with covariates

For form="dfa", pass in covariates with covariates=xyz. If using the default model form (not dfa), then pass in covariates with model$d or model$c.

Fit model

# use a simpler R
mod.list2 <- list(m=1, R='diagonal and unequal', tinitx=1)
# add a temperature covariate
temp <- as.data.frame(lakeWAplanktonTrans) |>
    subset(Year >= 1980 & Year <= 1989) |>
    subset(select=Temp)
covar <- t(temp) |> zscore()
t6 <- system.time(m6 <- MARSS_tmb(dat, model=mod.list2, form="dfa", covariates=covar, silent = TRUE, z.score = FALSE))
t7 <- system.time(m7 <- MARSS(dat, model=mod.list2, form="dfa", covariates=covar, silent = TRUE, control=list(maxit=10000), z.score = FALSE))

Add a 2nd covariate

TP <- as.data.frame(lakeWAplanktonTrans) |>
    subset(Year >= 1980 & Year <= 1989) |>
    subset(select=TP)
covar <- rbind(covar, t(TP)) |> zscore()
t8 <- system.time(m8 <- MARSS_tmb(dat, model=mod.list2, form="dfa", covariates=covar, silent = TRUE, z.score=FALSE))
t9 <- system.time(m9 <- MARSS(dat, model=mod.list2, form="dfa", covariates=covar, silent = TRUE, control=list(maxit=10000), z.score=FALSE))

Compare time and log likelihoods

name num_covar time logLik
MARSS-EM 1 0.426 -752.5472
MARSS_tmb-nlminb 1 0.339 -752.3948
MARSS-EM 2 0.478 -745.5611
MARSS_tmb-nlminb 2 0.397 -745.3983

Compare parameter estimates

MARSS EM would need to be forced to run for more iterations to reach the same maximum likelihood.

Run some time comparisons

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
df2 <- df |> mutate(mod = paste0(fun, "-", opt.function)) |>
  mutate(ncovar = as.factor(ncovar)) |>
  group_by(ncovar)
ggplot(df2, aes(alpha=ncovar, fill=mod, y=time, x=m)) + 
    geom_bar(stat="identity", position="dodge", color="black") +
  facet_wrap(~R, scales = "free_y") +
  scale_y_continuous() +
  ggtitle("TMB is faster than MARSS EM")
#> Warning: Using alpha for a discrete variable is not advised.

df2 <- df |> mutate(mod = paste0(fun, "-", opt.function))
df2$ncovar <- as.factor(df2$ncovar)
ggplot(df2, aes(col=ncovar, y=logLik, x=m, shape=mod)) + 
    geom_point(position=position_dodge(width=0.3)) +
  facet_wrap(~R, scales = "free_y") +
  scale_y_continuous() +
  ggtitle("logLik comparison")