In this vignette, we will demonstrate the main capabilities
of the GPvecchia
package. These include estimating
parameters and making spatial predictions. We also show how to use the
package for processing non-linear, non-Gaussian data by combining the
Vecchia with a Laplace approximation.
We start by importing the GPvecchia
library.
library(GPvecchia)
library(Matrix)
library(fields)
#> Loading required package: spam
#> Spam version 2.11-0 (2024-10-03) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#>
#> Attaching package: 'spam'
#> The following object is masked from 'package:Matrix':
#>
#> det
#> The following objects are masked from 'package:base':
#>
#> backsolve, forwardsolve
#> Loading required package: viridisLite
#>
#> Try help(fields) to get started.
To illustrate our method, we simulate a small dataset. First, consider a unit square and randomly select observation locations. (Set spatial.dim=1 to consider data on the one-dimensional unit interval.)
set.seed(1988)
spatial.dim=2
n=50
if(spatial.dim==1){
locs=matrix(runif(n),ncol=1)
} else {
locs <- cbind(runif(n),runif(n))
}
Next, we define the covariance function of the field as well as the scale of the measurement error
beta=2
sig2=1; range=.1; smooth=1.5
covparms =c(sig2,range,smooth)
covfun <- function(locs) sig2*MaternFun(fields::rdist(locs),covparms)
nuggets=rep(.1,n)
We are now ready to simulate the field and visualize it as a sanity check.
Om0 <- covfun(locs)+diag(nuggets)
z=as.numeric(t(chol(Om0))%*%rnorm(n))
data=z+beta
# plot simulated data
if(spatial.dim==1) {
plot(locs,data)
} else {
fields::quilt.plot(locs,data, nx=n, ny=n)
}
We also create a grid of n.p locations at which we would like to make predictions.
Let us now estimate the mean and covariance parameters using the default settings, which assume a spatially constant mean or trend, and a Matern covariance structure. Note that the following code might take a minute or so to run.
vecchia.est=vecchia_estimate(data,locs,,output.level=0)
#> Nelder-Mead direct search function minimizer
#> function value for initial parameters = 61.133162
#> Scaled convergence tolerance is 9.10955e-07
#> Stepsize computed as 0.100000
#> BUILD 5 61.381731 60.754055
#> LO-REDUCTION 7 61.182128 60.754055
#> LO-REDUCTION 9 61.133162 60.754055
#> LO-REDUCTION 11 61.070965 60.754055
#> LO-REDUCTION 13 60.846320 60.754055
#> LO-REDUCTION 15 60.820974 60.754055
#> LO-REDUCTION 17 60.788447 60.754055
#> EXTENSION 19 60.781072 60.725514
#> LO-REDUCTION 21 60.764690 60.725514
#> HI-REDUCTION 23 60.764237 60.725514
#> REFLECTION 25 60.754055 60.724434
#> REFLECTION 27 60.745117 60.719430
#> REFLECTION 29 60.728191 60.719258
#> EXTENSION 31 60.725514 60.708763
#> HI-REDUCTION 33 60.724434 60.708763
#> LO-REDUCTION 35 60.719430 60.708763
#> LO-REDUCTION 37 60.719258 60.708763
#> EXTENSION 39 60.715353 60.697299
#> LO-REDUCTION 41 60.710655 60.697299
#> LO-REDUCTION 43 60.710599 60.697299
#> REFLECTION 45 60.708763 60.697132
#> EXTENSION 47 60.705199 60.692044
#> LO-REDUCTION 49 60.697702 60.692044
#> LO-REDUCTION 51 60.697299 60.692044
#> LO-REDUCTION 53 60.697132 60.692044
#> HI-REDUCTION 55 60.693648 60.692044
#> EXTENSION 57 60.693185 60.690552
#> LO-REDUCTION 59 60.692802 60.690552
#> LO-REDUCTION 61 60.692353 60.690552
#> LO-REDUCTION 63 60.692044 60.690552
#> REFLECTION 65 60.691268 60.690489
#> LO-REDUCTION 67 60.691153 60.690489
#> REFLECTION 69 60.690624 60.690421
#> HI-REDUCTION 71 60.690604 60.690260
#> EXTENSION 73 60.690552 60.690088
#> EXTENSION 75 60.690489 60.689662
#> LO-REDUCTION 77 60.690421 60.689662
#> LO-REDUCTION 79 60.690260 60.689662
#> LO-REDUCTION 81 60.690088 60.689662
#> LO-REDUCTION 83 60.689910 60.689643
#> EXTENSION 85 60.689750 60.689335
#> LO-REDUCTION 87 60.689668 60.689335
#> HI-REDUCTION 89 60.689662 60.689335
#> EXTENSION 91 60.689643 60.689126
#> EXTENSION 93 60.689521 60.688750
#> LO-REDUCTION 95 60.689485 60.688750
#> EXTENSION 97 60.689335 60.688626
#> EXTENSION 99 60.689126 60.687839
#> LO-REDUCTION 101 60.688960 60.687839
#> EXTENSION 103 60.688750 60.687295
#> LO-REDUCTION 105 60.688626 60.687295
#> EXTENSION 107 60.688189 60.686386
#> LO-REDUCTION 109 60.687866 60.686386
#> EXTENSION 111 60.687839 60.685778
#> REFLECTION 113 60.687295 60.685655
#> REFLECTION 115 60.686674 60.685381
#> EXTENSION 117 60.686386 60.684860
#> LO-REDUCTION 119 60.685778 60.684860
#> LO-REDUCTION 121 60.685655 60.684860
#> LO-REDUCTION 123 60.685381 60.684860
#> LO-REDUCTION 125 60.685048 60.684821
#> REFLECTION 127 60.684991 60.684749
#> LO-REDUCTION 129 60.684956 60.684749
#> LO-REDUCTION 131 60.684860 60.684749
#> EXTENSION 133 60.684821 60.684642
#> REFLECTION 135 60.684765 60.684638
#> HI-REDUCTION 137 60.684761 60.684638
#> HI-REDUCTION 139 60.684749 60.684638
#> EXTENSION 141 60.684679 60.684568
#> REFLECTION 143 60.684677 60.684557
#> REFLECTION 145 60.684642 60.684517
#> EXTENSION 147 60.684638 60.684430
#> LO-REDUCTION 149 60.684568 60.684430
#> LO-REDUCTION 151 60.684557 60.684430
#> EXTENSION 153 60.684517 60.684361
#> LO-REDUCTION 155 60.684482 60.684361
#> LO-REDUCTION 157 60.684457 60.684361
#> HI-REDUCTION 159 60.684430 60.684361
#> REFLECTION 161 60.684410 60.684343
#> EXTENSION 163 60.684389 60.684311
#> LO-REDUCTION 165 60.684363 60.684311
#> HI-REDUCTION 167 60.684361 60.684311
#> LO-REDUCTION 169 60.684343 60.684311
#> LO-REDUCTION 171 60.684329 60.684309
#> LO-REDUCTION 173 60.684320 60.684309
#> HI-REDUCTION 175 60.684314 60.684309
#> REFLECTION 177 60.684311 60.684303
#> REFLECTION 179 60.684311 60.684303
#> LO-REDUCTION 181 60.684309 60.684303
#> REFLECTION 183 60.684309 60.684301
#> REFLECTION 185 60.684305 60.684299
#> LO-REDUCTION 187 60.684303 60.684299
#> LO-REDUCTION 189 60.684303 60.684299
#> REFLECTION 191 60.684301 60.684298
#> HI-REDUCTION 193 60.684300 60.684298
#> EXTENSION 195 60.684299 60.684296
#> HI-REDUCTION 197 60.684299 60.684296
#> LO-REDUCTION 199 60.684299 60.684296
#> EXTENSION 201 60.684298 60.684293
#> LO-REDUCTION 203 60.684297 60.684293
#> LO-REDUCTION 205 60.684297 60.684293
#> REFLECTION 207 60.684296 60.684293
#> REFLECTION 209 60.684295 60.684293
#> REFLECTION 211 60.684294 60.684292
#> LO-REDUCTION 213 60.684293 60.684292
#> REFLECTION 215 60.684293 60.684292
#> Exiting from Nelder Mead minimizer
#> 217 function evaluations used
Based on these parameter estimates, we can then make predictions at the grid of locations we had specified above.
Finally, we compare the approximate predictions with the best possible ones (i.e. those obtained using analytic expressions for conditional mean in the Gaussian distribution).
## exact prediction
mu.exact=as.numeric(covfun(rbind(locs,locs.pred))[,1:n]%*%solve(Om0,data-beta))+beta
cov.exact=covfun(rbind(locs,locs.pred))-
covfun(rbind(locs,locs.pred))[,1:n]%*%solve(Om0,t(covfun(rbind(locs,locs.pred))[,1:n]))
var.exact=diag(cov.exact)
cov.exact.pred=cov.exact[n+(1:n.p),n+(1:n.p)]
### plot Vecchia and exact predictions
if(spatial.dim==1) {
plot(locs,z)
lines(locs.pred,preds$mean.pred,col='blue')
lines(locs.pred,preds$mean.pred-1.96*sqrt(preds$var.pred),col='blue',lty=2)
lines(locs.pred,preds$mean.pred+1.96*sqrt(preds$var.pred),col='blue',lty=2)
lines(locs.pred,mu.exact[n+(1:n.p)],col='red')
lines(locs.pred,mu.exact[n+(1:n.p)]-1.96*sqrt(var.exact[n+(1:n.p)]),col='red',lty=2)
lines(locs.pred,mu.exact[n+(1:n.p)]+1.96*sqrt(var.exact[n+(1:n.p)]),col='red',lty=2)
} else {
sdrange=range(sqrt(c(preds$var.pred,var.exact[n+(1:n.p)])))
defpar = par(mfrow=c(2,3))
fields::quilt.plot(locs,z, nx=sqrt(n.p), ny=sqrt(n.p))
fields::quilt.plot(locs.pred,preds$mean.pred, nx=sqrt(n.p), ny=sqrt(n.p))
fields::quilt.plot(locs.pred,sqrt(preds$var.pred),zlim=sdrange, nx=sqrt(n.p), ny=sqrt(n.p))
fields::quilt.plot(locs,z, nx=sqrt(n.p), ny=sqrt(n.p))
fields::quilt.plot(locs.pred,mu.exact[n+(1:n.p)], nx=sqrt(n.p), ny=sqrt(n.p))
fields::quilt.plot(locs.pred,sqrt(var.exact[n+(1:n.p)]),zlim=sdrange, nx=sqrt(n.p), ny=sqrt(n.p))
par(defpar)
}
Let’s take a closer look at how the likelihood is evaluated using Vecchia. Most importantly, we can specify a parameter, m. Its value determines the number of “neighbours” of each point, or, in other words, how many other points a given point conditions on. The larger this parameter, the more accurate and expensive the approximation will be.
m=20
vecchia.approx=vecchia_specify(locs,m)
vecchia_likelihood(z,vecchia.approx,covparms,nuggets)
#> [1] -61.09357
Note that the function vecchia_specify determines the general properties of the approximation, but it does not depend on the data or the specific parameter values. Hence, it does not have to be re-run when searching over different parameter values in an estimation procedure.
We can also compare the results to the exact likelihood:
library(mvtnorm)
#>
#> Attaching package: 'mvtnorm'
#> The following objects are masked from 'package:spam':
#>
#> rmvnorm, rmvt
dmvnorm(z,mean=rep(0,n),sigma=Om0,log=TRUE)
#> [1] -61.09842
In this case the approximation is very good. In general, m = 20 is a good value, and m should usually be between 10 and 40. For one-dimensional space, we can get good approximations even with m = 5 or smaller.
Similar to the previous section we next specify the approximation and indicate at which locations prediction is desired.
m=30
vecchia.approx=vecchia_specify(locs,m,locs.pred=locs.pred)
preds=vecchia_prediction(z,vecchia.approx,covparms,nuggets)
# returns a list with elements mu.pred,mu.obs,var.pred,var.obs,V.ord
It is also possible to print the entire predictive covariance matrix. We do it here only for the purpose of illustration. If n.p is very large, this matrix might use up a lot of memory and we generally do not recommend plotting it directly.
Sigma=V2covmat(preds)$Sigma.pred
cov.range=quantile(rbind(Sigma,cov.exact.pred),c(.01,.99))
defpar = par(mfrow=c(1,2))
fields::image.plot(cov.exact.pred,zlim=cov.range)
fields::image.plot(Sigma,zlim=cov.range)
We might sometimes be interested in a linear combination of the predicted values. In particular, we can limit our attention to only a subset of our predictions. This can be accomplished by specifying the linear combination coefficients as a matrix. As an example, we assume we are only interested in predictions at the unobserved prediction locations (not at the first n observed locations):
H=Matrix::sparseMatrix(i=1:(n+n.p),j=1:(n+n.p),x=1)[(n+1):(n+n.p),]
# compute variances of Hy
lincomb.vars=vecchia_lincomb(H,preds$U.obj,preds$V.ord)
plot(preds$var.pred,lincomb.vars)
As another example, we consider the overall mean of the process at
all prediction locations. Using the vecchia_lincomb()
function enables us to get the variance estimates easily.
By specifying appropriate options in vecchia_specify, we can do everything described above for several other GP approximations: Modified predictive process, FSA, MRA, latent, standard Vecchia
Setting M = 1 results in block full-scale approximation, specifically one with $r_0 = \frac{m}{2}$ knots spread over the entire domain and the remaining locations being partitioned into blocks of size $<\frac{m}{2}+1$.
m=20
mra.options.fulls=list(M=1)
blockFS = vecchia_specify(locs, m, 'maxmin', conditioning='mra', mra.options=mra.options.fulls, verbose=TRUE)
#> MRA params: m=19; J=4; r=10,10; M=1
Another popular existing approximation method, modified predictive process (MPP), can also be obtained by specifying appropriate parameter settings:
mra.options.mpproc=list(r=c(m,1))
MPP = vecchia_specify(locs, m, 'maxmin', conditioning='mra', mra.options=mra.options.mpproc, verbose=TRUE)
#> MRA params: m=20; J=32; r=20; M=0
As we can see, MPP can be viewed as a special case of the multi-resolution approximation (MRA).
A general MRA is obtained my specifying all of its three parameters
mra.options.mra = list(r=c(10, 5, 5), M=2, J=2)
MRA_rJM = vecchia_specify(locs, m, 'maxmin', conditioning='mra', mra.options=mra.options.mra, verbose=TRUE)
#> Warning in get.mra.params(n, mra.options, m): M, r set for MRA. If parameter m
#> was given, it will be overridden
#> MRA params: m=22; J=2,2; r=10,5,7; M=2
We should note two things to note about this full specifiction of an MRA. First, providing all three r,J and M overrides whatever value of m was provided. Second, in order to be able to place a knot at each point of the grid, the provided parameters might need to be adjusted.
Finally, we can also use the GPvecchia
package to
specify a Nearest Neighbour Gaussian Process (NNGP)
approximation. This can be accomplished as shown below.
We can now easily compare different approximation methods and compare it with SGV and exact likelihood.
vecchia_likelihood(z,blockFS,covparms,nuggets)
#> [1] -61.24514
vecchia_likelihood(z,MPP,covparms,nuggets)
#> [1] -63.68507
vecchia_likelihood(z,MRA_rJM,covparms,nuggets)
#> [1] -61.00043
vecchia_likelihood(z,NNGP,covparms,nuggets)
#> [1] -61.09416
vecchia_likelihood(z, vecchia_specify(locs, m), covparms, nuggets)
#> [1] -61.09357
dmvnorm(z,mean=rep(0,n),sigma=Om0,log=TRUE)
#> [1] -61.09842
Here we demonstrate how GPVecchia can fit a latent model to non-Gaussian data using the Vecchia-Laplace method. We simulate data by first generating a correlated latent field without noise, assuming the same covariance and locations generated earlier:
Then we sample a single non-Gaussian value for each latent value. The variability introduced by the sampling induces heteroskedasticity, in contrast the the constant noise added to the Gaussian case. Below we use a logistic model for binary data, but there are implementations for count and continuous positive (right-skewed) data as well.
data.model = "logistic"
# simulate data
if(data.model=='poisson'){
z = rpois(n, exp(y))
} else if(data.model=='logistic'){
z = rbinom(n,1,prob = exp(y)/(1+exp(y)))
} else if(data.model=='gamma'){
z = rgamma(n, shape = default_lh_params$alpha, rate = default_lh_params$alpha*exp(-y))
}else{
print('Error: Distribution not implemented yet.')
}
# plot simulated data, 1 or 2D
defpar = par(mfrow=c(1,2))
if(spatial.dim==1) {
plot(locs,y, main = "latent")
plot(locs,z, main = "observed")
} else {
fields::quilt.plot(locs,y, main = "Latent")
fields::quilt.plot(locs,z, main = "Observed")
}
Given the simulated data, we now can efficiently estimate the latent field by specifying the number of conditioning points m described earlier. Interweaved ordering is best for 1D data while response-first (‘zy’) ordering is best for higher dimensions.
m=10
if(spatial.dim==1){
vecchia.approx=vecchia_specify(locs,m) #IW ordering
} else {
vecchia.approx=vecchia_specify(locs,m,cond.yz='zy') #RF ordering
}
With the approximated covariance structure, we can calculate the posterior estimate for the latent field using the Vecchia-Laplace method and plot the result. Pure Laplace approximation is included for comparison; even with a small value for m, we can get a result similar to Laplace but with much lower cost.
posterior = calculate_posterior_VL(z,vecchia.approx,likelihood_model=data.model,
covparms = covparms)
if (spatial.dim==1){
par(mfrow=c(1,1))
ord = order(locs) # order so that lines appear correctly
y_limits = c(min(y, posterior$mean[ord]), max(y, posterior$mean[ord]))
plot(locs[ord], y[ord], type = "l", ylim = y_limits )
lines(locs[ord], posterior$mean[ord], type = "l", col=3, lwd=3)
legend("bottomright", legend = c("Latent", "VL"), col= c(1,3), lwd=c(1,3))
} else if (spatial.dim==2){
dfpar = par(mfrow=c(1,2))
# ordering unnecessary; we are using a scatter plot rather than lines
quilt.plot(locs, y, main= "Truth")
quilt.plot(locs, posterior$mean, main= "VL m=10")
par(defpar)
}
Predictions are computed as before, using Vecchia-Laplace methods where needed
###### specify prediction locations #######
n.p=30^2
if(spatial.dim==1){ # 1-D case
locs.pred=matrix(seq(0,1,length=n.p),ncol=1)
} else { # 2-D case
grid.oneside=seq(0,1,length=round(sqrt(n.p)))
locs.pred=as.matrix(expand.grid(grid.oneside,grid.oneside)) # grid of pred.locs
}
n.p=nrow(locs.pred)
###### specify Vecchia approximation #######
vecchia.approx.pred = vecchia_specify(locs, m=10, locs.pred=locs.pred)
### carry out prediction
preds = vecchia_laplace_prediction(posterior, vecchia.approx.pred, covparms)
# plotting predicitions
if (spatial.dim==1){
defpar = par(mfrow=c(1,1))
ord = order(locs) # order so that lines appear correctly
plot(locs[ord], y[ord], type = "l", xlim=c(0,1.2), ylim = c(-1,3))
lines(locs, posterior$mean, type = "p", col=4, lwd=3, lty=1)
lines(locs.pred, preds$mu.pred, type = "l", col=3, lwd=3, lty=1)
lines(locs.pred,preds$mu.pred+sqrt(preds$var.pred), type = "l", lty = 3, col=3)
lines(locs.pred,preds$mu.pred-sqrt(preds$var.pred), type = "l", lty = 3, col=3)
legend("topleft", legend = c("Latent", "VL: Pred", "VL: 1 stdev"),
col= c(1,3,3), lwd=c(1,2,1), lty = c(1,1,3))
par(defpar)
} else if (spatial.dim==2){
defpar = par(mfrow=c(1,2))
# ordering unnecessary; we are using a scatter plot rather than lines
quilt.plot(locs, y, main= "True Latent",
xlim = c(0,1), ylim = c(0,1), nx=64, ny=64)
quilt.plot(locs.pred, preds$mu.pred, main= "VL Prediction",nx = 30, ny=30)
par(defpar)
}
The likelihood of the data for a set of parameters can be computed efficiently using the command below.
vecchia_laplace_likelihood(z,vecchia.approx,likelihood_model=data.model,covparms = covparms)
#> [1] -35.38816
This can be used for parameter estimation by evaluating the likelihood over a grid of parameter values or in an iterative optimization method such as Nelder-Mead. Reparameterizing the parameters improves performance.
# currently set up for covariance estimation
vecchia.approx=vecchia_specify(locs, m=10, cond.yz = "zy") # for posterior
vecchia.approx.IW = vecchia_specify(locs, m=10) # for integrated likelihood
if (spatial.dim==1) vecchia.approx=vecchia.approx.IW
vl_likelihood = function(x0){
theta = exp(x0)
covparms=c(theta[1], theta[2], theta[3]) # sigma range smoothness
prior_mean = 0 # can be a parameter as well
# Perform inference on latent mean with Vecchia Laplace approximation
vll = vecchia_laplace_likelihood(z,vecchia.approx, likelihood_model=data.model,
covparms, return_all = FALSE,
likparms = default_lh_params, prior_mean = prior_mean,
vecchia.approx.IW = vecchia.approx.IW)
return(-vll)
}
x0 = log(c(.07,1.88, 1.9))
vl_likelihood(x0)
# Issues with R aborting, maxit set to 1
res = optim(x0, vl_likelihood, method = "Nelder-Mead", control = list("trace" = 1, "maxit" = 1))
exp(res$par[1:3])
vl_likelihood(x0)