laGP
A replication of the Gaussian Process regression implementation from chapter 5 of Surrogates. Application of the same code to real data.
Create some dummy data being an independent and dependent variable, along with a grid of the independent variable values.
# Training data
<- 8
n <- matrix(seq(0, 2*pi, length=n), ncol=1) # independent variable
X <- sin(X)
y
# Predictive grid
<- matrix(seq(-0.5, 2*pi + 0.5, length=100), ncol=1)
XX
# Nugget / jitter
<- sqrt(.Machine$double.eps) eps
newGP
Note the parameters:
d: a positive scalar lengthscale parameter for an isotropic Gaussian correlation function (newGP); or a vector for a separable version (newGPsep), and
g: a positive scalar nugget / jitter parameter
are seeded with the darg``` and
garg``` functions.
library('laGP')
## Warning: package 'laGP' was built under R version 4.1.3
##
## Attaching package: 'laGP'
## The following object is masked from 'package:plgp':
##
## distance
# Derive sensible parameter ranges
<- darg(NULL, X)
d
1] <- 1e-3 # negate garg error: Error in check.arg(g) : d$min should be a postive scalar < d$max
y[<- garg(NULL, y)
g
par(mfrow=c(1,2))
for(parm in c('d','g')) {
# GP
<- newGP(X, y, d=d$start, g=g$start*0.5, dK=TRUE)
gpi #mle <- mleGP(gpi)
<- mleGP(gpi, param=parm, tmin=ifelse(parm == 'd', d$min, g$min), tmax=ifelse(parm == 'd', d$max, g$max))
mle <- predGP(gpi, XX)
p <- mvtnorm::rmvnorm(100, p$mean, p$Sigma)
YY <- p$mean + qnorm(0.05, 0, sqrt(diag(p$Sigma)))
q1 <- p$mean + qnorm(0.95, 0, sqrt(diag(p$Sigma)))
q2
# Plot
matplot(XX, t(YY), type="l", col="gray", lty=1,
main = paste0("Optimising ", ifelse(parm == 'd', 'lengthscale', 'nugget / jitter')),
xlab="x", ylab="sin(x)", bty = "n")
points(X, y, pch=20, cex=2)
lines(XX, p$mean, lwd=2)
lines(XX, sin(XX), col="blue")
lines(XX, q1, lwd=2, lty=2, col=2)
lines(XX, q2, lwd=2, lty=2, col=2)
deleteGP(gpi)
rm(mle)
}
newGPsep
A separable GP, the lengthscale parameter is allowed to decay at differing rates dependent on the input data. This is an anisotropic, rather than isotropic model.
<- newGPsep(X, y, d=d$start, g=g$start*0.5, dK=TRUE)
gpi <- mleGPsep(gpi, param='both', tmin=c(d$min, g$min), tmax=c(d$max, g$max))
mle <- predGPsep(gpi, XX)
p <- mvtnorm::rmvnorm(100, p$mean, p$Sigma)
YY <- p$mean + qnorm(0.05, 0, sqrt(diag(p$Sigma)))
q1 <- p$mean + qnorm(0.95, 0, sqrt(diag(p$Sigma)))
q2
# Plot
matplot(XX, t(YY), type="l", col="gray", lty=1,
main = "seperable GP ",
xlab="x", ylab="sin(x)", bty = "n")
points(X, y, pch=20, cex=2)
lines(XX, p$mean, lwd=2)
lines(XX, sin(XX), col="blue")
lines(XX, q1, lwd=2, lty=2, col=2)
lines(XX, q2, lwd=2, lty=2, col=2)
deleteGPsep(gpi)
rm(mle)
The procedures above are now applied to real data, that being an estimation of stock valuation based on fundamental data.
library(romerb)
data("stock_data")
<- stock_data
fundamental_raw rm(stock_data)
# Medical devices sector only
<- fundamental_raw[fundamental_raw$sector == 7, ]
df $log_mkt_cap <- log(df$mkt_cap)
df$log_book <- log(-df$total_equity_cln)
df$roe <- -df$roe
df<- df[df$date_stamp == as.Date('2021-06-30'), c('log_book','log_mkt_cap','log_pb','roe','leverage')]
df
# nugget / jitter
<- sqrt(.Machine$double.eps)
eps
# Training data
<- matrix(df$roe)
X <- matrix(df$log_pb)
y
# Predictive grid
<- matrix(seq(min(X), max(X), length=200), ncol=1)
XX
# Plot
plot(X, y, main = "Medical devices sector",
xlab = "Return on equity", ylab = "Price book ratio",
pch = 19, frame = FALSE)
<- darg(NULL, X)
d <- garg(NULL, y)
g
<- newGPsep(X, y, d=d$start, g=g$start*0.5, dK=TRUE)
gpi <- mleGPsep(gpi, param='both', tmin=c(d$min, g$min), tmax=c(d$max, g$max))
mle <- predGPsep(gpi, XX)
p <- mvtnorm::rmvnorm(100, p$mean, p$Sigma)
YY <- p$mean + qnorm(0.05, 0, sqrt(diag(p$Sigma)))
q1 <- p$mean + qnorm(0.95, 0, sqrt(diag(p$Sigma)))
q2
# Plot
matplot(XX, t(YY), type="l", col="gray", lty=1,
xlab = "Return on equity", ylab = "Price book ratio", bty = "n")
points(X, y, pch=20, cex=2)
lines(XX, p$mean, lwd=2)
lines(XX, q1, lwd=2, lty=2, col=2)
lines(XX, q2, lwd=2, lty=2, col=2)
deleteGPsep(gpi)
rm(mle)
Functions creating dummy data. Useful for model testing.
tpg vignette, refer page 3 for dummy data function
Gold
price function from the GPfit
doucmentation.
Other Gaussian Process packages