# how to mcmc sample with rjags in R
# note: the comment character in R is #
# (if you run this in a console or IDE, unbuffer the output so
# that results appear as soon as they are produced)
# (0) type 'rjags' in your favorite browser search window;
# roughly the fourth entry down from the top will say
# JAGS: Just Another Gibbs Sampler
# clicking on that link will take you to sourceforge.net ,
# where you now click on the field that says
# Download Latest Version JAGS-4.3.0.exe (33.9 MB)
# double-left click on the .exe download icon and follow
# the resulting instructions to install JAGS (i recommend
# that you accept all default settings offered to you)
# (1) create a new directory; for illustration here, i'm going
# to call it
# NB10-rjags
# (2) in a text file, write your model in the same way you would
# for winbugs, except that the first line has to say
# model {
# instead of
# {
# and store this text file in the directory NB10-rjags ; for
# illustration here i'll call that file
# nb10-model-3.txt
# for clarity, this file looks like the following here
# (without the # comment symbols, of course):
# model {
# mu ~ dnorm( 0.0, 1.0E-6 )
# tau ~ dgamma( 0.001, 0.001 )
# nu ~ dunif( 2.0, 12.0 )
# for ( i in 1:n ) {
#
# y[ i ] ~ dt( mu, tau, nu )
# }
#
# sigma <- 1.0 / sqrt( tau )
# y.new ~ dt( mu, tau, nu )
# }
# (3) open up an R session and change directory to NB10-rjags
# (4) if this is your first time using rjags, you have to install
# the rjags package and then load it; if you've already
# installed it you just need to load it now (you have to be
# connected to the internet for the next step to work).
# the requireNamespace() guard below skips the (slow, network-
# dependent) download when rjags is already installed, so this
# script can be re-run safely; at the R prompt > issue the command
if ( !requireNamespace( "rjags", quietly = TRUE ) ) {
  install.packages( "rjags" )
}
# you'll need to select a CRAN mirror site: i use
# USA (CA 1) [https]
# R will now mumble a few things and finish with a remark like
# The downloaded binary packages are in blah-blah
# to load rjags into the current session, at the R prompt > issue
# the command (the quoted form is equivalent to the bare name):
library( "rjags" )
# R will again mumble a few things, possibly finishing with
# some warning messages, which you can ignore, saying that
# various things were built under a different version of R
# than the one you're using; when you get a > prompt at the end
# of the mumbling, rjags will be loaded
# (5) your data file is the same as in winbugs except that you
# assign it to an object in your current R session; for
# illustration here it's assigned to nb10.data with the
# following commands; the data vector is built first, and the
# sample size n is computed from it with length() so the two
# can never fall out of sync if the data are ever edited:
nb10.y <- c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403.,
398., 403., 407., 402., 401., 399., 400., 401., 405., 402.,
408., 399., 399., 402., 399., 397., 407., 401., 399., 401.,
403., 400., 410., 401., 407., 423., 406., 406., 402., 405.,
405., 409., 399., 402., 407., 406., 413., 409., 404., 402.,
404., 406., 407., 405., 411., 410., 410., 410., 401., 402.,
404., 405., 392., 407., 406., 404., 403., 408., 404., 407.,
412., 406., 409., 400., 408., 404., 401., 404., 408., 406.,
408., 406., 401., 412., 393., 437., 418., 415., 404., 401.,
401., 407., 412., 375., 409., 406., 398., 406., 403., 404. )
nb10.data <- list( y = nb10.y, n = length( nb10.y ) )
# (6) your initial values file is the same as in winbugs except
# that you assign it to an object in your current R session; for
# illustration here it's assigned to nb10.inits.2 with the
# following command; note the .RNG.name and .RNG.seed entries:
# JAGS maintains its own internal random-number generator, which
# R's set.seed() does NOT control, so these two entries are what
# actually make the MCMC draws reproducible from run to run
nb10.inits.2 <- list( mu = 404.59, tau = 0.05, nu = 5.0, y.new = 400.0,
  .RNG.name = "base::Mersenne-Twister", .RNG.seed = 314159 )
# (7) now you do the model checking and compiling with the following
# command (using the names i've chosen for illustration above);
# NOTE(review): per the rjags docs, jags.model() also runs an
# adaptation phase (n.adapt iterations, default 1000) before
# returning -- confirm against your installed rjags version
nb10.run.1 <- jags.model(
  file = "nb10-model-3.txt",
  data = nb10.data,
  inits = nb10.inits.2
)
# (8) before doing the random MCMC sampling, it's good form to set the
# random number seed, so that other people running your code get
# the same results you do:
# NOTE(review): set.seed() seeds R's own RNG only -- it does not
# reach inside JAGS, which keeps its own internal generator; for
# exactly reproducible JAGS chains you must also pass .RNG.name
# and .RNG.seed in the inits list given to jags.model() (see the
# rjags documentation to confirm)
set.seed( 314159 )
# (9) now you do a burn-in of (e.g.) 1000 iterations from your
# initial values; wrapping the update in system.time() reports
# how long the burn-in took:
n.burnin <- 1000
burnin.timing <- system.time( update( nb10.run.1, n.iter = n.burnin ) )
print( burnin.timing )
# on my laptop, with an Intel Core i7-7820HQ CPU at 2.90 GHz,
# single-threaded, 1000 burn-in iterations took about 0.9 seconds
# (10) now you do a monitoring run of (e.g.) 100000 iterations,
# asking rjags to record mu, sigma, nu and y.new at every
# iteration (at decent GHz this will take about 90 seconds):
n.monitor <- 100000
monitored.variables <- c( "mu", "sigma", "nu", "y.new" )
system.time(
  nb10.run.1.results <- jags.samples( nb10.run.1,
    variable.names = monitored.variables,
    n.iter = n.monitor )
)
# on my laptop (single-threaded), 100000 monitoring iterations
# took about 81 seconds (rjags is slower than winbugs)
# (11) now you get to summarize the posterior in a variety of ways;
# here's some useful code to do so; first you extract each of
# the monitored columns of the MCMC data set into individual
# variables; NOTE(review): jags.samples() returns rjags "mcarray"
# objects (per the rjags docs), so as.vector() is used here to
# flatten each one into a plain numeric vector of length
# n.monitor, keeping the downstream summaries (mean, sd, cor,
# quantile, plots) free of mcarray attributes:
mu.star <- as.vector( nb10.run.1.results$mu )
sigma.star <- as.vector( nb10.run.1.results$sigma )
nu.star <- as.vector( nb10.run.1.results$nu )
y.new.star <- as.vector( nb10.run.1.results$y.new )
# then you can create graphical and numerical summaries of each
# monitored quantity, as follows:
##### summary for mu
# (11a) first you create what i call the MCMC 4-plot:
par( mfrow = c( 2, 2 ) )
# upper left: time-series plot of the monitored iterations
# (a visual check for stationarity)
plot( 1:n.monitor, mu.star, type = 'l', xlab = 'Iteration Number',
ylab = 'mu' )
# upper right: density trace of the marginal posterior
# distribution for the quantity being monitored
plot( density( mu.star ), xlab = 'mu', main = '' )
# lower left and right: ACF and PACF of the monitored series;
# note: as.ts() silently ignores start/end/frequency (they are
# swallowed by its ... argument); ts() is the constructor that
# actually honors them (here they match ts()'s defaults, so the
# plots are unchanged)
acf( ts( mu.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
pacf( ts( mu.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
# the lower left and right graphs are plots of the autocorrelation
# function (ACF) and the partial autocorrelation function (PACF)
# for the time series of monitored iterations; if these iterations
# behave like an autoregressive time series of order 1 with first-
# order autocorrelation rho (often denoted by AR1( rho )), then
# (a) the PACF will have a single spike of height rho at lag 1 and
# no other significant spikes and (b) the ACF will exhibit a
# geometric decay pattern with a spike of height rho at lag 1, a
# spike of approximate height rho^2 at lag 2, and so on -- for
# the parameter mu the ACF and PACF plots look perfectly like an
# AR1 with a first-order autocorrelation of about rho.1.hat = 0.21
# an MCMC time series with a rho.1.hat value close to 0 (which
# would be equivalent to IID sampling) is said to be "mixing well";
# here we would say that this is true of the mu series
# (11b) then you can make numerical summaries of the marginal
# posterior for the monitored quantity:
c( mean( mu.star ), sd( mu.star ),
  quantile( mu.star, probs = c( 0.025, 0.975 ) ) )
# 2.5% 97.5%
# 404.29713 0.46744 403.38488 405.22240
# estimated first-order (lag-1) autocorrelation of the mu series,
# obtained by correlating the series with itself shifted by one
# iteration (negative indexing drops the last / first element):
rho.1.hat.mu.star <- cor( mu.star[ -n.monitor ], mu.star[ -1 ] )
print( rho.1.hat.mu.star )
# 0.2125716
# if the monitored series behaves like an AR1( rho ), the Monte
# Carlo standard error (MCSE) of its mean is the IID value
# sd / sqrt( n ) inflated by sqrt( ( 1 + rho ) / ( 1 - rho ) ):
se.mu.star.mean <- ( sd( mu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.mu.star ) / ( 1 - rho.1.hat.mu.star ) )
print( se.mu.star.mean )
# 0.001834315
# this is also the right order of magnitude for the MCSE of the
# MCMC estimate of the posterior SD and the quantiles giving the
# 95% interval for the monitored quantity
# we would conclude, based on this model, that the true weight of
# NB10 was about 404.30 micrograms below the nominal weight of 10
# grams, give or take about 0.47 micrograms, and that a 95%
# posterior interval for the true weight runs from about 403.38
# to about 405.22 micrograms below 10 grams
##### summary for sigma
# MCMC 4-plot for sigma (time series, density trace, ACF, PACF):
plot( 1:n.monitor, sigma.star, type = 'l', xlab = 'Iteration Number',
ylab = 'sigma' )
plot( density( sigma.star ), xlab = 'sigma', main = '' )
# note: as.ts() silently ignores start/end/frequency (swallowed
# by its ... argument); ts() actually honors them, and they match
# its defaults so the plots are unchanged
acf( ts( sigma.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
pacf( ts( sigma.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
# the marginal posterior for sigma has a bit of a long right-hand
# tail, as you would expect for a scale parameter; the ACF and PACF
# plots show that the sigma time series behaves more or less like an
# AR1 with a first-order autocorrelation of about 0.45 (so the
# MCMC output for this parameter is not mixing quite as well as
# the output for mu, but 0.45 is still a fairly low autocorrelation)
# posterior mean, SD and central 95% interval for sigma:
c( mean( sigma.star ), sd( sigma.star ),
  quantile( sigma.star, probs = c( 0.025, 0.975 ) ) )
# 2.5% 97.5%
# 3.8625312 0.4383744 3.0775740 4.7879916
# lag-1 autocorrelation via the series against its one-step shift:
rho.1.hat.sigma.star <- cor( sigma.star[ -n.monitor ], sigma.star[ -1 ] )
print( rho.1.hat.sigma.star )
# 0.4480396
# AR1-adjusted Monte Carlo standard error of the posterior mean:
se.sigma.star.mean <- ( sd( sigma.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.sigma.star ) / ( 1 - rho.1.hat.sigma.star ) )
print( se.sigma.star.mean )
# 0.002245337
# we would conclude that the population value of the scale parameter
# in the t model is about 3.86 micrograms, give or take about
# 0.44 micrograms, and that a 95% posterior interval for this
# parameter runs from about 3.08 to about 4.79 micrograms
##### summary for nu
# MCMC 4-plot for nu (time series, density trace, ACF, PACF):
plot( 1:n.monitor, nu.star, type = 'l', xlab = 'Iteration Number',
ylab = 'nu' )
plot( density( nu.star ), xlab = 'nu', main = '' )
# note: as.ts() silently ignores start/end/frequency (swallowed
# by its ... argument); ts() actually honors them, and they match
# its defaults so the plots are unchanged
acf( ts( nu.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
pacf( ts( nu.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
# the marginal posterior for nu has a pronounced long right-hand
# tail; the ACF and PACF plots show that the nu time series behaves
# more or less like an AR1 with a first-order autocorrelation of
# almost 0.6 (this parameter is mixing the least well, but 0.6 is
# still not too bad for a first-order autocorrelation)
# posterior mean, SD and central 95% interval for nu:
c( mean( nu.star ), sd( nu.star ),
  quantile( nu.star, probs = c( 0.025, 0.975 ) ) )
# 2.5% 97.5%
# 3.636421 1.170341 2.140174 6.567723
# lag-1 autocorrelation via the series against its one-step shift:
rho.1.hat.nu.star <- cor( nu.star[ -n.monitor ], nu.star[ -1 ] )
print( rho.1.hat.nu.star )
# 0.5729619
# AR1-adjusted Monte Carlo standard error of the posterior mean:
se.nu.star.mean <- ( sd( nu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.nu.star ) / ( 1 - rho.1.hat.nu.star ) )
print( se.nu.star.mean )
# 0.007102942
# notice the effect of the roughly 0.6 autocorrelation value: this
# MCSE, while still small, is about 3.5 times larger than the MCSEs
# for mu and sigma
# we would conclude that the population value of the degrees
# of freedom parameter in the t model is about 3.6, give or take
# about 1.2, and that a 95% posterior interval for this
# parameter runs from about 2.1 to about 6.6
##### summary for y.new
# MCMC 4-plot for the posterior predictive draws y.new:
plot( 1:n.monitor, y.new.star, type = 'l', xlab = 'Iteration Number',
ylab = 'y.new' )
plot( density( y.new.star ), xlab = 'y.new', main = '' )
# note: as.ts() silently ignores start/end/frequency (swallowed
# by its ... argument); ts() actually honors them, and they match
# its defaults so the plots are unchanged
acf( ts( y.new.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
pacf( ts( y.new.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
# the time series plot for y.new shows that the t model with small
# degrees of freedom can generate some simulated predicted data
# values that are quite far from the center of the distribution
# (in fact, farther away from the middle than the actual data values)
# compare quantiles of the actual data with the same quantiles of
# the posterior predictive draws (same probability grid for both):
probs.compare <- c( 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 )
quantile( nb10.data$y, probs = probs.compare )
# 1% 5% 10% 25% 50% 75% 90% 95% 99%
# 391.83 398.00 399.00 401.00 404.00 407.00 410.00 412.05 423.14
quantile( y.new.star, probs = probs.compare )
# 1% 5% 10% 25% 50% 75% 90% 95% 99%
# 387.86 395.53 398.12 401.36 404.29 407.21 410.48 413.09 420.59
# you can see that the tails of the predictive distribution are
# a bit heavier than those of the actual data values
# this is a good way to informally check a Bayesian model:
# compare the predictive distribution it produces for future data
# with the actual data values
# switch to a single-panel layout for the quantile-quantile
# comparison of the data against the posterior predictive draws:
par( mfrow = c( 1, 1 ) )
qqplot( nb10.data$y, y.new.star,
  xlab = 'NB10 data',
  ylab = 'Predictive distribution from t model' )
# the fit of this plot is terrific except for a small number of
# extreme values in the predictive distribution that are far away
# from the actual extremes of the data; what to make of this fact
# is a bit unclear: on the one hand, it calls the t model slightly
# into question, but on the other hand it suggests that if the
# NBS people were to make more than 100 weighings of NB10 they
# might get some readings that were even more extreme than the
# outliers they actually observed
# predictive mean, SD and central 95% interval for y.new:
c( mean( y.new.star ), sd( y.new.star ),
  quantile( y.new.star, probs = c( 0.025, 0.975 ) ) )
# 2.5% 97.5%
# 404.276509 6.463505 392.551676 415.951182
# re-draw the MCMC 4-plot for y.new (the same four panels as
# above, restored to a 2 x 2 layout after the single-panel qqplot):
par( mfrow = c( 2, 2 ) )
plot( 1:n.monitor, y.new.star, type = 'l', xlab = 'Iteration Number',
ylab = 'y.new' )
plot( density( y.new.star ), xlab = 'y.new', main = '' )
# note: as.ts() silently ignores start/end/frequency (swallowed
# by its ... argument); ts() actually honors them, and they match
# its defaults so the plots are unchanged
acf( ts( y.new.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
pacf( ts( y.new.star[ 1:n.monitor ], start = 1,
end = n.monitor, frequency = 1 ), lag.max = 10, main = '' )
# the ACF and PACF plots show that the MCMC draws from the predictive
# distribution behave like white noise, and this is in fact a general
# phenomenon in MCMC simulation from predictive distributions (the
# resulting draws are IID)
# lag-1 autocorrelation of the predictive draws (essentially 0,
# i.e., the y.new draws behave like IID samples):
rho.1.hat.y.new.star <- cor( y.new.star[ -n.monitor ], y.new.star[ -1 ] )
print( rho.1.hat.y.new.star )
# 0.006518032
# AR1-adjusted Monte Carlo standard error of the predictive mean:
se.y.new.star.mean <- ( sd( y.new.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.y.new.star ) / ( 1 - rho.1.hat.y.new.star ) )
print( se.y.new.star.mean )
# 0.02057306
# even though rho.1.hat.y.new.star is essentially 0, the MCSE for
# the mean of the posterior predictive distribution is about 3 times
# larger than that for nu (the worst-mixing parameter); this is
# because the SD of the posterior predictive distribution (6.46)
# is so much larger than the posterior SDs of the parameters (we
# noticed this in the Poisson length-of-stay case study: prediction
# of future data values is much noisier than parameter estimation)
# we would conclude that, according to the t model, future weighings
# of NB10 should produce values around 404.3 micrograms below
# 10 grams, give or take about 6.5 micrograms, and that about 95%
# of those future data values should be between about 392.6 and
# 416.0 micrograms below 10 grams
# compare the data mean and SD with those of the posterior
# predictive distribution:
c( mean( nb10.data$y ), sd( nb10.data$y ) )
# 404.590000 6.466846
c( mean( y.new.star ), sd( y.new.star ) )
# 404.276509 6.463505
# proportion of the actual data values falling inside the 95%
# predictive interval; mean() of a logical vector is the idiomatic
# replacement for sum( ( a ) * ( b ) ) / 100 -- the logical
# conjunction uses & rather than multiplication of logicals, and
# mean() divides by the actual sample size instead of a
# hard-coded 100:
mean( nb10.data$y > 392.6 & nb10.data$y < 416.0 )
# 0.95
# you can see that the actual data mean and SD values agree well with
# the mean and SD of the posterior predictive distribution, and that
# (as it happens) exactly 95% of the data values are within the
# 95% predictive distribution for future data values; all in all,
# this adds up to the conclusion that the t model fits the
# NB10 data set well