# Written by Chris Severs, edited by David Draper

# Some helper functions and example usage of rjags in parallel mode in R.

# The output from each of the foreach calls below is an mcmc object that
# can be manipulated with the coda package, which is automatically loaded
# with rjags.

# The base code is from: http://andrewgelman.com/2011/07/parallel-jags-rngs/
# with some updates and a nicer combine function.

# This builds on the NB10 rjags example in the file called
# 'How to MCMC sample with R-JAGS' on the course web page.

# First, change directory to the location where you previously stored the
# file 'nb10-model.txt'; on my laptop this is

# setwd( 'C:/Users/draper/Teaching/AMS-206/NB10-rjags' )

# Second, if the following 3 packages have not yet been installed,
# install them using the instructions in the file
# 'How to MCMC sample with R-JAGS':

#   doParallel
#   rjags
#   random

# dynamic.require loads a package, installing it first if necessary.

dynamic.require <- function( package ) {

  if ( eval( parse( text = paste( 'require(', package, ')' ) ) ) ) {
    return( 'done' )
  }

  install.packages( package )

  return( eval( parse( text = paste( 'require(', package, ')' ) ) ) )

}

dynamic.require( 'doParallel' )
dynamic.require( 'rjags' )
dynamic.require( 'random' )

# On OS X/Linux you can optionally use this instead of the version below:

# registerDoParallel( )   # change to registerDoParallel( n ) to use n processes

# On Windows, you need to use the following:

cl <- makeCluster( 4 )   # change to makeCluster( n ) to use n processes

registerDoParallel( cl )

# Function to generate the initial values and random seeds for each model;
# 'lecuyer::RngStream' gives each chain its own independent RNG stream.

nb10.inits.1 <- function( ) {

  return( list( mu = 404.59, tau = 0.05, nu = 5.0, y.new = 400.0,
    .RNG.name = 'lecuyer::RngStream',
    .RNG.seed = round( 1e+06 * runif( 1 ) ) ) )

}

# Data to use

nb10.data.1 <- list( y = c( 409., 400., 406., 399., 402., 406., 401., 403.,
  401., 403., 398., 403., 407., 402., 401., 399., 400., 401., 405., 402.,
  408., 399., 399., 402., 399., 397., 407., 401., 399., 401., 403., 400.,
  410., 401., 407., 423., 406., 406., 402., 405., 405., 409., 399., 402.,
  407., 406., 413., 409., 404., 402., 404., 406., 407., 405., 411., 410.,
  410., 410., 401., 402., 404., 405., 392., 407., 406., 404., 403., 408.,
  404., 407., 412., 406., 409., 400., 408., 404., 401., 404., 408., 406.,
  408., 406., 401., 412., 393., 437., 418., 415., 404., 401., 401., 407.,
  412., 375., 409., 406., 398., 406., 403., 404. ), n = 100 )

# Helper function to combine multiple mcmc lists into a single one.

mcmc.combine <- function( ... ) {

  return( as.mcmc.list( sapply( list( ... ), mcmc ) ) )

}
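# The runs below assume the file 'nb10-model.txt' is already present in
# the working directory.  If you don't have the course file handy, the
# block below writes a stand-in t model whose node names match what this
# script monitors ( mu, sigma, nu, y.new ) and initializes ( mu, tau, nu );
# the particular priors are only a plausible guess, not a copy of the
# course file, so use the real 'nb10-model.txt' whenever you have it.

if ( !file.exists( 'nb10-model.txt' ) ) {

  cat( "
  model {

    mu ~ dnorm( 0.0, 1.0E-6 )       # diffuse prior on the center
    tau ~ dgamma( 0.001, 0.001 )    # diffuse prior on the precision
    nu ~ dunif( 2.0, 12.0 )         # degrees of freedom of the t likelihood

    for ( i in 1:n ) {

      y[ i ] ~ dt( mu, tau, nu )

    }

    sigma <- 1.0 / sqrt( tau )      # SD on the scale of the data
    y.new ~ dt( mu, tau, nu )       # posterior predictive draw

  }
  ", file = 'nb10-model.txt' )

}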
# Multi-threaded run: one chain per worker, i.e., 4 chains with the
# 4-process cluster registered above

iters <- 25000

set.seed( 1 )

time.1 <- system.time(

  jags.parsamples <- foreach( i = 1:getDoParWorkers( ), .inorder = FALSE,
    .packages = c( 'rjags', 'random' ), .combine = 'mcmc.combine',
    .multicombine = TRUE ) %dopar% {

    load.module( 'lecuyer' )

    model.jags <- jags.model( data = nb10.data.1, file = 'nb10-model.txt',
      inits = nb10.inits.1 )

    result <- coda.samples( model.jags,
      variable.names = c( 'mu', 'sigma', 'nu', 'y.new' ), n.iter = iters )

    return( result )

  }

)

print( time.1 )

#    user  system elapsed
#    0.05    0.03   45.68

# Single-threaded run

iters <- 100000

time.2 <- system.time(

  jags.singlesample <- foreach( i = 1:1, .combine = 'mcmc.combine',
    .multicombine = TRUE ) %do% {

    load.module( 'lecuyer' )

    model.jags <- jags.model( data = nb10.data.1, file = 'nb10-model.txt',
      inits = nb10.inits.1 )

    result <- coda.samples( model.jags,
      variable.names = c( 'mu', 'sigma', 'nu', 'y.new' ), n.iter = iters )

    return( result )

  }

)

print( time.2 )

#    user  system elapsed
#  135.61    0.98  137.70

137.70 / 45.68

# [1] 3.014448

# The speed-up seems to be only about a factor of 3.1, even with 4 threads
# in use.  This is partly because, even though I didn't ask for it, each
# chain goes through a default adaptation ('burn-in') phase of 1000
# iterations before monitoring begins (the n.adapt argument to jags.model
# defaults to 1000):

str( jags.parsamples )

# List of 4
#  $ : 'mcmc' num [1:25000, 1:4] 404 404 404 404 405 ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : NULL
#   .. ..$ : chr [1:4] "mu" "nu" "sigma" "y.new"
#   ..- attr(*, "mcpar")= num [1:3] 1001 26000 1
#  .
#  .
#  .
#  $ : 'mcmc' num [1:25000, 1:4] 404 405 405 404 404 ...
#   ..- attr(*, "dimnames")=List of 2
#   .. ..$ : NULL
#   .. ..$ : chr [1:4] "mu" "nu" "sigma" "y.new"
#   ..- attr(*, "mcpar")= num [1:3] 1001 26000 1
#  - attr(*, "class")= chr "mcmc.list"

# So the parallel routine is creating 4 * 26000 = 104000 samples in
# 45.68 seconds, which is ( 104000 / 45.68 ) = 2277 samples per second,
# and the single-thread routine is creating 101000 samples in 137.70
# seconds, which is ( 101000 / 137.70 ) = 733 samples per second; and
# ( 2277 / 733 ) = 3.1 -- so the real speed-up is about 3.1 with 4 threads.
# The rest of the 'shortfall' is parallel-processing overhead.

plot( jags.parsamples )

dynamic.require( 'lattice' )

xyplot( jags.parsamples[ , c( 'mu', 'nu' ) ] )

summary( jags.parsamples )

# Iterations = 1001:26000
# Thinning interval = 1
# Number of chains = 4
# Sample size per chain = 25000

# 1. Empirical mean and standard deviation for each variable,
#    plus standard error of the mean:

#          Mean     SD Naive SE Time-series SE
# mu    404.298 0.4702 0.001487       0.001941
# nu      3.647 1.1776 0.003724       0.007812
# sigma   3.868 0.4369 0.001382       0.002405
# y.new 404.307 6.2303 0.019702       0.019764

# 2. Quantiles for each variable:

#          2.5%     25%     50%     75%   97.5%
# mu    403.377 403.982 404.295 404.612 405.229
# nu      2.141   2.810   3.404   4.201   6.611
# sigma   3.086   3.562   3.842   4.145   4.795
# y.new 392.523 401.395 404.317 407.231 415.986

summary( jags.singlesample )

# Iterations = 1001:101000
# Thinning interval = 1
# Number of chains = 1
# Sample size per chain = 1e+05

# 1. Empirical mean and standard deviation for each variable,
#    plus standard error of the mean:

#          Mean     SD Naive SE Time-series SE
# mu    404.300 0.4699 0.001486       0.001901
# nu      3.630 1.1633 0.003679       0.007588
# sigma   3.862 0.4372 0.001383       0.002415
# y.new 404.325 6.4527 0.020405       0.020405

# 2. Quantiles for each variable:

#          2.5%     25%     50%     75%   97.5%
# mu    403.374 403.982 404.300 404.617 405.217
# nu      2.141   2.805   3.390   4.180   6.552
# sigma   3.073   3.557   3.836   4.142   4.789
# y.new 392.547 401.380 404.326 407.235 416.103
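# Because jags.parsamples is an ordinary coda mcmc.list (here with 4
# parallel chains), the standard coda convergence diagnostics can be
# applied to it directly; the calls below are one natural way to do that.
# gelman.diag needs at least 2 chains, so it's only applied to the
# parallel result.

gelman.diag( jags.parsamples )       # potential scale reduction factors

effectiveSize( jags.parsamples )     # effective sample sizes (chains pooled)

effectiveSize( jags.singlesample )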
autocorr.plot( jags.parsamples )

autocorr.plot( jags.singlesample )
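# When you're finished, it's good practice (especially on Windows, where
# makeCluster starts separate worker processes) to shut the cluster down
# explicitly rather than leaving the workers running until R exits.

stopCluster( cl )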