####### rjags code to fit the Gamma model to the income data ####### # unbuffer the output # install rjags if you haven't already done so library( rjags ) # change directory to the place where the income data file is setwd( 'C:/DD/Teaching/AMS-206/Winter-2018/Income-Data-Analysis' ) y <- scan( 'U.S.-family-income-2009.txt' ) income.data <- list( y = y, n = length( y ) ) income.gamma.initial.values <- list( log.alpha = 0.0, log.beta = 0.0, y.new = mean( y ) ) A <- 2 income.gamma.run.1 <- jags.model( file = 'income-gamma-model-rjags.txt', data = income.data, inits = income.gamma.initial.values, n.chains = A ) # choose your own random number seed, different from # my choice of 2718282, in the next line of code set.seed( 2718282 ) n.burnin <- 1000 system.time( update( income.gamma.run.1, n.iter = n.burnin ) ) # to save clock time you may want to make the number # of monitoring iterations smaller, but i recommend # no smaller than 20,000 n.monitor <- 100000 system.time( income.gamma.run.1.results <- jags.samples( income.gamma.run.1, variable.names = c( 'alpha', 'beta', 'theta', 'y.new' ), n.iter = n.monitor ) ) alpha.star <- income.gamma.run.1.results$alpha beta.star <- income.gamma.run.1.results$beta theta.star <- income.gamma.run.1.results$theta y.new.star <- income.gamma.run.1.results$y.new n.monitor <- A * n.monitor ######################################################################## par( mfrow = c( 2, 2 ) ) plot( 1:n.monitor, alpha.star, type = 'l', xlab = 'Iteration Number', ylab = 'alpha' ) plot( density( alpha.star ), xlab = 'alpha', main = '' ) acf( as.ts( alpha.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) pacf( as.ts( alpha.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) c( mean( alpha.star ), sd( alpha.star ), quantile( alpha.star, probs = c( 0.025, 0.975 ) ) ) print( rho.1.hat.alpha.star <- cor( alpha.star[ 1:( n.monitor - 1 ) ], alpha.star[ 2:n.monitor ] ) ) print( se.alpha.star.mean <- ( sd( alpha.star ) / sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.alpha.star ) / ( 1 - rho.1.hat.alpha.star ) ) ) par( mfrow = c( 1, 1 ) ) ######################################################################## par( mfrow = c( 2, 2 ) ) plot( 1:n.monitor, beta.star, type = 'l', xlab = 'Iteration Number', ylab = 'beta' ) plot( density( beta.star ), xlab = 'beta', main = '' ) acf( as.ts( beta.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) pacf( as.ts( beta.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) c( mean( beta.star ), sd( beta.star ), quantile( beta.star, probs = c( 0.025, 0.975 ) ) ) print( rho.1.hat.beta.star <- cor( beta.star[ 1:( n.monitor - 1 ) ], beta.star[ 2:n.monitor ] ) ) print( se.beta.star.mean <- ( sd( beta.star ) / sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.beta.star ) / ( 1 - rho.1.hat.beta.star ) ) ) par( mfrow = c( 1, 1 ) ) ######################################################################## par( mfrow = c( 2, 2 ) ) plot( 1:n.monitor, theta.star, type = 'l', xlab = 'Iteration Number', ylab = 'theta' ) plot( density( theta.star ), xlab = 'theta', main = '' ) acf( as.ts( theta.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) pacf( as.ts( theta.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) c( mean( theta.star ), sd( theta.star ), quantile( theta.star, probs = c( 0.025, 0.975 ) ) ) print( rho.1.hat.theta.star <- cor( theta.star[ 1:( n.monitor - 1 ) ], theta.star[ 2:n.monitor ] ) ) print( se.theta.star.mean <- ( sd( theta.star ) / sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.theta.star ) / ( 1 - rho.1.hat.theta.star ) ) ) par( mfrow = c( 1, 1 ) ) ######################################################################## par( mfrow = c( 2, 2 ) ) plot( 1:n.monitor, y.new.star, type = 'l', xlab = 'Iteration Number', ylab = 'y.new' ) plot( density( y.new.star ), xlab = 'y.new', main = '' ) acf( as.ts( y.new.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) pacf( as.ts( y.new.star[ 1:n.monitor ], start = 1, end = n.monitor, frequency = 1 ), lag.max = 10, main = '' ) par( mfrow = c( 1, 1 ) ) quantile( income.data$y, probs = c( 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 ) ) quantile( y.new.star, probs = c( 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 ) ) qqplot( income.data$y, y.new.star, xlab = 'income data', ylab = 'Predictive distribution from Gamma model' ) abline( 0, 1, lwd = 2, col = 'red' ) c( mean( y.new.star ), sd( y.new.star ), quantile( y.new.star, probs = c( 0.025, 0.975 ) ) ) print( rho.1.hat.y.new.star <- cor( y.new.star[ 1:( n.monitor - 1 ) ], y.new.star[ 2:n.monitor ] ) ) print( se.y.new.star.mean <- ( sd( y.new.star ) / sqrt( n.monitor ) ) * sqrt( ( 1 + rho.1.hat.y.new.star ) / ( 1 - rho.1.hat.y.new.star ) ) ) c( mean( income.data$y ), sd( income.data$y ) ) c( mean( y.new.star ), sd( y.new.star ) ) ######################################################################## # to save clock time you may want to make n.iter smaller # in the code below, but i recommend no smaller than 20,000 system.time( gamma.dic.summary <- dic.samples( income.gamma.run.1, n.iter = 100000, type = 'pD' ) ) print( gamma.mean.deviance <- sum( gamma.dic.summary$deviance ) ) print( gamma.complexity <- sum( gamma.dic.summary$penalty ) ) print( gamma.dic <- gamma.mean.deviance + gamma.complexity ) ########################################################################