# unbuffer the output # change directory to the location of 'U.S.'family-income-2009.txt' y <- scan( 'U.S.-family-income-2009.txt' ) print( n <- length( y ) ) # [1] 842 hist( y, breaks = 100, prob = T, xlab = 'Family Income', main = '' ) print( sample.median <- median( y ) ) # [1] 60.2555 m <- 100000 theta.star <- rep( NA, m ) set.seed( 1 ) system.time( { for ( i in 1:m ) { y.star <- sample( y, n, replace = T ) theta.star[ i ] <- median( y.star ) if ( ( i %% 10000 ) == 0 ) print( i ) } } ) # user system elapsed # 8.11 0.02 8.13 hist( theta.star, breaks = 100, prob = T, xlab = 'population median', main = '' ) # this is an approximation to the posterior distribution # for the population median given the data, using a # bayesian nonparametric model plot( density( theta.star ), xlab = 'population median', main = '' ) plot( density( theta.star, adjust = 1.5 ), xlab = 'population median', main = '' ) plot( density( theta.star, adjust = 2.0 ), xlab = 'population median', main = '' ) hist( theta.star, breaks = 50, prob = T, xlab = 'population median', main = '' ) print( theta.hat.bootstrap <- mean( theta.star ) ) # [1] 60.60706 # this is the bootstrap (frequentist) estimate # of the population median, and it's also an approximation # to the posterior mean under the bayesian nonparametric model print( sample.median.bias.estimate <- theta.hat.bootstrap - sample.median ) # [1] 0.3515582 # from the frequentist point of view, the sample median # has a small amount of bias in this problem print( se.theta.hat.bootstrap <- sd( theta.star ) ) # [1] 2.36691 # this is the bootstrap (frequentist) standard error # for the bootstrap estimate of the population median, # and it's also an approximation to the posterior sd # under the bayesian nonparametric model print( theta.hat.bootstrap.95.percent.interval <- quantile( theta.star, probs = c( 0.025, 0.975 ) ) ) # 2.5% 97.5% # 56.022 65.272 # this is the bootstrap (frequentist) 95% confidence interval # for the population median, and it's also an approximation # to the central 95% posterior interval under the # bayesian nonparametric model hist( theta.star, breaks = 50, prob = T, xlab = 'population median', main = '' ) abline( v = theta.hat.bootstrap, lwd = 2, col = 'red' ) segments( theta.hat.bootstrap.95.percent.interval[ 1 ], 0, theta.hat.bootstrap.95.percent.interval[ 1 ], 0.075, lwd = 2, col = 'red' ) segments( theta.hat.bootstrap.95.percent.interval[ 2 ], 0, theta.hat.bootstrap.95.percent.interval[ 2 ], 0.075, lwd = 2, col = 'red' ) par( mfrow = c( 2, 1 ) ) hist( y, breaks = 100, prob = T, xlab = 'Family Income', main = '', xlim = c( 0, 1000 ) ) hist( theta.star, breaks = 50, prob = T, xlab = 'population median family income', main = '', xlim = c( 0, 1000 ) ) par( mfrow = c( 1, 1 ) )