# unbuffer the output

# change directory to the location of 'U.S.'family-income-2009.txt'

y <- scan( 'U.S.-family-income-2009.txt' )

print( n <- length( y ) )

# [1] 842

hist( y, breaks = 100, prob = T, xlab = 'Family Income',
  main = '' )

print( sample.median <- median( y ) )

# [1] 60.2555

m <- 100000

theta.star <- rep( NA, m )

set.seed( 1 )

system.time( {

  for ( i in 1:m ) {

    y.star <- sample( y, n, replace = T )
  
    theta.star[ i ] <- median( y.star )

    if ( ( i %% 10000 ) == 0 ) print( i )

  }

} 

)

#   user  system elapsed 
#   8.11    0.02    8.13

hist( theta.star, breaks = 100, prob = T,
  xlab = 'population median', main = '' )

# this is an approximation to the posterior distribution
# for the population median given the data, using a
# bayesian nonparametric model

plot( density( theta.star ), xlab = 'population median', 
  main = '' )

plot( density( theta.star, adjust = 1.5 ), 
  xlab = 'population median', main = '' )

plot( density( theta.star, adjust = 2.0 ), 
  xlab = 'population median', main = '' )

hist( theta.star, breaks = 50, prob = T,
  xlab = 'population median', main = '' )

print( theta.hat.bootstrap <- mean( theta.star ) )

# [1] 60.60706

# this is the bootstrap (frequentist) estimate
# of the population median, and it's also an approximation
# to the posterior mean under the bayesian nonparametric model

print( sample.median.bias.estimate <- 
  theta.hat.bootstrap - sample.median )

# [1] 0.3515582

# from the frequentist point of view, the sample median 
# has a small amount of bias in this problem

print( se.theta.hat.bootstrap <- sd( theta.star ) )

# [1] 2.36691

# this is the bootstrap (frequentist) standard error
# for the bootstrap estimate of the population median,
# and it's also an approximation to the posterior sd
# under the bayesian nonparametric model

print( theta.hat.bootstrap.95.percent.interval <-
  quantile( theta.star, probs = c( 0.025, 0.975 ) ) )

#   2.5%  97.5% 
# 56.022 65.272

# this is the bootstrap (frequentist) 95% confidence interval
# for the population median, and it's also an approximation
# to the central 95% posterior interval under the 
# bayesian nonparametric model

hist( theta.star, breaks = 50, prob = T,
  xlab = 'population median', main = '' )

abline( v = theta.hat.bootstrap, lwd = 2, col = 'red' )

segments( theta.hat.bootstrap.95.percent.interval[ 1 ], 0,
  theta.hat.bootstrap.95.percent.interval[ 1 ], 0.075, lwd = 2,
  col = 'red' )

segments( theta.hat.bootstrap.95.percent.interval[ 2 ], 0,
  theta.hat.bootstrap.95.percent.interval[ 2 ], 0.075, lwd = 2,
  col = 'red' )

par( mfrow = c( 2, 1 ) )

hist( y, breaks = 100, prob = T, xlab = 'Family Income',
  main = '', xlim = c( 0, 1000 ) )

hist( theta.star, breaks = 50, prob = T,
  xlab = 'population median family income', 
  main = '', xlim = c( 0, 1000 ) )

par( mfrow = c( 1, 1 ) )