R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" Copyright (C) 2015 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > pdf(file = "mcmc.pdf") > > # set seed for reproducibility, remove to get different monte carlo > > set.seed(42) > > # Some data found on web attributed to > # Clarke et al. (1959), British Med J, 1, 603-607 > # nA = 186, nB = 38, nAB = 36, nO = 284 > > nA <- 186 > nB <- 38 > nAB <- 36 > nO <- 284 > > # mcmc > > logl <- function(theta) { + stopifnot(length(theta) == 2) + stopifnot(is.numeric(theta)) + stopifnot(is.finite(theta)) + alpha <- theta[1] + beta <- theta[2] + gamma <- 1 - alpha - beta + if (alpha <= 0 | beta <= 0 | gamma <= 0) return(-Inf) + return(nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + + nO * log((1 - alpha - beta)^2) + + nAB * log(2 * alpha * beta)) + } > > # use flat priors so log likelihood is unnormalized log posterior > > library(mcmc) > mout <- metrop(logl, c(1/3, 1/3), nbatch = 1e4) > mout$accept [1] 8e-04 > mout <- metrop(mout, scale = 1e-1) > mout$accept [1] 0.0159 > mout <- metrop(mout, scale = 1e-2) > mout$accept [1] 0.538 > mout <- metrop(mout, scale = 2e-2) > mout$accept [1] 0.287 > > # try a few plots > plot(ts(mout$batch[ , 1]), ylab = expression(alpha)) > plot(ts(mout$batch[ , 2]), ylab = expression(beta)) > plot(ts(1 - rowSums(mout$batch)), ylab = expression(gamma)) > > dim(mout$batch) [1] 10000 2 > acf(mout$batch) > # looks like significant autocorrelation exists out to lag 25 or so > # try batch length 50 > > mout <- metrop(mout, nbatch = 1e2, blen = 50) > mout$accept [1] 0.2882 > acf(mout$batch) > # looks good. We don't do formal test. > > # add outfun so we can calculate posterior variances as well as means > mout <- metrop(mout, + outfun = function(x) c(x, 1 - sum(x), x^2, (1 - sum(x))^2)) > # components of value of output function are > # (alpha, beta, gamma, alpha^2, beta^2, gamma^2) > mout$accept [1] 0.3006 > > b1 <- mout$batch[ , 1:3] > b2 <- mout$batch[ , 4:6] > pmean <- colMeans(b1) # MCMC estimates of posterior means > pvar <- colMeans(b2) - pmean^2 # MCMC estimates of posterior variances > psd <- sqrt(pvar) # MCMC estimates of posterior std. dev. > pmean [1] 0.22862303 0.07029313 0.70108384 > psd [1] 0.014116041 0.007977972 0.015257848 > > # another check for autocorrelation > omcsd <- apply(mout$batch, 2, sd) > imcsd <- apply(mout$batch, 2, function(x) sqrt(initseq(x)$var.pos)) > omcsd [1] 0.0067469026 0.0027422798 0.0070560103 0.0030941631 0.0003895906 [6] 0.0099118390 > imcsd [1] 0.0073720209 0.0029917301 0.0077117753 0.0033516190 0.0004225101 [6] 0.0108426421 > # about the same, no appreciable autocorrelation in batch means > # but it is clear that imcsd are higher so batches of length 50 > # do underestimate Monte Carlo variance, and we should use longer > # try again > > mout <- metrop(mout, blen = 200) > mout$accept [1] 0.2872 > omcsd <- apply(mout$batch, 2, sd) > imcsd <- apply(mout$batch, 2, function(x) sqrt(initseq(x)$var.pos)) > imcsd / omcsd [1] 1.167354 1.406509 1.090541 1.183624 1.391522 1.089239 > > # wow, this test is a lot more sensitive than acf plots, try again > mout <- metrop(mout, blen = 500, nbatch = 200) > mout$accept [1] 0.28904 > omcsd <- apply(mout$batch, 2, sd) > imcsd <- apply(mout$batch, 2, function(x) sqrt(initseq(x)$var.pos)) > imcsd / omcsd [1] 1.0832488 0.9003141 0.9972361 1.0788968 0.8897371 0.9978378 > # that looks OK, all near one > > pmean.mcse <- apply(b1, 2, sd) / sqrt(mout$nbatch) > foo <- cbind(pmean, pmean.mcse) > colnames(foo) <- c("posterior mean", "mcse") > rownames(foo) <- c("alpha", "beta", "gamma") > poo <- round(foo, 6) > poo <- formatC(poo, digits = 6, format = "f") > print(poo, quote = FALSE) posterior mean mcse alpha 0.228623 0.000477 beta 0.070293 0.000194 gamma 0.701084 0.000499 > > # mcse for posterior standard deviations is more complicated > # code copied from mcmc package vignette (demo.pdf) without explanation > u <- b1 > v <- b2 > ubar <- apply(u, 2, mean) > vbar <- apply(v, 2, mean) > deltau <- sweep(u, 2, ubar) > deltav <- sweep(v, 2, vbar) > foo <- sweep(deltau, 2, ubar, "*") > sigmasq.mcse <- sqrt(apply((deltav - 2 * foo)^2, 2, mean) / mout$nbatch) > sigmasq.mcse [1] 1.019655e-05 1.924410e-06 1.160319e-05 > sigma <- psd > sigma.mcse <- sigmasq.mcse / (2 * sigma) > sigma.mcse [1] 0.0003611687 0.0001206077 0.0003802367 > > foo <- cbind(psd, sigma.mcse) > colnames(foo) <- c("posterior sd", "mcse") > rownames(foo) <- c("alpha", "beta", "gamma") > poo <- round(foo, 6) > poo <- formatC(poo, digits = 6, format = "f") > print(poo, quote = FALSE) posterior sd mcse alpha 0.014116 0.000361 beta 0.007978 0.000121 gamma 0.015258 0.000380 > > proc.time() user system elapsed 3.773 0.023 3.933