##### GOFMC (good old-fashioned Monte Carlo) toy problem.
##### X is bivariate normal with mean vector

mu <- c(1, 1)

##### and covariance matrix

V <- matrix(c(1, 0.5, 0.5, 1), 2, 2)

##### What is the probability that X lies in the first quadrant?

##### Note: (as with many toy MC problems) we can check by numerical integration

detV <- prod(eigen(V, symmetric = TRUE, only.values = TRUE)$values)
const <- 1 / (2 * pi * sqrt(detV))
invV <- solve(V)

## bivariate normal density
dens <- function(x) {
    stopifnot(is.numeric(x))
    stopifnot(length(x) == 2)
    delta <- x - mu
    const * exp(- as.numeric(t(delta) %*% invV %*% delta) / 2)
}

##### check (the density should integrate to 1)

library(adapt)
## (the adapt package has since been archived on CRAN; see the appendix at
## the end for an alternative using the cubature package)
adapt(2, lower = c(-10, -10), upper = c(10, 10), functn = dens)
adapt(2, lower = c(-20, -20), upper = c(20, 20), functn = dens)

##### pretty sloppy! (with the default tolerance)

adapt(2, lower = c(-10, -10), upper = c(10, 10), functn = dens, eps = 1e-5)
adapt(2, lower = c(-20, -20), upper = c(20, 20), functn = dens, eps = 1e-5)

##### much better! (know and use the optional arguments)

## the density mass outside [0, 10]^2 is negligible, so this is effectively
## the first-quadrant probability
true.answer <- adapt(2, lower = c(0, 0), upper = c(10, 10), functn = dens,
    eps = 1e-5)$value
print(true.answer)

##### good to about 5 sig. figs.

##### now for MC (good old-fashioned, independent-sample Monte Carlo)

library(MASS)
set.seed(42)
nboot <- 1000
x <- mvrnorm(nboot, mu = mu, Sigma = V)
## proportion of simulated points with both coordinates positive
mc.answer <- mean(apply(x > 0, 1, prod))
print(mc.answer)

##### MC standard error (if you don't do this, you're just fooling around)

mc.error <- sqrt(mc.answer * (1 - mc.answer) / nboot)
(mc.answer - true.answer) / mc.error

##### closer than we expect to be on average; try more runs

nfoo <- 30
z <- double(nfoo)
for (i in 1:nfoo) {
    x <- mvrnorm(nboot, mu = mu, Sigma = V)
    mc.answer <- mean(apply(x > 0, 1, prod))
    mc.error <- sqrt(mc.answer * (1 - mc.answer) / nboot)
    z[i] <- (mc.answer - true.answer) / mc.error
}
stem(z)

##### Looks pretty standard normal to me.

##### COMMENTS #####
##
## 1. Always do (at least some) MC standard errors.
##
## 2. The way we did it here only works for probabilities, where the sum
##    of indicator random variables is binomial.  In general you have
##    to calculate as in "z" procedures.
##
## 3. The calculation the dumb way (the nfoo for loop) is almost never
##    necessary.  We're statisticians.  We know the binomial variance
##    n p (1 - p) and the CLT and don't have to rediscover them by
##    simulation.  In general, we can plug in the sample standard
##    deviation for the population standard deviation and use the CLT
##    (a sketch of this appears in the appendix below).
##
##    In short: we can use INTERNAL (one-run) rather than EXTERNAL
##    (multi-run, a.k.a. bootstrap) estimates of standard error.
##
##    This "in short" will be a constant theme throughout the course.
##
##### END COMMENTS #####
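
##### APPENDIX #####
##
## (Added for illustration; not part of the original script.)
##
## If the adapt package cannot be installed (it has been archived on CRAN),
## the cubature package provides a similar adaptive multidimensional
## integrator; something like the following should reproduce true.answer:
##
## library(cubature)
## adaptIntegrate(dens, lowerLimit = c(0, 0), upperLimit = c(10, 10),
##     tol = 1e-5)$integral
##
## And here is a minimal sketch of the INTERNAL standard error from
## comment 3: one MC run, with the sample standard deviation of the
## indicators plugged into the CLT.  It reuses mu, V, and nboot from
## above.  For indicator variables, sd(ind) equals
## sqrt(mc.answer * (1 - mc.answer)) up to the n / (n - 1) correction,
## so the two standard error estimates nearly coincide.

x <- mvrnorm(nboot, mu = mu, Sigma = V)
ind <- as.numeric(x[ , 1] > 0 & x[ , 2] > 0)   # indicator of first quadrant
mc.answer <- mean(ind)
se.internal <- sd(ind) / sqrt(nboot)           # one-run CLT plug-in estimate
se.binomial <- sqrt(mc.answer * (1 - mc.answer) / nboot)
print(c(internal = se.internal, binomial = se.binomial))

##### END APPENDIX #####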