R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
Copyright (C) 2015 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
> n <- 20
> conf.level <- 0.95
> 
> pdf(file = "coverage.pdf")
> 
> # We examine the exact performance (plots coverage as a function of the
> # parameter value) for various confidence intervals for the binomial
> # distribution.  These intervals are derived in the Stat 5102 course slides
> # (Deck 2, slides 92-93 and 112-121).
> # The coverage probability considered as a function of the parameter can never
> # be a constant function when the data have a discrete distribution.  So even
> # the "exact" confidence interval does not achieve exactly the nominal
> # coverage for all parameter values but only at least the nominal
> # coverage, so is perhaps better termed "conservative-exact".
> 
> # "usual" Wald confidence interval
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     phat <- x / n
+     crit.val <- qnorm((1 + conf.level) / 2)
+     low <- phat - crit.val * sqrt(phat * (1 - phat) / n)
+     hig <- phat + crit.val * sqrt(phat * (1 - phat) / n)
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Wald Confidence Interval")
> 
> # score (Rao) interval
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     foo <- function(x) suppressWarnings(prop.test(x, n = n,
+         p = p, conf.level = conf.level, correct = FALSE))
+     bar <- lapply(x, foo)
+     low <- sapply(bar, function(x) x$conf.int[1])
+     hig <- sapply(bar, function(x) x$conf.int[2])
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Score (Rao or Wilson) Confidence Interval")
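# Aside: prop.test(..., correct = FALSE), as used above, returns the Wilson
# (score) interval, which also has a closed form.  The sketch below was not
# part of the run above, and the name wilson.ci is made up for illustration;
# it should agree with prop.test() up to numerical error.
wilson.ci <- function(x, n, conf.level = 0.95) {
    phat <- x / n
    z <- qnorm((1 + conf.level) / 2)
    center <- (phat + z^2 / (2 * n)) / (1 + z^2 / n)
    halfwidth <- (z / (1 + z^2 / n)) *
        sqrt(phat * (1 - phat) / n + z^2 / (4 * n^2))
    c(lower = center - halfwidth, upper = center + halfwidth)
}
# e.g., compare wilson.ci(7, 20) with
# suppressWarnings(prop.test(7, 20, correct = FALSE))$conf.int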
> 
> # likelihood interval (invert Wilks test)
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     logl <- function(p, x, n) ifelse(x == 0, n * log(1 - p),
+         ifelse(x == n, n * log(p), x * log(p) + (n - x) * log(1 - p)))
+     tol <- sqrt(.Machine$double.eps)
+     foo <- function(x) {
+         crit <- qchisq(conf.level, df = 1)
+         fred <- function(p) 2 * (logl(phat, x, n) - logl(p, x, n)) - crit
+         phat <- x / n
+         if (phat == 0) {
+             low <- 0
+         } else {
+             low <- uniroot(fred, lower = 0, upper = phat, tol = tol)$root
+         }
+         if (phat == 1) {
+             hig <- 1
+         } else {
+             hig <- uniroot(fred, lower = phat, upper = 1, tol = tol)$root
+         }
+         c(low, hig)
+     }
+     bar <- lapply(x, foo)
+     low <- sapply(bar, function(x) x[1])
+     hig <- sapply(bar, function(x) x[2])
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Likelihood Based (Wilks) Confidence Interval")
> 
> # confidence interval using the variance stabilizing transformation
> # (not any of Wald, Wilks, Rao)
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     phat <- x / n
+     thetahat <- asin(2 * phat - 1)
+     crit.val <- qnorm((1 + conf.level) / 2)
+     low <- (1 + sin(thetahat - crit.val / sqrt(n))) / 2
+     hig <- (1 + sin(thetahat + crit.val / sqrt(n))) / 2
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Variance Stabilizing Transformation Confidence Interval")
> 
> # Clopper-Pearson confidence interval (conservative-exact)
> # (also not any of Wald, Wilks, Rao)
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     foo <- lapply(x, binom.test, n = n, p = p,
+         conf.level = conf.level)
+     low <- sapply(foo, function(x) x$conf.int[1])
+     hig <- sapply(foo, function(x) x$conf.int[2])
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability", ylim = c(conf.level, 1))
> abline(h = conf.level, lty = 2)
> title(main = "Clopper-Pearson Confidence Interval")
> 
> # modified usual (Wald) binomial confidence intervals
> # Geyer (2009, Electronic Journal of Statistics, 3, 259-289)
> # proposed a simple modification that fixes the behavior of binomial
> # confidence intervals near zero and one (and also applies to other
> # distributions, including logistic regression and log-linear models
> # for categorical data analysis, more on this later).  For the binomial
> # distribution it says that when we observe zero successes, the confidence
> # interval should be from zero to 1 - alpha^(1 / n),
> # where coverage 1 - alpha is wanted and n is the sample size.
> # And it says that when we observe all successes (n out of n),
> # the confidence interval should be from alpha^(1 / n) to one.
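# Aside (a sketch, not part of the run): one way to see where the endpoint
# 1 - alpha^(1 / n) comes from.  When x = 0 the relevant probability is
# P(X = 0) = (1 - p)^n, and (1 - p)^n >= alpha exactly when
# p <= 1 - alpha^(1 / n).  A quick numerical check, using the n and
# conf.level defined above (alpha and p.max are names introduced only here):
alpha <- 1 - conf.level
p.max <- 1 - alpha^(1 / n)
all.equal(dbinom(0, n, p.max), alpha)            # (1 - p.max)^n equals alpha
all.equal(dbinom(n, n, alpha^(1 / n)), alpha)    # same check at the other endpoint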
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     phat <- x / n
+     crit.val <- qnorm((1 + conf.level) / 2)
+     low <- phat - crit.val * sqrt(phat * (1 - phat) / n)
+     hig <- phat + crit.val * sqrt(phat * (1 - phat) / n)
+     low[x == 0] <- 0
+     hig[x == 0] <- 1 - (1 - conf.level)^(1 / n)
+     low[x == n] <- (1 - conf.level)^(1 / n)
+     hig[x == n] <- 1
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Modified Wald Confidence Interval")
> 
> # same modification as in preceding section, but for variance stabilized
> 
> cover <- function(p) {
+     stopifnot(is.numeric(p))
+     stopifnot(length(p) == 1)
+     stopifnot(0 <= p & p <= 1)
+     x <- 0:n
+     fpx <- dbinom(x, n, p)
+     phat <- x / n
+     thetahat <- asin(2 * phat - 1)
+     crit.val <- qnorm((1 + conf.level) / 2)
+     low <- (1 + sin(thetahat - crit.val / sqrt(n))) / 2
+     hig <- (1 + sin(thetahat + crit.val / sqrt(n))) / 2
+     low[x == 0] <- 0
+     hig[x == 0] <- 1 - (1 - conf.level)^(1 / n)
+     low[x == n] <- (1 - conf.level)^(1 / n)
+     hig[x == n] <- 1
+     inies <- as.numeric(low <= p & p <= hig)
+     sum(inies * fpx)
+ }
> 
> p <- seq(0.001, 0.999, 0.001)
> plot(p, vapply(p, cover, 0.5), type = "l",
+     ylab = "coverage probability")
> abline(h = conf.level, lty = 2)
> title(main = "Modified Variance Stabilizing Transformation Confidence Interval")
> 
> 
> proc.time()
   user  system elapsed 
 11.885   0.019  11.994 
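# Aside (a sketch, not run as part of the transcript above): why
# asin(2 * phat - 1) stabilizes the variance in the two intervals above.
# With g(p) = asin(2 * p - 1) we have g'(p) = 1 / sqrt(p * (1 - p)), so the
# delta method gives Var(g(phat)) approx g'(p)^2 * p * (1 - p) / n = 1 / n,
# free of p, which is where the crit.val / sqrt(n) half-width comes from.
# A quick Monte Carlo check using the n defined above (nsim and the
# particular p values below are arbitrary choices for illustration):
nsim <- 1e5
for (pp in c(0.1, 0.3, 0.5)) {
    phat.sim <- rbinom(nsim, n, pp) / n
    cat("p =", pp, " sd of asin(2 * phat - 1) =",
        sd(asin(2 * phat.sim - 1)), " vs 1 / sqrt(n) =", 1 / sqrt(n), "\n")
}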