# set seed for reproducibility, remove to get different simulated data set.seed(42) # this example uses the same likelihood as the file multi.R # (the ABO blood group model) foo <- expression(- (nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + nO * log((1 - alpha - beta)^2) + nAB * log(2 * alpha * beta))) doo <- deriv(foo, c("alpha", "beta")) mlogl <- function(theta, x) { stopifnot(length(theta) == 2) stopifnot(is.numeric(theta)) stopifnot(is.finite(theta)) alpha <- theta[1] beta <- theta[2] stopifnot(length(x) == 4) stopifnot(is.numeric(x)) stopifnot(is.finite(x)) stopifnot(round(x) == x) stopifnot(x >= 0) nA <- x[1] nB <- x[2] nO <- x[3] nAB <- x[4] return(eval(doo)) } x <- c(186, 38, 284, 36) nout <- nlm(mlogl, c(1/3, 1/3), hessian = TRUE, x = x) nout <- nlm(mlogl, nout$estimate, hessian = TRUE, x = x) alpha.hat <- nout$estimate[1] beta.hat <- nout$estimate[2] gamma.hat <- 1 - alpha.hat - beta.hat p.hat <- c(alpha.hat^2 + 2 * alpha.hat * gamma.hat, beta.hat^2 + 2 * beta.hat * gamma.hat, gamma.hat^2, 2 * alpha.hat * beta.hat) theta.simulation.truth <- c(alpha.hat, beta.hat, gamma.hat) x <- as.vector(rmultinom(1, 200, p.hat)) x nout <- nlm(mlogl, c(1/3, 1/3), hessian = TRUE, x = x) nout <- nlm(mlogl, nout$estimate, hessian = TRUE, x = x) nout$estimate alpha.hat <- nout$estimate[1] beta.hat <- nout$estimate[2] gamma.hat <- 1 - alpha.hat - beta.hat # Wald type standard errors from inverse observed Fisher information inv.fisher <- solve(nout$hessian) se.inv.fisher <- sqrt(diag(inv.fisher)) se.inv.fisher # to get standard error for gamma.hat we need to know # var(a + b X) = b^2 Var(X) for constants a and b # var(X + Y) = var(X) + var(Y) + 2 cov(X, Y) # var(X - Y) = var(X) + var(Y) - 2 cov(X, Y) # so var(1 - alpha.hat - beta.hat) = var(alpha.hat + beta.hat) # = var(alpha.hat) + var(beta.hat) + 2 cov(alpha.hat, beta.hat) se.gamma.hat <- sqrt(inv.fisher[1,1] + inv.fisher[2,2] + 2 * inv.fisher[1,2]) foo <- cbind(c(alpha.hat, beta.hat, gamma.hat), c(se.inv.fisher, se.gamma.hat)) rownames(foo) <- c("alpha", "beta", "gamma") colnames(foo) <- c("estimate", "std. err.") round(foo, 3) # and, of course, point estimate +/- 1.96 standard errors is an asymptotic # 95% confidence interval # let us check that the confidence intervals cover the simulation truth bar <- cbind(foo[,1] - qnorm(0.975) * foo[,2], foo[,1] + qnorm(0.975) * foo[,2]) rownames(bar) <- rownames(foo) colnames(bar) <- c("lower", "upper") round(bar, 3) all(bar[,1] < theta.simulation.truth & theta.simulation.truth < bar[,2]) # of course, we just got lucky. Each interval has a 5% chance of failing # to cover and these intervals were not Bonferroni corrected to get 95% # probability of simultaneous coverage so the probability that all 3 intervals # cover is lower than 95%, perhaps a lot lower (unknown and uncalculable # except by doing a simulation study)