R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree" Copyright (C) 2015 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > # fit a multinomial that requires numerical optimization > > # alleles are A and B and O > # possible genotypes are AA, AB, AO, BB, BO, OO > # but cannot observe the difference between AA and AO or between BB and BO > # if allele frequencies of A, B, and O are alpha, beta, and gamma, > # respectively, then we have the following probability distribution > # > # type A type B type O type AB > # alpha^2 + 2 alpha gamma beta^2 + 2 beta gamma gamma^2 2 alpha beta > # > # but allele frequencies have to add to one so gamma = 1 - alpha - beta > # > # log likelihood is > > foo <- expression(nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + + nO * log((1 - alpha - beta)^2) + + nAB * log(2 * alpha * beta)) > doo <- deriv(foo, c("alpha", "beta")) > doo expression({ .expr2 <- 2 * alpha .expr4 <- 1 - alpha - beta .expr6 <- alpha^2 + .expr2 * .expr4 .expr10 <- 2 * beta .expr12 <- beta^2 + .expr10 * .expr4 .expr16 <- .expr4^2 .expr20 <- .expr2 * beta .expr24 <- 2 * .expr4 .expr33 <- nO * (.expr24/.expr16) .value <- nA * log(.expr6) + nB * log(.expr12) + nO * log(.expr16) + nAB * log(.expr20) .grad <- array(0, c(length(.value), 2L), list(NULL, c("alpha", "beta"))) .grad[, "alpha"] <- nA * ((.expr2 + (.expr24 - .expr2))/.expr6) - nB * (.expr10/.expr12) - .expr33 + nAB * (.expr10/.expr20) .grad[, "beta"] <- nB * ((.expr10 + (.expr24 - .expr10))/.expr12) - nA * (.expr2/.expr6) - .expr33 + nAB * (.expr2/.expr20) attr(.value, "gradient") <- .grad .value }) > > # not very easy to read, switch to Mathematica > > # logl = nA Log[alpha^2 + 2 alpha (1 - alpha - beta)] + > # nB Log[beta^2 + 2 beta (1 - alpha - beta)] + > # nO Log[(1 - alpha - beta)^2] + > # nAB Log[2 alpha beta] > # dalpha = D[logl, alpha] > # dbeta = D[logl, beta] > # Solve[ {dalpha == 0, dbeta == 0}, {alpha, beta} ] > # > # Mathematica does not find solution in reasonable amount of time > > foo <- expression(- (nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + + nO * log((1 - alpha - beta)^2) + + nAB * log(2 * alpha * beta))) > doo <- deriv(foo, c("alpha", "beta")) > mlogl <- function(theta, x) { + stopifnot(length(theta) == 2) + stopifnot(is.numeric(theta)) + stopifnot(is.finite(theta)) + alpha <- theta[1] + beta <- theta[2] + stopifnot(length(x) == 4) + stopifnot(is.numeric(x)) + stopifnot(is.finite(x)) + stopifnot(round(x) == x) + stopifnot(x >= 0) + nA <- x[1] + nB <- x[2] + nO <- x[3] + nAB <- x[4] + return(eval(doo)) + } > > # Some data found on web attributed to > # Clarke et al. (1959), British Med J, 1, 603-607 > # nA = 186, nB = 38, nAB = 36, nO = 284 > > x <- c(186, 38, 284, 36) > nout <- nlm(mlogl, c(1/3, 1/3), hessian = TRUE, x = x) There were 12 warnings (use warnings() to see them) > # since there were warnings, check that nlm did find a solution > nout <- nlm(mlogl, nout$estimate, hessian = TRUE, x = x) > nout $minimum [1] 595.1615 $estimate [1] 0.22790672 0.06966485 $gradient [1] -3.236736e-06 2.929300e-04 $hessian [,1] [,2] [1,] 5561.511 1326.037 [2,] 1326.037 16652.171 $code [1] 1 $iterations [1] 0 > > # now we are (finally!) ready to use the material in Section 1.5.5 in Agresti > > # chisq test > alpha.hat <- nout$estimate[1] > beta.hat <- nout$estimate[2] > gamma.hat <- 1 - alpha.hat - beta.hat > > p.hat <- c(alpha.hat^2 + 2 * alpha.hat * gamma.hat, + beta.hat^2 + 2 * beta.hat * gamma.hat, + gamma.hat^2, + 2 * alpha.hat * beta.hat) > ex <- sum(x) * p.hat > x [1] 186 38 284 36 > ex [1] 202.43208 55.88095 268.41270 17.27427 > chisq.stat <- sum((x - ex)^2 / ex) > lr.stat <- sum(2 * (x * log(x / sum(x)) - x * log(p.hat))) > chisq.stat [1] 28.25978 > lr.stat [1] 24.13129 > pchisq(chisq.stat, df = 1, lower.tail = FALSE) [1] 1.060775e-07 > pchisq(lr.stat, df = 1, lower.tail = FALSE) [1] 8.998616e-07 > > > proc.time() user system elapsed 0.119 0.015 0.195