# fit a multinomial that requires numerical optimization # alleles are A and B and O # possible genotypes are AA, AB, AO, BB, BO, OO # but cannot observe the difference between AA and AO or between BB and BO # if allele frequencies of A, B, and O are alpha, beta, and gamma, # respectively, then we have the following probability distribution # # type A type B type O type AB # alpha^2 + 2 alpha gamma beta^2 + 2 beta gamma gamma^2 2 alpha beta # # but allele frequencies have to add to one so gamma = 1 - alpha - beta # # log likelihood is foo <- expression(nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + nO * log((1 - alpha - beta)^2) + nAB * log(2 * alpha * beta)) doo <- deriv(foo, c("alpha", "beta")) doo # not very easy to read, switch to Mathematica # logl = nA Log[alpha^2 + 2 alpha (1 - alpha - beta)] + # nB Log[beta^2 + 2 beta (1 - alpha - beta)] + # nO Log[(1 - alpha - beta)^2] + # nAB Log[2 alpha beta] # dalpha = D[logl, alpha] # dbeta = D[logl, beta] # Solve[ {dalpha == 0, dbeta == 0}, {alpha, beta} ] # # Mathematica does not find solution in reasonable amount of time foo <- expression(- (nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) + nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) + nO * log((1 - alpha - beta)^2) + nAB * log(2 * alpha * beta))) doo <- deriv(foo, c("alpha", "beta")) mlogl <- function(theta, x) { stopifnot(length(theta) == 2) stopifnot(is.numeric(theta)) stopifnot(is.finite(theta)) alpha <- theta[1] beta <- theta[2] stopifnot(length(x) == 4) stopifnot(is.numeric(x)) stopifnot(is.finite(x)) stopifnot(round(x) == x) stopifnot(x >= 0) nA <- x[1] nB <- x[2] nO <- x[3] nAB <- x[4] return(eval(doo)) } # Some data found on web attributed to # Clarke et al. (1959), British Med J, 1, 603-607 # nA = 186, nB = 38, nAB = 36, nO = 284 x <- c(186, 38, 284, 36) nout <- nlm(mlogl, c(1/3, 1/3), hessian = TRUE, x = x) # since there were warnings, check that nlm did find a solution nout <- nlm(mlogl, nout$estimate, hessian = TRUE, x = x) nout # now we are (finally!) ready to use the material in Section 1.5.5 in Agresti # chisq test alpha.hat <- nout$estimate[1] beta.hat <- nout$estimate[2] gamma.hat <- 1 - alpha.hat - beta.hat p.hat <- c(alpha.hat^2 + 2 * alpha.hat * gamma.hat, beta.hat^2 + 2 * beta.hat * gamma.hat, gamma.hat^2, 2 * alpha.hat * beta.hat) ex <- sum(x) * p.hat x ex chisq.stat <- sum((x - ex)^2 / ex) lr.stat <- sum(2 * (x * log(x / sum(x)) - x * log(p.hat))) chisq.stat lr.stat pchisq(chisq.stat, df = 1, lower.tail = FALSE) pchisq(lr.stat, df = 1, lower.tail = FALSE)