# fit a multinomial that requires numerical optimization

# alleles are A and B and O
# possible genotypes are AA, AB, AO, BB, BO, OO
# but cannot observe the difference between AA and AO or between BB and BO
# if allele frequencies of A, B, and O are alpha, beta, and gamma,
# respectively, then we have the following probability distribution
#
# type A                     type B                   type O     type AB
# alpha^2 + 2 alpha gamma    beta^2 + 2 beta gamma    gamma^2    2 alpha beta
#
# but allele frequencies have to add to one so gamma = 1 - alpha - beta
#
# log likelihood is

foo <- expression(nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) +
    nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) +
    nO * log((1 - alpha - beta)^2) +
    nAB * log(2 * alpha * beta))
doo <- deriv(foo, c("alpha", "beta"))
doo

# not very easy to read, switch to Mathematica

# logl = nA Log[alpha^2 + 2 alpha (1 - alpha - beta)] +
#        nB Log[beta^2 + 2 beta (1 - alpha - beta)] +
#        nO Log[(1 - alpha - beta)^2] +
#        nAB Log[2 alpha beta]
# dalpha = D[logl, alpha]
# dbeta = D[logl, beta]
# Solve[ {dalpha == 0, dbeta == 0}, {alpha, beta} ]
#
# Mathematica does not find solution in reasonable amount of time

foo <- expression(- (nA * log(alpha^2 + 2 * alpha * (1 - alpha - beta)) +
    nB * log(beta^2 + 2 * beta * (1 - alpha - beta)) +
    nO * log((1 - alpha - beta)^2) +
    nAB * log(2 * alpha * beta)))
doo <- deriv(foo, c("alpha", "beta"))
mlogl <- function(theta, x) {
    stopifnot(length(theta) == 2)
    stopifnot(is.numeric(theta))
    stopifnot(is.finite(theta))
    alpha <- theta[1]
    beta <- theta[2]
    stopifnot(length(x) == 4)
    stopifnot(is.numeric(x))
    stopifnot(is.finite(x))
    stopifnot(round(x) == x)
    stopifnot(x >= 0)
    nA <- x[1]
    nB <- x[2]
    nO <- x[3]
    nAB <- x[4]
    return(eval(doo))
}

# Some data found on web attributed to
# Clarke et al. (1959), British Med J, 1, 603-607
# nA = 186, nB = 38, nAB = 36, nO = 284

x <- c(186, 38, 284, 36)
nout <- nlm(mlogl, c(1/3, 1/3), hessian = TRUE, x = x)
# since there were warnings, check that nlm did find a solution
nout <- nlm(mlogl, nout$estimate, hessian = TRUE, x = x)
nout

# now we are (finally!) ready to use the material in Section 1.5.5 in Agresti

# chisq test
alpha.hat <- nout$estimate[1]
beta.hat <- nout$estimate[2]
gamma.hat <- 1 - alpha.hat - beta.hat

p.hat <- c(alpha.hat^2 + 2 * alpha.hat * gamma.hat,
        beta.hat^2 + 2 * beta.hat * gamma.hat,
        gamma.hat^2,
        2 * alpha.hat * beta.hat)
ex <- sum(x) * p.hat
x
ex
chisq.stat <- sum((x - ex)^2 / ex)
lr.stat <- sum(2 * (x * log(x / sum(x)) - x * log(p.hat)))
chisq.stat
lr.stat
pchisq(chisq.stat, df = 1, lower.tail = FALSE)
pchisq(lr.stat, df = 1, lower.tail = FALSE)