# data from Kihlberg, Narragon, and Campbell (1964) used in Fienberg (1980, # The Analysis of Cross-Classified Categorical Data, MIT Press), Table 5-9. foo <- read.table("auto.txt", header = TRUE) names(foo) foo out1 <- glm(count ~ severity + type + ejected + weight, family = poisson, data = foo) summary(out1) out2 <- glm(count ~ (severity + type + ejected + weight)^2, family = poisson, data = foo) summary(out2) out3 <- glm(count ~ (severity + type + ejected + weight)^3, family = poisson, data = foo) summary(out3) anova(out1, out2, out3, test = "Chisq") anova(out1, out2, out3, test = "LRT") # so 3-way interactions are unnecessary, now how about 2-way? # from summary(out2) it seems that all 2-way interactions are statistically # significant except for ejected * weight, so our final model is out4 <- glm(count ~ (severity + type + ejected + weight)^2 - ejected : weight, family = poisson, data = foo) summary(out4) # now try loglin bar <- xtabs(count ~ severity + type + ejected + weight, data = foo) bar lout1 <- loglin(bar, list(1, 2, 3, 4)) lout2 <- loglin(bar, list(c(1, 2), c(1, 3), c(1, 4), c(2, 3), c(2, 4), c(3, 4))) lout3 <- loglin(bar, list(c(1, 2, 3), c(1, 2, 4), c(1, 3, 4), c(2, 3, 4))) names(lout1) lout1$lrt lout2$lrt lout3$lrt # appears these are differences from the saturated model lout1$df lout2$df lout3$df # ditto here # LRT qux <- matrix(NA, 3, 5) qux[1, 1] <- lout1$df qux[2, 1] <- lout2$df qux[3, 1] <- lout3$df qux[1, 2] <- lout1$lrt qux[2, 2] <- lout2$lrt qux[3, 2] <- lout3$lrt qux[2:3, 3] <- qux[1:2, 1] - qux[2:3, 1] qux[2:3, 4] <- qux[1:2, 2] - qux[2:3, 2] qux[2:3, 5] <- pchisq(qux[2:3, 4], df = qux[2:3, 3], lower.tail = FALSE) colnames(qux) <- c("Resid. Df", "Resid. Dev", "Df", "Deviance", "Pr(>Chi)") rownames(qux) <- 1:3 class(qux) <- "anova" print(qux) anova(out1, out2, out3, test = "LRT") # Pearson Chi-square quux <- matrix(NA, 3, 5) quux[1, 1] <- lout1$df quux[2, 1] <- lout2$df quux[3, 1] <- lout3$df quux[1, 2] <- lout1$pearson quux[2, 2] <- lout2$pearson quux[3, 2] <- lout3$pearson quux[2:3, 3] <- quux[1:2, 1] - quux[2:3, 1] quux[2:3, 4] <- quux[1:2, 2] - quux[2:3, 2] quux[2:3, 5] <- pchisq(quux[2:3, 4], df = quux[2:3, 3], lower.tail = FALSE) colnames(quux) <- c("Resid. Df", "Resid. Dev", "Df", "Deviance", "Pr(>Chi)") rownames(quux) <- 1:3 class(quux) <- "anova" print(quux) anova(out1, out2, out3, test = "Chisq") # Clearly, in the latter "Chisq" means "LRT". How quaint! # The help for loglin says the loglm function in the MASS package # is more user-friendly. Let's try it. library(MASS) mout1 <- loglm(count ~ severity + type + ejected + weight, data = foo) summary(mout1) tout1 <- loglm( ~ severity + type + ejected + weight, data = bar) summary(tout1) ##### seems to work either way !!!!! mout2 <- loglm(count ~ (severity + type + ejected + weight)^2, data = foo) mout3 <- loglm(count ~ (severity + type + ejected + weight)^3, data = foo) tout2 <- loglm( ~ (severity + type + ejected + weight)^2, data = bar) tout3 <- loglm( ~ (severity + type + ejected + weight)^3, data = bar) #### tests anova(tout1, tout2, tout3, test = "LR") print(qux) anova(tout1, tout2, tout3, test = "Chisq") print(quux) ##### a bunch of not so user-friendly stuff about loglm ##### ##### * no help page for anova.loglm (although function exists) ##### * summary.loglm leaves a lot to be desired ##### * both loglm and glm heavily biased against Pearson test statistic ##### If you want it, you have to do it yourself using loglin ##### (Well, not quite. We saw that loglm calculates the Pearson ##### statistic. It's anova.loglm that does not use it no matter ##### what arguments are specified.) ### the book Graphical Models with R by Hojsgaard, Edwards, and Lauritzen ### recommends the function dmod in the R package gRim (this package is ### a pain to install, since one must install the Bioconductor package ### RGBL and the R function install.packages doesn't handle that library(gRim) gout1 <- dmod( ~ severity + type + ejected + weight, data = bar) #### according for help for dmod can also do gout1 <- dmod(~.^., interactions = 1, data = bar) gout1 gout2 <- dmod(~.^., interactions = 2, data = bar) gout2 gout3 <- dmod(~.^., interactions = 3, data = bar) gout3 # there is no anova function in this package # how annoying # have to do LRT by hand comparemodels <- function(...) { foo <- list(...) bar <- sapply(foo, function(x) inherits(x, "dModel")) if (! all(bar)) stop("all arguments must be objects of class \"dModel\"") if (length(foo) < 2) stop("must have at least 2 arguments") dev <- sapply(foo, function(x) x$fitinfo$dev) df <- sapply(foo, function(x) x$fitinfo$dimension[1]) if (! all(diff(df) > 0)) stop("dimensions of models not increasing; models cannot be nested") qux <- cbind(df, dev) qux <- cbind(qux, c(NA, diff(df))) qux <- cbind(qux, c(NA, - diff(dev))) qux <- cbind(qux, pchisq(qux[ , 4], df = qux[ , 3], lower.tail = FALSE)) colnames(qux) <- c("Resid. Df", "Resid. Dev", "Df", "Deviance", "Pr(>Chi)") rownames(qux) <- 1:nrow(qux) class(qux) <- "anova" return(qux) } comparemodels(gout1, gout2, gout3) # as before we find no point in 3-way interactions # no how about graphical models ? bout <- backward(gout2) bout bout$fitinfo$margin fout <- forward(gout1) fout fout$fitinfo$margin #### clearly the forward function disagrees with our analysis up to this point gout4 <- dmod(~.^., data = bar) gout4 sout <- stepwise(gout4) sout sout$fitinfo$margin #### same as the model found by the forward function #### hopefully, this is the best model by AIC #### apparently does not have an all decomposible models function soutu <- stepwise(gout4, type = "unrestricted") soutu soutu$fitinfo$margin #### same model again #### according to this analysis, the best hierarchical model is decomposible