R version 3.1.1 (2014-07-10) -- "Sock it to Me" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > pdf(file = "glm-plots.pdf") > > set.seed(42) > > x <- 1:81 > z <- (x - mean(x)) / sd(x) > z <- z / max(z) * 3.0 > range(z) [1] -3 3 > fred <- function(x) ifelse(x > 0, 1 / (1 + exp(- x)), exp(x) / (1 + exp(x))) > moo <- fred(z) > > plot(x, moo, ylim = c(0, 1)) > title(main = "Simulation Truth Mean Values") > > y <- as.numeric(runif(length(x)) < moo) > plot(x, y) > title(main = "Simulated Data") > > # logistic regression > plot(x, y) > out <- glm(y ~ x, family = binomial) > curve(predict(out, type = "response", newdata = data.frame(x = x)), add = TRUE) > title(main = "Logistic Regression") > > # linear regression (justified by Gauss-Markov theorem) > plot(x, y) > out <- lm(y ~ x) > curve(predict(out, newdata = data.frame(x = x)), add = TRUE) > title(main = "Linear Regression") > > # three types of latent variable woof regression > cc <- c("black", "red", "darkseagreen") > plot(x, y) > out <- glm(y ~ x, family = binomial) > curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE, col = cc[1]) > out <- glm(y ~ x, family = binomial(link = "probit")) > curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE, col = cc[2]) > out <- glm(y ~ x, family = binomial(link = "cauchit")) > curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE, col = cc[3]) > legend(0, 0.8, legend = c("logit", "probit", "cauchit"), + col = cc, lwd = 2) > title(main = "Various Latent Variable Distribution Regressions") > > plot(x, y) > out <- glm(y ~ x, family = binomial(link = "cloglog")) > curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE) > title(main = "Complementary Log Log") > > plot(x, y) > out <- glm(y ~ x, family=quasi(variance="mu(1-mu)"), + start = c(0.5, 0)) There were 25 warnings (use warnings() to see them) > summary(out, dispersion=1) Call: glm(formula = y ~ x, family = quasi(variance = "mu(1-mu)"), start = c(0.5, 0)) Deviance Residuals: Min 1Q Median 3Q Max -1.69563 -0.80198 -0.00001 0.75853 1.76001 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -1.250e-02 7.266e-06 -1720 <2e-16 *** x 1.250e-02 1.957e-06 6388 <2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for quasi family taken to be 1) Null deviance: 112.277 on 80 degrees of freedom Residual deviance: 63.395 on 79 degrees of freedom AIC: NA Number of Fisher Scoring iterations: 24 > curve(predict(out, type = "response", newdata = data.frame(x = x), + dispersion = 1), add = TRUE) > title(main = "Quasi-Likelihood, Identity Link") > > plot(x, y) > out <- glm(y ~ x, family = binomial(link = "log"), start = c(-1, 0)) There were 28 warnings (use warnings() to see them) > warnings() Warning messages: 1: step size truncated due to divergence 2: step size truncated: out of bounds 3: step size truncated: out of bounds 4: step size truncated: out of bounds 5: step size truncated: out of bounds 6: step size truncated: out of bounds 7: step size truncated: out of bounds 8: step size truncated: out of bounds 9: step size truncated: out of bounds 10: step size truncated: out of bounds 11: step size truncated: out of bounds 12: step size truncated: out of bounds 13: step size truncated: out of bounds 14: step size truncated: out of bounds 15: step size truncated: out of bounds 16: step size truncated: out of bounds 17: step size truncated: out of bounds 18: step size truncated: out of bounds 19: step size truncated: out of bounds 20: step size truncated: out of bounds 21: step size truncated: out of bounds 22: step size truncated: out of bounds 23: step size truncated: out of bounds 24: step size truncated: out of bounds 25: step size truncated: out of bounds 26: step size truncated: out of bounds 27: glm.fit: algorithm did not converge 28: glm.fit: algorithm stopped at boundary value > curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE) > title(main = "Log") > > # all links > okLinks <- c("logit", "probit", "cloglog", "cauchit", "log") > cc <- c("black", "red", "darkgreen", "blue", "magenta") > plot(x, y) > for (i in seq(along = okLinks)) { + out <- glm(y ~ x, family = binomial(link = okLinks[i]), + start = c(-1, 0)) + curve(predict(out, type = "response", newdata = data.frame(x = x)), + add = TRUE, col = cc[i]) + } There were 28 warnings (use warnings() to see them) > legend(0, 0.8, legend = okLinks, col = cc, lwd = 2) > title(main = "All Link Functions R Function GLM Knows About") > > > > proc.time() user system elapsed 0.349 0.034 0.506