\documentclass[11pt]{article}
\usepackage{indentfirst}
\usepackage{amsmath}
% \usepackage{amssymb}
% \usepackage{amsthm}
% \usepackage{amscd}
\usepackage{natbib}
\usepackage{url}
% \usepackage{graphicx}
\usepackage[utf8]{inputenc}
\usepackage[tableposition=top]{caption}
\newcommand{\real}{\mathbb{R}}
\newcommand{\inner}[1]{\langle #1 \rangle}
\newcommand{\set}[1]{\{\, #1 \,\}}
\newcommand{\bigset}[1]{\left\{\, #1 \,\right\}}
\newcommand{\fatdot}{\,\cdot\,}
\newcommand{\opand}{\mathbin{\rm and}}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\DeclareMathOperator{\var}{var}
\DeclareMathOperator{\rint}{rint}
\DeclareMathOperator{\con}{con}
\DeclareMathOperator{\pos}{pos}
\DeclareMathOperator{\cl}{cl}
\DeclareMathOperator{\spam}{span}
\DeclareMathOperator{\rc}{rc}
\DeclareMathOperator{\myPr}{Pr}
\DeclareMathOperator{\supp}{supp}
\DeclareMathOperator{\rank}{rank}
\DeclareMathOperator{\logit}{logit}
\setlength{\textheight}{\paperheight}
\addtolength{\textheight}{- 2 in}
\setlength{\topmargin}{0.25 pt}
\setlength{\headheight}{0 pt}
\setlength{\headsep}{0 pt}
\addtolength{\textheight}{- \topmargin}
\addtolength{\textheight}{- \topmargin}
\addtolength{\textheight}{- \footskip}
\setlength{\textwidth}{\paperwidth}
\addtolength{\textwidth}{- 2 in}
\setlength{\oddsidemargin}{0.25 in}
\setlength{\evensidemargin}{0.25 in}
\addtolength{\textwidth}{- \oddsidemargin}
\addtolength{\textwidth}{- \evensidemargin}
\begin{document}
\vspace*{0.9375in}
\begin{center}
{\bfseries More Supporting Data Analysis for ``Likelihood Inference
in Exponential Families and Directions of Recession''} \\
By \\
Charles J. Geyer \\
Technical Report No.~673 \\
School of Statistics \\
University of Minnesota \\
% April 20, 2005 \\
\today
\end{center}
\thispagestyle{empty}
\cleardoublepage
\setcounter{page}{1}
\thispagestyle{empty}
<>=
options(keep.source = TRUE, width = 65)
ps.options(pointsize = 15)
pdf.options(pointsize = 15)
@
\section{R Package Rcdd}
We use the R statistical computing environment \citep{rcore} in our analysis.
It is free software and can be obtained from
\url{http://cran.r-project.org}. Precompiled binaries
are available for Windows, Macintosh, and popular Linux distributions.
We use the contributed package \verb@rcdd@.
If R has been installed, but this package has
not yet been installed, do
\begin{verbatim}
install.packages("rcdd")
\end{verbatim}
from the R command line
(or do the equivalent using the GUI menus if on Macintosh
or Windows). This may require root or administrator privileges.
If the \verb@rcdd@ package has been installed, we load it
<>=
library(rcdd)
@
<>=
baz <- library(help = "rcdd")
baz <- baz$info[[1]]
baz <- baz[grep("Version", baz)]
baz <- sub("^Version: *", "", baz)
bazzer <- paste(R.version$major, R.version$minor, sep = ".")
@
The version of the package used to make this document
is \Sexpr{baz}.
The version of R used to make this document is \Sexpr{bazzer}.
This entire document and all of the calculations shown were made using
the R command \texttt{Sweave} and hence are exactly reproducible by
anyone who has R and the R noweb (RNW) file from which it was created.
Both the RNW file and and the PDF document produced from it are available at
\url{http://www.stat.umn.edu/geyer/gdor}.
\section{Introduction}
This technical report provides one more example, suggested by the referee,
of an exponential family model in which solutions are ``at infinity'' so
the maximum likelihood estimate (MLE) does not exist in the conventional
sense. This is a Bradley-Terry model without ties.
When team $i$ plays team $j$ the probability that team $i$ wins is
$\logit^{-1}(\theta_i - \theta_j)$,
where
$$
\logit^{-1}(\theta) = \frac{1}{1 + \exp(- \theta)}.
$$
\section{Data}
\subsection{Matrix Form}
We make up such data
below for a sports league with 8 teams and a season in which each team
plays each other team twice, so each team plays 14 games.
<>=
team.names <- c("ants", "beetles", "cows", "dogs",
"egrets", "foxes", "gerbils", "hogs")
data <- matrix(c(NA, 2, 2, 2, 2, 2, 2, 2,
0, NA, 1, 2, 2, 2, 2, 2,
0, 1, NA, 2, 1, 2, 2, 2,
0, 0, 0, NA, 1, 1, 2, 2,
0, 0, 1, 1, NA, 1, 2, 2,
0, 0, 0, 1, 1, NA, 2, 2,
0, 0, 0, 0, 0, 0, NA, 1,
0, 0, 0, 0, 0, 0, 1, NA), byrow = TRUE, nrow = 8)
dimnames(data) <- list(team.names, team.names)
print(data)
@
And we make it a nice \LaTeX\ table
(Table~\ref{tab:two}, page~\pageref{tab:two}).
<>=
library(xtable)
print(xtable(data, align = "l|cccccccc",
caption = paste("Data for Bradley-Terry model.", "Entries give number",
"of games (out of 2) that the team which is the row label beat the",
"team which is the column label."), digits = 0,
label = "tab:two"), caption.placement = "top",
table.placement = "tbp", include.rownames = TRUE)
@
\subsection{Vector Form}
Now we need to put these data in a form preferred by
the R function \texttt{glm}, where the data are strung out in one long vector.
<>=
wins <- data[upper.tri(data)]
team.plus <- row(data)[upper.tri(data)]
team.minus <- col(data)[upper.tri(data)]
modmat <- matrix(0, length(wins), nrow(data))
for (i in 1:ncol(modmat)) {
modmat[team.plus == i, i] <- 1
modmat[team.minus == i, i] <- (- 1)
}
losses <- 2 - wins
resp <- cbind(wins, losses)
@
\section{Try to Fit using GLM}
Now we are ready to attempt to fit the model with \texttt{glm}.
<>=
out <- glm(resp ~ modmat + 0, family = binomial)
summary(out)
@
The function \texttt{glm} does emit a warning (not shown
because \texttt{Sweave} does not capture it) suggesting the MLE does not
exist in the conventional sense.
If we ignore the warning and apply the summary function,
we get incomprehensible nonsense.
\section{Linearity}
\subsection{Vector Form}
Now we determine the linearity. See \citet[Section~3.12]{gdor} for details.
<>=
tanv <- modmat
tanv[losses == 0, ] <- (- tanv[losses == 0])
vrep <- cbind(0, 0, tanv)
linear <- wins > 0 & losses > 0
vrep[linear, 1] <- 1
lout <- linearity(vrep, rep = "V")
linear[lout] <- TRUE
sum(linear == TRUE)
sum(linear == FALSE)
@
The limiting conditional model (LCM), for which see \citet[Section~3.14]{gdor},
conditions on \Sexpr{sum(linear == FALSE)} components of the response,
leaving \Sexpr{sum(linear == TRUE)} components free.
\subsection{Matrix Form}
It is hard to see what we are conditioning on when the data are strung
out in a vector. Back to matrix form.
<>=
foo <- matrix(FALSE, nrow(data), ncol(data))
dimnames(foo) <- dimnames(data)
foo[upper.tri(foo)] <- linear
foo <- foo | t(foo)
diag(foo) <- NA
print(foo)
@
The \texttt{TRUE} entries are the ones that are free (we do not condition
on the observed data) in the LCM.
The \texttt{FALSE} entries are the ones that are fixed (we do condition
on the observed data) in the LCM.
The MLE says the ants always win, because they won all their games
and we condition on this observed result.
The MLE says the gerbils and the hogs always lose when playing anyone
but each other, because they lost all their games, except to each other,
and we condition on this result.
\section{Generic Direction of Recession}
Find the generic direction of recession (GDOR), for which see
\citet[Section~3.6]{gdor}.
<>=
p <- ncol(tanv)
hrep <- cbind(0, 0, -tanv, 0)
hrep[!linear, ncol(hrep)] <- (-1)
hrep[linear, 1] <- 1
hrep <- rbind(hrep, c(0, 1, rep(0, p), -1))
objv <- c(rep(0, p), 1)
pout <- lpcdd(hrep, objv, minimize = FALSE)
gdor <- pout$primal.solution[1:p]
names(gdor) <- team.names
print(gdor)
@
This agrees with our previous analysis. The natural parameter for
a game between team $i$ and team $j$ has the form $\theta_i - \theta_j$.
If $\delta$ is the GDOR, then we obtain the LCM by taking limits
as $s \to \infty$ for linear predictor (matrix form) with components
\begin{equation} \label{eq:lin-pred}
\eta_{i j} = \theta_i - \theta_j + s (\delta_i - \delta_j)
\end{equation}
when $\delta_i = \delta_j$, the limit changes nothing.
When $\delta_i \neq \delta_j$, the limit makes the mean value parameter
equal to the observed value for the corresponding natural statistic.
We see there are three groups having equal components of the GDOR.
The first group is the singleton set $\{\text{ants}\}$.
The second group is $\{\text{beetles}, \text{cows},
\text{dogs}, \text{foxes}, \text{egrets}\}$.
The third group is $\{\text{gerbils}, \text{hogs}\}$.
We can see that the ants always win, according to the LCM, for either
of two reasons. The first reason is that we are conditioning on the
observed data for inter-group games and they always won in the observed data. The second
reason is that their component of the GDOR is higher than anyone else's
so \eqref{eq:lin-pred} goes to $+ \infty$ as $s \to \infty$ when $i = 1$
(ants being the team with index 1).
We can see that teams in group two (beetles, cows, dogs, foxes, egrets)
always win when playing teams in group three (gerbils, hogs), according
to the LCM, for either of two reasons. The first reason is that we are
conditioning on the observed data for inter-group games and they always won in the observed data.
The second reason is that their component of the GDOR for teams in group
two is higher than that for teams in group three,
so \eqref{eq:lin-pred} goes to $+ \infty$ as $s \to \infty$ when
$2 \le i \le 6$ and $7 \le j \le 8$ (those being the ranges that
have team $i$ in group two and team $j$ in group three).
\section{Limiting Conditional Model}
Fit the LCM
<>=
resp.cond <- resp[linear, ]
modmat.cond <- modmat[linear, ]
out.cond <- glm(resp.cond ~ modmat.cond + 0, family = binomial)
summary(out.cond)
@
And extract the MLE
<>=
beta.hat <- coefficients(out.cond)
beta.hat[is.na(beta.hat)] <- 0
beta.hat <- round(beta.hat, 15)
names(beta.hat) <- team.names
cbind(beta.hat, gdor)
@
The MLE in the original model can be thought of as \texttt{beta.hat}
sent to infinity in the direction \texttt{gdor}.
We make it a nice \LaTeX\ table
(Table~\ref{tab:mle}, page~\pageref{tab:mle}).
<>=
foo <- cbind(round(beta.hat, 3), gdor)
colnames(foo) <- c("$\\hat{\\beta}$", "$\\delta$")
library(xtable)
print(xtable(foo, align = "l|cc", caption = paste("Maximum Likelihood",
"Estimate.", "$\\hat{\\beta}$ the MLE for the LCM. $\\delta$, the GDOR."),
digits = c(3, 3, 0), label = "tab:mle"), caption.placement = "top",
table.placement = "tbp", include.rownames = TRUE,
sanitize.colnames.function = function(x) return(x))
@
\section{Constancy Spaces}
\subsection{Original Model}
<>=
hrep <- cbind(1, 0, tanv)
rout <- redundant(hrep, rep = "H")
sout <- scdd(rout$output)
const.orig <- sout$output[ , - c(1, 2) ]
print(const.orig)
@
The vector having all components equal to one is a direction of constancy
for the original model.
\subsection{Limiting Conditional Model}
<>=
rout <- redundant(hrep[linear, ], rep = "H")
sout <- scdd(rout$output)
const.lcm <- sout$output[ , - c(1, 2) ]
print(const.lcm)
@
Each of the indicator vectors for the three groups is a direction of
constancy for the LCM.
\section{One-Sided Confidence Intervals}
We try out the scheme discussed in \citet[Section~3.16.2 and~3.16.4]{gdor}.
In calculating confidence intervals we need to take a union over a subspace
\citep[Equation~23]{gdor}. This subspace may be taken to be the subspace
of the constancy space of the LCM that is orthogonal to both the constancy
space of the original model and to the GDOR.
<>=
hrep.foo <- rout$output
hrep.foo <- rbind(hrep.foo, c(1, 0, gdor))
hrep.foo <- rbind(hrep.foo, c(1, 0, const.orig))
sout.foo <- scdd(hrep.foo)
qux <- sout.foo$output[ , - c(1, 2)]
qux
@
To find $100 (1 - \alpha) \%$ confidence intervals,
for each real number $r$ we are supposed to find the number $s$ such
that
$$
\Pr\nolimits_{\hat{\beta} + r \gamma + s \delta}(H) = \alpha,
$$
where $H$ is the support of the LCM, $\hat{\beta}$ is the vector
denoted \texttt{beta.hat} above, $\gamma$ is the vector denoted \texttt{qux}
above, and $\delta$ is the vector denoted \texttt{gdor} above.
<>=
alpha <- 0.05
rr <- seq(-3, 3, 0.5)
sr <- double(length(rr))
prob.lcm <- function(s, r) {
beta <- beta.hat + r * qux + s * gdor
eta <- modmat %*% beta
pp <- 1 / (1 + exp(- eta))
qq <- 1 / (1 + exp(eta))
probs <- pp^wins * qq^losses
return(prod(probs[!linear]))
}
for (i in 1:length(rr)) {
foo <- function(s) prob.lcm(s, rr[i]) - alpha
uout <- uniroot(foo, lower = -100, upper = 100)
sr[i] <- uout$root
}
print(sr)
@
Now we need to say what this means in terms of mean value parameters.
<>=
mu.win.bnd <- rep(Inf, length(wins))
mu.loss.bnd <- rep(-Inf, length(losses))
for (i in 1:length(rr)) {
beta <- beta.hat + rr[i] * qux + sr[i] * gdor
eta <- modmat %*% beta
pp <- 1 / (1 + exp(- eta))
qq <- 1 / (1 + exp(eta))
mu.win <- (wins + losses) * pp
mu.loss <- (wins + losses) * qq
##### code below only works because all components of gdor are nonnegative
##### which only happens because groups of teams are ordered by ability
mu.win.bnd <- pmin(mu.win.bnd, mu.win)
mu.loss.bnd <- pmax(mu.loss.bnd, mu.loss)
}
mu.win.bnd[linear] <- NA
mu.loss.bnd[linear] <- NA
mu.win.bnd
mu.loss.bnd
@
\subsection{Matrix Form}
We put these back in matrix form.
<>=
foo <- matrix(0, nrow(data), ncol(data))
dimnames(foo) <- dimnames(data)
bar <- foo
foo[upper.tri(foo)] <- mu.win.bnd
bar[upper.tri(foo)] <- mu.loss.bnd
foo <- foo + t(bar)
diag(foo) <- NA
foo <- round(foo, 3)
print(foo)
@
And we make it a nice \LaTeX\ table
(Table~\ref{tab:one}, page~\pageref{tab:one}).
<>=
library(xtable)
print(xtable(foo, align = "l|cccccccc", caption = paste("One-sided 95\\%",
"Confidence intervals for Mean Values.", "Entries give expected number",
"of games (out of 2) that the team which is the row label beats the",
"team which is the column label.", "Numbers in the upper triangle",
"are lower bounds.", "Numbers in the lower triangle are upper bounds."),
digits = 3, label = "tab:one"), caption.placement = "top",
table.placement = "tbp", include.rownames = TRUE)
@
A little bit of experimentation (not shown) proves that the numbers in
Table~\ref{tab:one} are unchanged (to the three decimal precision used
for this table) if the definition of \texttt{rr} is changed to
\begin{verbatim}
rr <- seq(-10, 10, 0.1)
\end{verbatim}
and the numbers are only changed by at most one in the third decimal place
if the definition of \texttt{rr} is changed to
\begin{verbatim}
rr <- seq(-2, 2, 0.5)
\end{verbatim}
So it appears that we are getting sensible confidence intervals, even though
we have no proof that the discretization used is o.~k.
\begin{thebibliography}{}
% \bibitem[Barndorff-Nielsen(1978)]{barndorff}
% Barndorff-Nielsen, O.~E. (1978).
% \newblock \emph{Information and Exponential Families}.
% \newblock Chichester: John Wiley.
% \bibitem[Brown(1986)]{brown}
% Brown, L.~D. (1986).
% \newblock \emph{Fundamentals of Statistical Exponential Families: with
% Applications in Statistical Decision Theory}.
% \newblock Hayward, CA: Institute of Mathematical Statistics.
\bibitem[Fukuda(2008)]{cddlib}
Fukuda, K. (2008)
\newblock cddlib package, version 094f.
\newblock \url{http://www.ifor.math.ethz.ch/~fukuda/cdd_home/cdd.html}.
\newblock Documentation is the file \texttt{cddlibman.pdf},
which is included in the \texttt{cddlib} source and also in
every \texttt{rcdd} installation.
% \bibitem[Geyer(1990)]{thesis}
% Geyer, C.~J. (1990).
% \newblock \emph{Likelihood and Exponential Families}.
% \newblock Unpublished Ph.~D. Thesis. University of Washington.
% \bibitem[Geyer(2008)]{aster}
% Geyer, C.~J. (2008).
% \newblock R package \texttt{aster} (Aster Models), version 0.7-4.
% \newblock \url{http://www.stat.umn.edu/geyer/aster/}.
% \bibitem[Geyer and Meeden(2005)]{fuzzy}
% Geyer, C.~J. and Meeden, G.~D. (2005).
% \newblock Fuzzy and randomized confidence intervals and P-values
% (with discussion).
% \newblock \emph{Statistical Science}, \textbf{20}, 358--387.
\bibitem[Geyer(2009)]{gdor}
Geyer, C.~J. (2009).
\newblock Likelihood inference in exponential families and directions
of recession
\newblock Submitted to \emph{Electronic Journal of Statistics}.
\newblock \url{http://www.stat.umn.edu/geyer/gdor/}.
\bibitem[Geyer and Meeden(2008)]{rcdd}
Geyer, C.~J. and Meeden, G.~D. (2008).
\newblock R package \texttt{rcdd} (C Double Description for R), version 1.1.
\newblock \url{http://http://www.stat.umn.edu/geyer/rcdd/}.
\newblock Incorporates code from \citet{cddlib}.
% \bibitem[Geyer and Thompson(1992)]{gt}
% Geyer, C.~J. and Thompson, E.~A. (1992).
% \newblock Constrained Monte Carlo maximum likelihood for dependent data
% (with discussion).
% \newblock \emph{Journal of the Royal Statistical Society, Series B},
% \textbf{54} 657--99.
% \bibitem[Geyer et al.(2007)Geyer, Wagenius and Shaw]{gws}
% Geyer, C.~J., Wagenius, S. and Shaw, R.~G. (2007).
% \newblock Aster models for life history analysis.
% \newblock \emph{Biometrika}, \textbf{94}, 415--426.
% \bibitem[Lehmann(1959)]{lehmann-tsh}
% Lehmann, E.~L. (1959).
% \newblock \emph{Testing Statistical Hypotheses}.
% \newblock New York: Wiley.
\bibitem[R Development Core Team(2008)]{rcore}
R Development Core Team (2008).
\newblock R: A language and environment for statistical computing.
\newblock R Foundation for Statistical Computing, Vienna, Austria.
\newblock \url{http://www.R-project.org}.
% \bibitem[Rockafellar(1970)]{rocky}
% Rockafellar, R.~T. (1970).
% \newblock \emph{Convex Analysis}.
% \newblock Princeton: Princeton University Press.
% \bibitem[Rockafellar and Wets(2004)]{raw}
% Rockafellar, R.~T. and Wets, R. J.-B. (2004).
% \newblock \emph{Variational Analysis}, corr.\ 2nd printing.
% \newblock Berlin: Springer-Verlag.
\end{thebibliography}
\end{document}
\begin{center} \LARGE REVISED DOWN TO HERE \end{center}