

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
            "http://www.w3.org/TR/html4/strict.dtd">

<html lang="en-US">

<head>

<title>
Statistics 5102 (Geyer, Spring 2009) 
</title>

<link href="/geyer/5102/foo.css" rel="stylesheet" type="text/css">

</head>

<body>

<div id="header">
<h1>Statistics 5102 (Geyer, Spring 2009) </h1>
</div>

<div id="main">


\documentclass[11pt]{article}

\usepackage{amsmath}
\usepackage{indentfirst}
\usepackage{verbatim}

\input{macros}
\input{bold}

\numberwithin{equation}{section}

\begin{document}

\title{Stat 5101 Notes: Brand Name Distributions}

\author{Charles J. Geyer}

\maketitle

\section{Discrete Uniform Distribution}

\paragraph{Abbreviation} $\DiscreteUniformDis(n)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Equally likely outcomes.

\paragraph{Sample Space}

The interval 1, 2, $\ldots$, $n$ of the integers.

\paragraph{Probability Mass Function}

$$
   f(x) = \frac{1}{n}, \qquad x = 1, 2, \ldots, n
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{n + 1}{2} \\
   \var(X) & = \frac{n^2 - 1}{12}
\end{align*}
\begin{comment}
  alpha2 = Sum[ k^2, {k, 1, n} ] / n
  alpha1 = Sum[ k, {k, 1, n} ] / n
  mu2 = alpha2 - alpha1^2
  Simplify[alpha1]
  Simplify[mu2]
\end{comment}

\section{General Discrete Uniform Distribution}

\paragraph{Type} Discrete.

\paragraph{Sample Space}

Any finite set $S$.

\paragraph{Probability Mass Function}

$$
   f(x) = \frac{1}{n}, \qquad x \in S,
$$
where $n$ is the number of elements of $S$.

\section{Uniform Distribution}

\paragraph{Abbreviation} $\UniformDis(a, b)$.

\paragraph{Type} Continuous.

\paragraph{Rationale} Continuous analog of the discrete uniform distribution.

\paragraph{Parameters}

Real numbers $a$ and $b$ with $a < b$.

\paragraph{Sample Space}

The interval $(a, b)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{1}{b - a}, \qquad a < x < b
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{a + b}{2} \\
   \var(X) & = \frac{(b - a)^2}{12}
\end{align*}
\begin{comment}
  f[x_] = 1 / (b - a)
  alpha1 = Integrate[ x f[x], {x, a, b} ]
  alpha2 = Integrate[ x^2 f[x], {x, a, b} ]
  mu2 = alpha2 - alpha1^2
  Simplify[alpha1]
  Simplify[mu2]
\end{comment}

\paragraph{Relation to Other Distributions}
$\BetaDis(1, 1) = \UniformDis(0, 1)$.

\section{General Uniform Distribution}

\paragraph{Type} Continuous.

\paragraph{Sample Space}

Any open set $S$ in $\real^n$.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{1}{c}, \qquad x \in S
$$
where $c$ is the measure (length in one dimension, area in two, volume in
three, etc.)\ of the set $S$.

\section{Bernoulli Distribution}

\paragraph{Abbreviation} $\BernoulliDis(p)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Any zero-or-one-valued random variable.

\paragraph{Parameter}

Real number $0 \le p \le 1$.

\paragraph{Sample Space}

The two-element set $\{0, 1\}$.

\paragraph{Probability Mass Function}

$$
   f(x) = \begin{cases} p, & x = 1 \\ 1 - p, & x = 0 \end{cases}
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = p \\
   \var(X) & = p (1 - p)
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are IID $\BernoulliDis(p)$ random variables,
then $X_1 + \cdots + X_k$ is a $\BinomialDis(k, p)$ random variable.

\paragraph{Relation to Other Distributions}
$\BernoulliDis(p) = \BinomialDis(1, p)$.

\section{Binomial Distribution}

\paragraph{Abbreviation} $\BinomialDis(n, p)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Sum of IID Bernoulli random variables.

\paragraph{Parameters}

Real number $0 \le p \le 1$.  Integer $n \ge 1$.

\paragraph{Sample Space}

The interval 0, 1, $\ldots$, $n$ of the integers.

\paragraph{Probability Mass Function}

$$
   f(x) = \binom{n}{x} p^x (1 - p)^{n - x}, \qquad x = 0, 1, \ldots, n
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = n p \\
   \var(X) & = n p (1 - p)
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are independent random variables,
$X_i$ being $\BinomialDis(n_i, p)$ distributed,
then $X_1 + \cdots + X_k$ is a $\BinomialDis(n_1 + \cdots + n_k, p)$
random variable.

\paragraph{Normal Approximation}

If $n p$ and $n (1 - p)$ are both large, then 
$$
   \BinomialDis(n, p) \approx \NormalDis\bigl(n p, n p (1 - p)\bigr)
$$

% \paragraph{Variance-Stabilizing Transformation}
% 
% $$
%    g(x) = 2 \sin^{-1}(\sqrt{x})
% $$
% If $X$ is $\BinomialDis(n, p)$ distributed and $n p$ and $n (1 - p)$
% are both large, then 
% $$
%    g(X) \approx \NormalDis\bigl(g(n p), n\bigr)
% $$

\paragraph{Poisson Approximation}

If $n$ is large but $n p$ is small, then
$$
   \BinomialDis(n, p) \approx \PoissonDis(n p)
$$

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to the
\textbf{binomial theorem:} for any real numbers $a$ and $b$
$$
   \sum_{k = 0}^n \binom{n}{k} a^k b^{n - k} = (a + b)^n.
$$

\paragraph{Degeneracy}

If $p = 0$ the distribution is concentrated at 0.
If $p = 1$ the distribution is concentrated at $n$.

\paragraph{Relation to Other Distributions}
$\BernoulliDis(p) = \BinomialDis(1, p)$.

\section{Hypergeometric Distribution}

\paragraph{Abbreviation} $\text{Hypergeometric}(A, B, n)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Sample of size $n$ without replacement from finite
population of $B$ zeros and $A$ ones.

\paragraph{Sample Space}

The interval $\max(0, n - B)$, $\ldots$, $\min(n, A)$ of the integers.

\paragraph{Probability Mass Function}

$$
   f(x) = \frac{\binom{A}{x} \binom{B}{n - x}}{\binom{A + B}{n}},
   \qquad x = \max(0, n - B), \ldots, \min(n, A)
$$

\paragraph{Moments}

\begin{align}
   E(X) & = n p
   \nonumber
   \\
   \var(X) & = n p (1 - p) \cdot \frac{N - n}{N - 1}
   \nonumber
   \\
\intertext{where}
   p & = \frac{A}{A + B}
   \label{eq:hyper-p}
   \\
   N & = A + B
   \nonumber
\end{align}
\begin{comment}
 <<Statistics`DiscreteDistributions`
 A = N p
 B = N (1 - p)
 dist = HypergeometricDistribution[n, A, A + B]
 f[x_] = PDF[dist, x]
 Mean[dist]
 Variance[dist]
\end{comment}

\paragraph{Binomial Approximation}

If $n$ is small compared to either $A$ or $B$, then
$$
   \text{Hypergeometric}(A, B, n) \approx \BinomialDis(n, p)
$$
where $p$ is given by \eqref{eq:hyper-p}.

\paragraph{Normal Approximation}

If $n$ is large, but small compared to either $A$ or $B$, then 
$$
   \text{Hypergeometric}(A, B, n)
   \approx
   \NormalDis\bigl(n p, n p (1 - p)\bigr)
$$
where $p$ is given by \eqref{eq:hyper-p}.

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to
$$
   \sum_{x = \max(0, n - B)}^{\min(A, n)} \binom{A}{x} \binom{B}{n - x}
   = \binom{A + B}{n}
$$


\section{Poisson Distribution}

\paragraph{Abbreviation} $\PoissonDis(\mu)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Counts in a Poisson process.

\paragraph{Parameter}

Real number $\mu > 0$.

\paragraph{Sample Space}

The non-negative integers 0, 1, $\ldots.$

\paragraph{Probability Mass Function}

$$
   f(x) = \frac{\mu^x}{x !} e^{- \mu}, \qquad x = 0, 1, \ldots
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \mu \\
   \var(X) & = \mu
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are independent random variables,
$X_i$ being $\PoissonDis(\mu_i)$ distributed,
then $X_1 + \cdots + X_k$ is a $\PoissonDis(\mu_1 + \cdots + \mu_k)$
random variable.

\paragraph{Normal Approximation}

If $\mu$ is large, then 
$$
   \PoissonDis(\mu) \approx \NormalDis(\mu, \mu)
$$

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to the
Maclaurin series for the exponential function:
for any real number $x$
$$
   \sum_{k = 0}^\infty \frac{x^k}{k !} = e^x.
$$

\section{Geometric Distribution}

\paragraph{Abbreviation} $\GeoDis(p)$.

\paragraph{Type} Discrete.

\paragraph{Rationales}

\begin{itemize}
\item Discrete lifetime of object that does not age.
\item Waiting time or interarrival time in sequence
of IID Bernoulli trials.
\item Inverse sampling.
\item Discrete analog of the exponential distribution.
\end{itemize}

\paragraph{Parameter}

Real number $0 < p < 1$.

\paragraph{Sample Space}

The non-negative integers 0, 1, $\ldots.$

\paragraph{Probability Mass Function}

$$
   f(x) = p (1 - p)^x, \qquad x = 0, 1, \ldots
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{1 - p}{p} \\
   \var(X) & = \frac{1 - p}{p^2}
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are IID $\GeoDis(p)$ random variables,
then $X_1 + \cdots + X_k$ is a $\NegBinDis(k, p)$ random variable.

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to the
geometric series: for any real number $s$ such that $- 1 < s < 1$
$$
   \sum_{k = 0}^\infty s^k = \frac{1}{1 - s}.
$$

\section{Negative Binomial Distribution}

\paragraph{Abbreviation} $\NegBinDis(r, p)$.

\paragraph{Type} Discrete.

\paragraph{Rationale}
\begin{itemize}
\item Sum of IID geometric random variables.
\item Inverse sampling.
\item Gamma mixture of Poisson distributions.
\end{itemize}

\paragraph{Parameters}

Real number $0 < p \le 1$.  Integer $r \ge 1$.

\paragraph{Sample Space}

The non-negative integers 0, 1, $\ldots.$

\paragraph{Probability Mass Function}

$$
   f(x) = \binom{r + x - 1}{x} p^r (1 - p)^x, \qquad x = 0, 1, \ldots
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{r (1 - p)}{p} \\
   \var(X) & = \frac{r (1 - p)}{p^2}
\end{align*}

\paragraph{Addition Rule}

\begin{sloppypar}
If $X_1$, $\ldots$, $X_k$ are independent random variables,
$X_i$ being $\NegBinDis(r_i, p)$ distributed,
then $X_1 + \cdots + X_k$ is a $\NegBinDis(r_1 + \cdots + r_k, p)$
random variable.
\end{sloppypar}

\paragraph{Normal Approximation}

If $r (1 - p)$ is large, then 
$$
   \NegBinDis(r, p)
   \approx
   \NormalDis\biggl(\frac{r (1 - p)}{p}, \frac{r (1 - p)}{p^2}\biggr)
$$

\paragraph{Extended Definition}

The definition makes sense for noninteger $r$ if binomial coefficients
are defined by
$$
   \binom{r}{k} = \frac{r \cdot (r - 1) \cdots (r - k + 1)}{k !}
$$
which for integer $r$ agrees with the standard definition.

Also
\begin{equation} \label{eq:negbin-ident}
   \binom{r + x - 1}{x} = (- 1)^x \binom{- r}{x}
\end{equation}
which explains the name ``negative binomial.''

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to the
\textbf{generalized binomial theorem:} for any real number $s$ such
that $- 1 < s < 1$ and any real number $m$
\begin{equation} \label{eq:general-binom-theo}
   \sum_{k = 0}^\infty \binom{m}{k} s^k = (1 + s)^m.
\end{equation}
If $m$ is a nonnegative integer, then $\binom{m}{k}$ is zero for $k > m$,
and we get the ordinary binomial theorem.

Changing variables from $m$ to $- m$ and from $s$ to $- s$ and
using \eqref{eq:negbin-ident}
turns \eqref{eq:general-binom-theo} into
$$
   \sum_{k = 0}^\infty \binom{m + k - 1}{k} s^k =
   \sum_{k = 0}^\infty \binom{- m}{k} (- s)^k = (1 - s)^{- m}
$$
which has a more obvious relationship to the negative binomial
density summing to one.

\section{Normal Distribution}

\paragraph{Abbreviation} $\NormalDis(\mu, \sigma^2)$.

\paragraph{Type} Continuous.

\paragraph{Rationale}
\begin{itemize}
\item Limiting distribution in the central limit theorem.
\item Error distribution that turns the method of least squares into
    maximum likelihood estimation.
\end{itemize}

\paragraph{Parameters}

Real numbers $\mu$ and $\sigma^2 > 0$.

\paragraph{Sample Space}

The real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{1}{\sqrt{2 \pi} \sigma} e^{- (x - \mu)^2 / 2 \sigma^2},
   \qquad - \infty < x < \infty
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \mu
   \\
   \var(X) & = \sigma^2
   \\
   E\{(X - \mu)^3\} & = 0
   \\
   E\{(X - \mu)^4\} & = 3 \sigma^4
\end{align*}

\paragraph{Linear Transformations}
If $X$ is $\NormalDis(\mu, \sigma^2)$ distributed, then $a X + b$ is
$\NormalDis(a \mu + b, a^2 \sigma^2)$ distributed.

\paragraph{Addition Rule}
If $X_1$, $\ldots$, $X_k$ are independent random variables, $X_i$ being
$\NormalDis(\mu_i, \sigma_i^2)$ distributed, then $X_1 + \cdots + X_k$
is a $\NormalDis(\mu_1 + \cdots + \mu_k, \sigma_1^2 + \cdots + \sigma_k^2)$
random variable.

\paragraph{Theorem}

The fact that the probability density function integrates to one is
equivalent to the integral
$$
   \int_{- \infty}^\infty e^{- z^2 / 2} \, d z = \sqrt{2 \pi}
$$

\paragraph{Relation to Other Distributions}
If $Z$ is $\NormalDis(0, 1)$ distributed,
then $Z^2$ is $\GammaDis(\frac{1}{2}, \frac{1}{2})$ distributed.

\section{Exponential Distribution}

\paragraph{Abbreviation} $\ExpDis(\lambda)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Lifetime of object that does not age.
\item Waiting time or interarrival time in Poisson process.
\item Continuous analog of the geometric distribution.
\end{itemize}

\paragraph{Parameter}

Real number $\lambda > 0$.

\paragraph{Sample Space}

The interval $(0, \infty)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \lambda e^{- \lambda x}, \qquad 0 < x < \infty
$$

\paragraph{Cumulative Distribution Function}

$$
   F(x) = 1 - e^{- \lambda x}, \qquad 0 < x < \infty
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{1}{\lambda} \\
   \var(X) & = \frac{1}{\lambda^2}
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are IID $\ExpDis(\lambda)$
random variables,
then $X_1 + \cdots + X_k$ is a $\GammaDis(k, \lambda)$ random variable.

\paragraph{Relation to Other Distributions}
$\ExpDis(\lambda) = \GammaDis(1, \lambda)$.

\section{Gamma Distribution}

\paragraph{Abbreviation} $\GammaDis(\alpha, \lambda)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Sum of IID exponential random variables.
\item Conjugate prior for exponential, Poisson, or normal precision family.
\end{itemize}

\paragraph{Parameters}

Real numbers $\alpha > 0$ and $\lambda > 0$.

\paragraph{Sample Space}

The interval $(0, \infty)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{\lambda^\alpha}{\Gamma(\alpha)} x^{\alpha - 1} e^{- \lambda x},
   \qquad 0 < x < \infty
$$
where $\Gamma(\alpha)$ is defined by \eqref{eq:gamma-def} below.

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{\alpha}{\lambda} \\
   \var(X) & = \frac{\alpha}{\lambda^2}
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are independent random variables,
$X_i$ being $\GammaDis(\alpha_i, \lambda)$ distributed,
then $X_1 + \cdots + X_k$ is
a $\GammaDis(\alpha_1 + \cdots + \alpha_k, \lambda)$ random variable.

\paragraph{Normal Approximation}

If $\alpha$ is large, then 
$$
   \GammaDis(\alpha, \lambda)
   \approx
   \NormalDis\biggl(\frac{\alpha}{\lambda}, \frac{\alpha}{\lambda^2}\biggr)
$$

\paragraph{Theorem}

The fact that the probability density function integrates to one is
equivalent to the integral
$$
   \int_0^\infty x^{\alpha - 1} e^{- \lambda x} \, d x
   =
   \frac{\Gamma(\alpha)}{\lambda^\alpha}
$$
The case $\lambda = 1$ is the definition of the \emph{gamma function}
\begin{equation} \label{eq:gamma-def}
   \Gamma(\alpha)
   =
   \int_0^\infty x^{\alpha - 1} e^{- x} \, d x
\end{equation}

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item $\ExpDis(\lambda) = \GammaDis(1, \lambda)$.
\item $\ChiSqDis(\nu) = \GammaDis(\frac{\nu}{2}, \frac{1}{2})$.
\item If $X$ and $Y$ are independent, $X$ is $\GammaDis(\alpha_1, \lambda)$
distributed and $Y$ is $\GammaDis(\alpha_2, \lambda)$ distributed, then
$X / (X + Y)$ is $\BetaDis(\alpha_1, \alpha_2)$ distributed.
\item If $Z$ is $\NormalDis(0, 1)$ distributed,
then $Z^2$ is $\GammaDis(\frac{1}{2}, \frac{1}{2})$ distributed.
\end{itemize}

\paragraph{Facts About Gamma Functions}

Integration by parts in \eqref{eq:gamma-def} establishes the
\textbf{gamma function recursion formula}
\begin{equation} \label{eq:gamma-recurse}
   \Gamma(\alpha + 1) = \alpha \Gamma(\alpha), \qquad \alpha > 0
\end{equation}

The relationship between the $\ExpDis(\lambda)$ and
$\GammaDis(1, \lambda)$ distributions gives
$$
   \Gamma(1) = 1
$$
and the relationship between the $\NormalDis(0, 1)$ and
$\GammaDis(\frac{1}{2}, \frac{1}{2})$ distributions gives
$$
   \Gamma(\tfrac{1}{2}) = \sqrt{\pi}
$$
Together with the recursion \eqref{eq:gamma-recurse} these give
for any positive integer $n$
$$
   \Gamma(n + 1) = n !
$$
and
$$
   \Gamma(n + \tfrac{1}{2})
   =
   \left( n  - \tfrac{1}{2} \right)
   \left( n  - \tfrac{3}{2} \right)
   \cdots
   \tfrac{3}{2}
   \cdot
   \tfrac{1}{2}
   \sqrt{\pi}
$$

\section{Beta Distribution}

\paragraph{Abbreviation} $\BetaDis(\alpha_1, \alpha_2)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Ratio of gamma random variables.
\item Conjugate prior for binomial or negative binomial family.
\end{itemize}

\paragraph{Parameters}

Real numbers $\alpha_1 > 0$ and $\alpha_2 > 0$.

\paragraph{Sample Space}

The interval $(0, 1)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{\Gamma(\alpha_1 + \alpha_2)}{\Gamma(\alpha_1) \Gamma(\alpha_2)}
   x^{\alpha_1 - 1} (1 - x)^{\alpha_2 - 1},
   \qquad 0 < x < 1
$$
where $\Gamma(\alpha)$ is defined by \eqref{eq:gamma-def} above.

\paragraph{Moments}

\begin{align*}
   E(X) & = \frac{\alpha_1}{\alpha_1 + \alpha_2} \\
   \var(X) & = \frac{\alpha_1 \alpha_2}{(\alpha_1 + \alpha_2)^2 (\alpha_1 + \alpha_2 + 1)}
\end{align*}

\paragraph{Theorem}

The fact that the probability density function integrates to one is
equivalent to the integral
$$
   \int_0^1 x^{\alpha_1 - 1} (1 - x)^{\alpha_2 - 1} \, d x
   =
   \frac{\Gamma(\alpha_1) \Gamma(\alpha_2)}{\Gamma(\alpha_1 + \alpha_2)}
$$

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item
If $X$ and $Y$ are independent, $X$ is $\GammaDis(\alpha_1, \lambda)$
distributed and $Y$ is $\GammaDis(\alpha_2, \lambda)$ distributed, then
$X / (X + Y)$ is $\BetaDis(\alpha_1, \alpha_2)$ distributed.
\item
$\BetaDis(1, 1) = \UniformDis(0, 1)$.
\end{itemize}

\section{Multinomial Distribution}

\paragraph{Abbreviation} $\MultinomialDis(n, \boldp)$.

\paragraph{Type} Discrete.

\paragraph{Rationale} Multivariate analog of the binomial distribution.

\paragraph{Parameters}

Real vector $\boldp$ in the parameter space
\begin{equation} \label{eq:multi-parm-space}
   \left\{\, \boldp \in \real^k :
   0 \le p_i,\ i = 1, \ldots, k,\ \text{and}\ \sum_{i = 1}^k p_i = 1 \,\right\}
\end{equation}

\paragraph{Sample Space}

The set of vectors with integer coordinates
\begin{equation} \label{eq:multi-samp-space}
   S =
   \left\{\, \boldx \in \ints^k :
   0 \le x_i,\ i = 1, \ldots, k,\ \text{and}\ \sum_{i = 1}^k x_i = n \,\right\}
\end{equation}

\paragraph{Probability Mass Function}

$$
   f(\boldx) = \binom{n}{\boldx} \prod_{i = 1}^k p_i^{x_i},
   \qquad \boldx \in S
$$
where
$$
   \binom{n}{\boldx} = \frac{n !}{\prod_{i = 1}^k x_i !}
$$
is called a \emph{multinomial coefficient}.

\paragraph{Moments}

\begin{align*}
   E(X_i) & = n p_i
   \\
   \var(X_i) & = n p_i (1 - p_i)
   \\
   \cov(X_i, X_j) & = - n p_i p_j, \qquad i \neq j
\end{align*}

\paragraph{Moments (Vector Form)}

\begin{align*}
   E(\boldX) & = n \boldp
   \\
   \var(\boldX) & = n \boldM
   \\
\intertext{where}
   \boldM & = \diag(\boldp) - \boldp \boldp'
\end{align*}
is the matrix with elements $m_{i j} = \cov(X_i, X_j) / n$.

\paragraph{Addition Rule}

If $\boldX_1$, $\ldots$, $\boldX_k$ are independent random vectors,
$\boldX_i$ being $\MultinomialDis(n_i, \boldp)$ distributed,
then $\boldX_1 + \cdots + \boldX_k$ is
a $\MultinomialDis(n_1 + \cdots + n_k, \boldp)$
random vector.

\paragraph{Normal Approximation}

If $n$ is large and $\boldp$ is not near the boundary of the parameter
space \eqref{eq:multi-parm-space}, then
$$
   \MultinomialDis(n, \boldp) \approx
   \NormalDis(n \boldp, n \boldM )
$$

\paragraph{Theorem}

The fact that the probability mass function sums to one is equivalent to the
\textbf{multinomial theorem:} for any vector $\bolda$ of real numbers
$$
   \sum_{\boldx \in S}
   \left[ \binom{n}{\boldx} \prod_{i = 1}^k a_i^{x_i} \right]
   = (a_1 + \cdots + a_k)^n
$$

\paragraph{Degeneracy}

If there exists a vector $\bolda$ such that $\boldM \bolda = 0$,
then $\var(\bolda' \boldX) = 0$.

In particular, the vector $\boldu = (1, 1, \ldots, 1)$ always satisfies
$\boldM \boldu = 0$, so $\var(\boldu' \boldX) = 0$.  This
is obvious, since $\boldu' \boldX = \sum_{i = 1}^k X_i = n$ by
definition of the multinomial distribution, and the variance of a constant
is zero.  This means a multinomial random vector of dimension $k$ is
``really'' of dimension no more than $k - 1$ because it is concentrated
on a hyperplane containing the sample
space \eqref{eq:multi-samp-space}.

\paragraph{Marginal Distributions}

Every univariate marginal is binomial
$$
   X_i \sim \BinomialDis(n, p_i)
$$

Not, strictly speaking, marginals, but random vectors formed by collapsing
categories are multinomial.  If $A_1$, $\ldots$, $A_m$ is a partition
of the set $\{ 1, \ldots, k \}$ and
\begin{align*}
   Y_j & = \sum_{i \in A_j} X_i, \qquad j = 1, \ldots, m
   \\
   q_j & = \sum_{i \in A_j} p_i, \qquad j = 1, \ldots, m
\end{align*}
then the random vector $\boldY$ has a $\MultinomialDis(n, \boldq)$
distribution.

\paragraph{Conditional Distributions}

If $\{ i_1, \ldots, i_m \}$ and $\{ i_{m + 1}, \ldots, i_k \}$ partition
the set $\{ 1, \ldots, k \}$, then the conditional distribution of
$X_{i_1}$, $\ldots$, $X_{i_m}$ given $X_{i_{m + 1}}$, $\ldots$, $X_{i_k}$
is $\MultinomialDis(n - X_{i_{m + 1}} - \cdots - X_{i_k}, \boldq)$,
where the parameter vector $\boldq$ has components
$$
   q_j = \frac{p_{i_j}}{p_{i_1} + \cdots + p_{i_m}}, \qquad j = 1, \ldots, m
$$

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item Each marginal of a multinomial is binomial.
\item If $X$ is $\BinomialDis(n, p)$, then the vector
$(X, n - X)$ is $\MultinomialDis\bigl(n, (p, 1 - p)\bigr)$.
\end{itemize}

\section{Bivariate Normal Distribution}

\paragraph{Abbreviation} See multivariate normal below.

\paragraph{Type} Continuous.

\paragraph{Rationales} See multivariate normal below.

\paragraph{Parameters}

Real vector $\boldmu$ of dimension $2$, real symmetric positive
semi-definite matrix $\boldM$ of dimension $2 \times 2$ having the
form
$$
   \boldM = \begin{pmatrix} \sigma_1^2 & \rho \sigma_1 \sigma_2
   \\ \rho \sigma_1 \sigma_2 & \sigma_2^2 \end{pmatrix}
$$
where $\sigma_1 > 0$, $\sigma_2 > 0$ and $- 1 < \rho < + 1$.

\paragraph{Sample Space}

The Euclidean space $\real^2$.

\paragraph{Probability Density Function}

\begin{align*}
   f(\boldx)
   & =
   \frac{1}{2 \pi} \det(\boldM)^{- 1 / 2}
   \exp\left(- \tfrac{1}{2} (\boldx - \boldmu)'
   \boldM^{-1} (\boldx - \boldmu) \right)
   \\
   & =
   \frac{1}{2 \pi \sqrt{1 - \rho^2} \sigma_1 \sigma_2}
   \exp\left(- \frac{1}{2 (1 - \rho^2)}
   \left[
   \left( \frac{x_1 - \mu_1}{\sigma_1} \right)^2
   \right. \right.
   \\
   & \qquad
   \left. \left.
   - 2 \rho
   \left( \frac{x_1 - \mu_1}{\sigma_1} \right)
   \left( \frac{x_2 - \mu_2}{\sigma_2} \right)
   +
   \left( \frac{x_2 - \mu_2}{\sigma_2} \right)^2
   \right]
   \right),
   \qquad \boldx \in \real^2
\end{align*}

\paragraph{Moments}

\begin{align*}
   E(X_i) & = \mu_i, \qquad i = 1, 2
   \\
   \var(X_i) & = \sigma_i^2, \qquad i = 1, 2
   \\
   \cov(X_1, X_2) & = \rho \sigma_1 \sigma_2
   \\
   \cor(X_1, X_2) & = \rho
\end{align*}

\paragraph{Moments (Vector Form)}

\begin{align*}
   E(\boldX) & = \boldmu
   \\
   \var(\boldX) & = \boldM
\end{align*}

\paragraph{Linear Transformations}
See multivariate normal below.

\paragraph{Addition Rule}
See multivariate normal below.

\paragraph{Marginal Distributions}

$X_i$ is $\NormalDis(\mu_i, \sigma_i^2)$ distributed, $i = 1$, 2.

\paragraph{Conditional Distributions}

The conditional distribution of $X_2$ given $X_1 = x_1$ is
$$
   \NormalDis\Bigl(\mu_2 + \rho \frac{\sigma_2}{\sigma_1} (x_1 - \mu_1),
   (1 - \rho^2) \sigma_2^2 \Bigr)
$$

\section{Multivariate Normal Distribution}

\paragraph{Abbreviation} $\NormalDis(\boldmu, \boldM)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}
\begin{itemize}
\item Multivariate analog of the univariate normal distribution.
\item Limiting distribution in the multivariate central limit theorem.
\end{itemize}

\paragraph{Parameters}

Real vector $\boldmu$ of dimension $k$, real symmetric positive
semi-definite matrix $\boldM$ of dimension $k \times k$.

\paragraph{Sample Space}

The Euclidean space $\real^k$.

\paragraph{Probability Density Function}

If $\boldM$ is (strictly) positive definite,
$$
   f(\boldx) = (2 \pi)^{- k / 2} \det(\boldM)^{- 1 / 2}
   \exp\left(- \tfrac{1}{2} (\boldx - \boldmu)'
   \boldM^{-1} (\boldx - \boldmu) \right),
   \qquad \boldx \in \real^k
$$
Otherwise there is no density ($\boldX$ is concentrated on a hyperplane).

\paragraph{Moments (Vector Form)}

\begin{align*}
   E(\boldX) & = \boldmu
   \\
   \var(\boldX) & = \boldM
\end{align*}

\paragraph{Linear Transformations}
If $\boldX$ is $\NormalDis(\boldmu, \boldM)$ distributed,
then $\boldA \boldX + \boldb$, where $\boldA$ is a constant
matrix and $\boldb$ is a constant vector of dimensions such that the
matrix multiplication and vector addition make sense,
has the $\NormalDis(\boldA \boldmu + \boldb, \boldA \boldM \boldA')$
distribution.

\paragraph{Addition Rule}

If $\boldX_1$, $\ldots$, $\boldX_k$ are independent random vectors,
$\boldX_i$ being $\NormalDis(\boldmu_i, \boldM_i)$
distributed,
then $\boldX_1 + \cdots + \boldX_k$ is
a $\NormalDis(\boldmu_1 + \cdots + \boldmu_k,
\boldM_1 + \cdots + \boldM_k)$ random vector.

\paragraph{Degeneracy}

If there exists a vector $\bolda$ such that $\boldM \bolda = 0$,
then $\var(\bolda' \boldX) = 0$.

\paragraph{Partitioned Vectors and Matrices}

The random vector and parameters are written in \emph{partitioned form}
\begin{subequations}
\begin{align}
   \boldX
   & =
   \begin{pmatrix} \boldX_1 \\ \boldX_2 \end{pmatrix}
   \label{eq:part-x}
   \\
   \boldmu
   & =
   \begin{pmatrix} \boldmu_1 \\ \boldmu_2 \end{pmatrix}
   \label{eq:part-mu}
   \\
   \boldM
   & =
   \begin{pmatrix}
       \boldM_{1 1} & \boldM_{1 2}
       \\
       \boldM_{2 1} & \boldM_{2 2}
   \end{pmatrix}
   \label{eq:part-sig}
\end{align}
\end{subequations}
when $\boldX_1$ consists of the first $r$ elements of $\boldX$
and $\boldX_2$ of the other $k - r$ elements and similarly for
$\boldmu_1$ and $\boldmu_2$.

\paragraph{Marginal Distributions}

Every marginal of a multivariate normal is normal (univariate or multivariate
as the case may be).   In partitioned form, the (marginal) distribution
of $\boldX_1$ is $\NormalDis(\boldmu_1, \boldM_{1 1})$.

\paragraph{Conditional Distributions}

Every conditional of a multivariate normal is normal (univariate or
multivariate as the case may be).   In partitioned form, the conditional
distribution of $\boldX_1$ given $\boldX_2$ is
$$
   \NormalDis(
   \boldmu_1 + \boldM_{1 2} \boldM_{2 2}^{-} [\boldX_2 - \boldmu_2],
   \boldM_{1 1} - \boldM_{1 2} \boldM_{2 2}^{-} \boldM_{2 1})
$$
where the notation $\boldM_{2 2}^{-}$ denotes the inverse of the matrix
$\boldM_{2 2}$ if the matrix is invertible and otherwise any
generalized inverse.

\pagebreak

\section{Chi-Square Distribution}

\paragraph{Abbreviation} $\ChiSqDis(\nu)$ or $\chi^2(\nu)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Sum of squares of IID standard normal random variables.
\item Sampling distribution of sample variance when data are IID normal.
\end{itemize}

\paragraph{Parameter}

Real number $\nu > 0$ called ``degrees of freedom.''

\paragraph{Sample Space}

The interval $(0, \infty)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{(\tfrac{1}{2})^{\nu / 2}}{\Gamma(\tfrac{\nu}{2})}
   x^{\nu / 2 - 1} e^{- x / 2},
   \qquad 0 < x < \infty.
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \nu \\
   \var(X) & = 2 \nu
\end{align*}

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are independent random variables,
$X_i$ being $\ChiSqDis(\nu_i)$ distributed,
then $X_1 + \cdots + X_k$ is
a $\ChiSqDis(\nu_1 + \cdots + \nu_k)$ random variable.

\paragraph{Normal Approximation}

If $\nu$ is large, then 
$$
   \ChiSqDis(\nu)
   \approx
   \NormalDis(\nu, 2 \nu)
$$

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item $\ChiSqDis(\nu) = \GammaDis(\frac{\nu}{2}, \frac{1}{2})$.
\item If $X$ is $\NormalDis(0, 1)$ distributed, then $X^2$ is
    $\ChiSqDis(1)$ distributed.
\item If $X$ and $Y$ are independent, $X$ is $\NormalDis(0, 1)$
distributed and $Y$ is $\ChiSqDis(\nu)$ distributed, then
$X / \sqrt{Y / \nu}$ is $t(\nu)$ distributed.
\item If $X$ and $Y$ are independent and are
$\ChiSqDis(\mu)$ and $\ChiSqDis(\nu)$ distributed,
respectively, then
$(X / \mu) / (Y / \nu)$ is $F(\mu, \nu)$ distributed.
\end{itemize}

\section{Student's $t$ Distribution}

\paragraph{Abbreviation} $t(\nu)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Sampling distribution of pivotal quantity
$\sqrt{n} (\Xbar_n - \mu) / S_n$ when data are IID normal.
\item Marginal for $\mu$ in conjugate prior family for two-parameter
normal data.
\end{itemize}

\paragraph{Parameter}

Real number $\nu > 0$ called ``degrees of freedom.''

\paragraph{Sample Space}

The real numbers.

\paragraph{Probability Density Function}

$$
   f(x)
   =
   \frac{1}{\sqrt{\nu \pi}} \cdot
   \frac{\Gamma(\frac{\nu + 1}{2})}{\Gamma(\frac{\nu}{2})} \cdot
   \frac{1}{\left(1 + \frac{x^2}{\nu}\right)^{(\nu + 1) / 2}},
   \qquad -\infty < x < +\infty
$$

\paragraph{Moments}

If $\nu > 1$, then
$$
   E(X) = 0.
$$
Otherwise the mean does not exist.
If $\nu > 2$, then
$$
   \var(X) = \frac{\nu}{\nu - 2}.
$$
Otherwise the variance does not exist.

\paragraph{Normal Approximation}

If $\nu$ is large, then 
$$
   t(\nu)
   \approx
   \NormalDis(0, 1)
$$

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item If $X$ and $Y$ are independent, $X$ is $\NormalDis(0, 1)$
distributed and $Y$ is $\ChiSqDis(\nu)$ distributed, then
$X / \sqrt{Y / \nu}$ is $t(\nu)$ distributed.
\item If $X$ is $t(\nu)$ distributed, then $X^2$ is $F(1, \nu)$ distributed.
\item $t(1) = \CauchyDis(0, 1)$.
\end{itemize}

\section{Snedecor's $F$ Distribution}

\paragraph{Abbreviation} $F(\mu, \nu)$.

\paragraph{Type} Continuous.

\paragraph{Rationale}

\begin{itemize}
\item Ratio of sums of squares for normal data
(test statistics in regression and analysis of variance).
\end{itemize}

\paragraph{Parameters}

Real numbers $\mu > 0$ and $\nu > 0$ called ``numerator degrees of freedom''
and ``denominator degrees of freedom,'' respectively.

\paragraph{Sample Space}

The interval $(0, \infty)$ of the real numbers.

\paragraph{Probability Density Function}

$$
   f(x)
   =
   \frac{\Gamma(\frac{\mu + \nu}{2}) \mu^{\mu / 2} \nu^{\nu / 2}}
   {\Gamma(\frac{\mu}{2}) \Gamma(\frac{\nu}{2})}
   \cdot
   \frac{x^{\mu / 2 - 1}}{(\mu x + \nu)^{(\mu + \nu) / 2}},
   \qquad 0 < x < +\infty
$$

\paragraph{Moments}

If $\nu > 2$, then
$$
   E(X) = \frac{\nu}{\nu - 2}.
$$
Otherwise the mean does not exist.

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item If $X$ and $Y$ are independent and are
$\ChiSqDis(\mu)$ and $\ChiSqDis(\nu)$ distributed,
respectively, then
$(X / \mu) / (Y / \nu)$ is $F(\mu, \nu)$ distributed.
\item If $X$ is $t(\nu)$ distributed, then $X^2$ is $F(1, \nu)$ distributed.
\end{itemize}

\section{Cauchy Distribution}

\paragraph{Abbreviation} $\CauchyDis(\mu, \sigma)$.

\paragraph{Type} Continuous.

\paragraph{Rationales}

\begin{itemize}
\item Very heavy tailed distribution.
\item Counterexample to law of large numbers.
\end{itemize}

\paragraph{Parameters}

Real numbers $\mu$ and $\sigma > 0$, called the ``location'' and ``scale''
parameter, respectively.

\paragraph{Sample Space}

The real numbers.

\paragraph{Probability Density Function}

$$
   f(x)
   =
   \frac{1}{\pi \sigma} \cdot
   \frac{1}{1 + \left(\frac{x - \mu}{\sigma}\right)^2},
   \qquad -\infty < x < +\infty
$$

\paragraph{Moments}

No moments exist.

\paragraph{Addition Rule}

If $X_1$, $\ldots$, $X_k$ are IID $\CauchyDis(\mu, \sigma)$
random variables, then $\Xbar_k = (X_1 + \cdots + X_k) / k$ is also
$\CauchyDis(\mu, \sigma)$.

\paragraph{Relation to Other Distributions}

\begin{itemize}
\item $t(1) = \CauchyDis(0, 1)$.
\end{itemize}

\section{Laplace Distribution}

\paragraph{Abbreviation} $\text{Laplace}(\mu, \sigma)$.

\paragraph{Type} Continuous.

\paragraph{Rationales} Median is maximum likelihood estimate of location
parameter.

\paragraph{Parameters}

Real numbers $\mu$ and $\sigma > 0$, called the mean and standard deviation,
respectively.

\paragraph{Sample Space}

The real numbers.

\paragraph{Probability Density Function}

$$
   f(x) = \frac{\sqrt{2}}{2 \sigma}
   \exp\left(- \sqrt{2} \left\lvert\frac{x - \mu}{\sigma} \right\rvert \right),
   \qquad - \infty < x < \infty
$$

\paragraph{Moments}

\begin{align*}
   E(X) & = \mu
   \\
   \var(X) & = \sigma^2
\end{align*}

\end{document}



<div class="clearboth"></div>
</div> <!-- end of div main -->

<div id="footer">
<p>
Copyright 2003&mdash;2009 Charles J. Geyer
<p>
Last modified: 09 March 2009 at 1826 hours CDT.
<p>
<a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">
<img alt="Creative Commons License" style="border-width:0" src="http://i.creativecommons.org/l/by-sa/3.0/88x31.png">
</a>
<br>This 
<!-- Supposed to be this but doesn't validate, well known problem
   see http://infomesh.net/2002/rdfinhtml/
   RDF is XML, doesn't go into validatable HTML without contortions
<span xmlns:dc="http://purl.org/dc/elements/1.1/" href="http://purl.org/dc/dcmitype/" rel="dc:type">work</span> is licensed under a 
   For now just kludge -->
work is licensed under a 
<a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">Creative Commons Attribution-Share Alike 3.0 License</a>.
</p>
<p>
    <a href="http://validator.w3.org/check?uri=referer">
        <img src="/geyer/5101/valid-html401.png"
        alt="Valid HTML 4.01 Strict" height="31" width="88"></a>

    &nbsp;

    <a href="http://jigsaw.w3.org/css-validator/check?uri=referer">
        <img src="/geyer/5101/vcss-blue.gif"
        alt="Valid CSS!" height="31" width="88"></a>
  </p>
</div>


</body>
</html>

