\documentclass[12pt, letter]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage[mathscr]{euscript}
\usepackage{tikz}
\usepackage{enumitem}
\usepackage{graphicx}
\usepackage{wrapfig}
\usepackage{verbatim}
\addtolength{\topmargin}{-1.3in}
\addtolength{\textheight}{1.9in}
\addtolength{\oddsidemargin}{-0.7in}
\addtolength{\textwidth}{1.4in}
\pagestyle{plain}
%%%%%%%%%%%%%%%%%%%%%%%%
%%% new commands and math operators
%%%%%%%%%%%%%%%%%%%%%%%%
\DeclareMathOperator{\ad}{ad}
\DeclareMathOperator{\Ad}{Ad}
\newcommand{\CoAd}{\Ad^\dagger}
\DeclareMathOperator{\codim}{codim}
\DeclareMathOperator{\colim}{colim}
\def\d{\mathrm{d}}
\DeclareMathOperator{\Hom}{Hom}
\DeclareMathOperator{\im}{im}
\DeclareMathOperator{\pr}{pr}
\def\spn{\mathrm{span}}
\newcommand{\toto}{\rightrightarrows}
%%%%%%%%%%%%%%%%%%%%%%%%
%%% new theorem environments
%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{definition}
\newtheorem{thm}{Theorem}[section]
\newtheorem{corollary}[thm]{Corollary}
\newtheorem{definition}[thm]{Definition}
\newtheorem{example}[thm]{Example}
\newtheorem{fact}[thm]{Fact}
\newtheorem{facts}[thm]{Facts}
\newtheorem{lemma}[thm]{Lemma}
\newtheorem{notation}[thm]{Notation}
\newtheorem{proposition}[thm]{Proposition}
\newtheorem{remark}[thm]{Remark}
\newtheorem{theorem}[thm]{Theorem}
\newtheorem{exercise}{Exercise}
\numberwithin{equation}{section}
%%%%%%%%%%%%%%%%%%%%%%%%
%%% categories
%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\A}{\textsf{A}}
\newcommand{\B}{\textsf{B}}
\newcommand{\Bi}{\textsf{Bi}}
\newcommand{\C}{\textsf{C}}
\newcommand{\Cat}{\textsf{Cat}}
\newcommand{\CFG}{\textsf{CFG}}
\newcommand{\Diff}{\textsf{Diff}}
\newcommand{\Group}{\textsf{Group}}
\newcommand{\Gpoid}{\textsf{Gpoid}}
\newcommand{\LieGpoid}{\textsf{LieGpoid}}
\newcommand{\LinRel}{\textsf{LinRel}}
\newcommand{\Man}{\textsf{Man}}
\newcommand{\Mat}{\textsf{Mat}}
\newcommand{\Rel}{\textsf{Rel}}
\newcommand{\Set}{\textsf{Set}}
\newcommand{\TwoTerm}{\textsf{2Term}}
\newcommand{\TopGpoid}{\textsf{TopGpoid}}
\newcommand{\Vect}{\textsf{Vect}}
\newcommand{\TwoVect}{\textsf{2Vect}}
%%%%%%%%%%%%%%%%%%%%%%%%
%%% diagram macros
%%%%%%%%%%%%%%%%%%%%%%%%
% giving objects and morphisms of a category
\newcommand{\category}[2]
{
\begin{align*}
& \textbf{objects:}\quad #1 \\
& \textbf{morphisms:}\quad #2
\end{align*}
}
%%%%%%%%%%%%%%%%%%%%%%%%
%%% special font letters
%%%%%%%%%%%%%%%%%%%%%%%%
% blackboard bold
\def\bba{\mathbb A}
\def\bbb{\mathbb B}
\def\bbc{\mathbb C}
\def\bbd{\mathbb D}
\def\bbe{\mathbb E}
\def\bbf{\mathbb F}
\def\bbg{\mathbb G}
\def\bbh{\mathbb H}
\def\bbi{\mathbb I}
\def\bbj{\mathbb J}
\def\bbk{\mathbb K}
\def\bbl{\mathbb L}
\def\bbm{\mathbb M}
\def\bbn{\mathbb N}
\def\bbo{\mathbb O}
\def\bbp{\mathbb P}
\def\bbq{\mathbb Q}
\def\bbr{\mathbb R}
\def\bbs{\mathbb S}
\def\bbt{\mathbb T}
\def\bbu{\mathbb U}
\def\bbv{\mathbb V}
\def\bbw{\mathbb W}
\def\bbx{\mathbb X}
\def\bby{\mathbb Y}
\def\bbz{\mathbb Z}
% calligraphic
\def\cala{\mathcal{A}}
\def\calb{\mathcal{B}}
\def\calc{\mathcal{C}}
\def\cald{\mathcal{D}}
\def\cale{\mathcal{E}}
\def\calf{\mathcal{F}}
\def\calg{\mathcal{G}}
\def\calh{\mathcal{H}}
\def\cali{\mathcal{I}}
\def\CALJ{\mathcal{J}}
\def\calk{\mathcal{K}}
\def\call{\mathcal{L}}
\def\calm{\mathcal{M}}
\def\caln{\mathcal{N}}
\def\calo{\mathcal{O}}
\def\calp{\mathcal{P}}
\def\calq{\mathcal{Q}}
\def\calr{\mathcal{R}}
\def\cals{\mathcal{S}}
\def\calt{\mathcal{T}}
\def\calu{\mathcal{U}}
\def\calv{\mathcal{V}}
\def\calw{\mathcal{W}}
\def\calx{\mathcal{X}}
\def\caly{\mathcal{Y}}
\def\calz{\mathcal{Z}}
% lowercase fraktur
\def\ffa{\mathfrak a}
\def\ffb{\mathfrak b}
\def\ffc{\mathfrak c}
\def\ffd{\mathfrak d}
\def\ffe{\mathfrak e}
\def\fff{\mathfrak f}
\def\ffg{\mathfrak g}
\def\ffh{\mathfrak h}
\def\ffi{\mathfrak i}
\def\ffj{\mathfrak j}
\def\ffk{\mathfrak k}
\def\ffl{\mathfrak l}
\def\ffm{\mathfrak m}
\def\ffn{\mathfrak n}
\def\ffo{\mathfrak o}
\def\ffp{\mathfrak p}
\def\ffq{\mathfrak q}
\def\ffr{\mathfrak r}
\def\ffs{\mathfrak s}
\def\fft{\mathfrak t}
\def\ffu{\mathfrak u}
\def\ffv{\mathfrak v}
\def\ffw{\mathfrak w}
\def\ffx{\mathfrak x}
\def\ffy{\mathfrak y}
\def\ffz{\mathfrak z}
%uppercase fraktur
\def\ffA{\mathfrak A}
\def\ffB{\mathfrak B}
\def\ffC{\mathfrak C}
\def\ffD{\mathfrak D}
\def\ffE{\mathfrak E}
\def\ffF{\mathfrak F}
\def\ffG{\mathfrak G}
\def\ffH{\mathfrak H}
\def\ffI{\mathfrak I}
\def\ffJ{\mathfrak J}
\def\ffK{\mathfrak K}
\def\ffL{\mathfrak L}
\def\ffM{\mathfrak M}
\def\ffN{\mathfrak N}
\def\ffO{\mathfrak O}
\def\ffP{\mathfrak P}
\def\ffQ{\mathfrak Q}
\def\ffR{\mathfrak R}
\def\ffS{\mathfrak S}
\def\ffT{\mathfrak T}
\def\ffU{\mathfrak U}
\def\ffV{\mathfrak V}
\def\ffW{\mathfrak W}
\def\ffX{\mathfrak X}
\def\ffY{\mathfrak Y}
\def\ffZ{\mathfrak Z}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% START OF DOCUMENT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
\begin{center}
{\bf\large \hspace{0.8in} Linear Algebra Working Group :: Day 3} \vspace{0.1in}
\end{center}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\noindent {\it Note}: All vector spaces will be finite-dimensional vector spaces over the field $\bbr$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Principal component analysis and dimensional reduction}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{definition}\label{Def:SampleMeanMeanForm}
Given an $m\times N$ matrix $X$ of observations, with columns $X_j$ thought of as $m$-dimensional observation vectors, the {\bf sample mean} of the observation vectors is the vector:
\[
M:=\frac{1}{N}\sum_{j=1}^N X_j
\]
The matrix:
\[
B:=\left(\begin{array}{cccc}X_1-M & X_2-M & ... & X_N-M\end{array}\right)
\]
is called the {\bf mean-deviation form} of the matrix $X$ of observations. The columns of the matrix $B$ are often denoted by $\widehat X_j$.
\end{definition}
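\begin{remark}
These computations are easy to carry out numerically. The following is a minimal sketch, assuming NumPy is available; the observation matrix here is random and purely illustrative.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(3, 100))          # m x N, columns are observations
M = X.mean(axis=1, keepdims=True)      # sample mean, an m x 1 column
B = X - M                              # mean-deviation form: column j is X_j - M
print(B.mean(axis=1))                  # each row mean is (numerically) zero
\end{verbatim}
\end{remark}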
\begin{remark}
In a matrix of observations $X$, each column $X_j$ is one observation of $m$ variables. Thus, the rows correspond to variables, and the row $X^i$ of the matrix consists of $N$ observations of the $i^\text{th}$ variable. In these applications, $N$ is usually large.
\end{remark}
\begin{exercise}
Given an $m\times N$ matrix $X$ of observations, show that its mean-deviation form has zero sample mean.
\end{exercise}
\begin{definition}\label{Def:SampleVar}
Given a vector of observations $x=(x_1,...,x_N)$, let $\widehat x$ be the average of the observations. The {\bf sample variance} of the observations is the quantity:
\[
\text{Var}(x):=\frac{1}{N-1}\sum_{i=1}^N\left(x_i-\widehat x\right)^2
\]
\end{definition}
\begin{definition}\label{Def:SampleCoVar}
Let $x=(x_1,...,x_N)$ and $y=(y_1,...,y_N)$ be two vectors of observations with means $\widehat x$ and $\widehat y$, respectively. The (sample) {\bf covariance} of the observations is the quantity:
\[
\text{Covar}(x,y):=\frac{1}{N-1}\sum_{i=1}^N\left(x_i-\widehat x\right)\left(y_i-\widehat y\right)
\]
When the covariance between $x$ and $y$ is $0$ we say the data $x$ and $y$ are {\bf uncorrelated}.
\end{definition}
\begin{definition}\label{Def:SampleCoVarMat}
Given an $m\times N$ matrix of observations $X$, let $B$ be the mean-deviation form of $X$. The {\bf sample covariance matrix} of the matrix of observations is the $m\times m$ matrix $S$ defined by:
\[
S:=\frac{1}{N-1}BB^T
\]
\end{definition}
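\begin{remark}
A minimal numerical sketch of Definition \ref{Def:SampleCoVarMat}, assuming NumPy is available; note that NumPy's built-in \texttt{np.cov} computes the same matrix.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(3, 500))          # m x N observation matrix (illustrative)
B = X - X.mean(axis=1, keepdims=True)  # mean-deviation form
S = B @ B.T / (X.shape[1] - 1)         # sample covariance matrix, m x m
print(np.allclose(S, np.cov(X)))       # True: agrees with np.cov
\end{verbatim}
\end{remark}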
\begin{exercise}
Show that the sample covariance matrix of a matrix of observations is positive semidefinite. (Use Exercise 24 or 31 from Day 2.)
\end{exercise}
\begin{exercise}
Let $X$ be a matrix of observations and let $S$ be the covariance matrix of $X$. Suppose that the matrix $X$ is already in mean-deviation form. Show that the diagonal entry $S_{ii}$ of the matrix $S$ is the variance of the $i^\text{th}$ row of $X$ viewed as a vector of observations. Show that the off-diagonal entry $S_{ij}$ of the covariance matrix is the covariance of the $i^\text{th}$ and $j^\text{th}$ rows of $X$.
\end{exercise}
\begin{definition}\label{Def:TotVar}
Let $X$ be an $m\times N$ matrix of observations and $S$ its covariance matrix. The {\bf total variance} of the observation matrix $X$ is the trace:
\[
\text{tr}(S):=\sum_{j=1}^m S_{jj}
\]
\end{definition}
\begin{exercise}
Let $A$ and $B$ be two $n\times n$ matrices.
\begin{enumerate}
\item Show that $\text{tr}(AB)=\text{tr}(BA)$.
\item Show that if $A$ and $B$ are similar, $\text{tr}(A)=\text{tr}(B)$.
\end{enumerate}
\end{exercise}
\begin{exercise}\label{Ex:PCAMotivation}
Let $X$ be an $m\times N$ matrix of observations already in mean-deviation form and let $P$ be an $m\times m$ orthogonal matrix. Let $Y$ be the $m\times N$ matrix $Y:=P^TX$. Then:
\begin{enumerate}
\item Show that the matrix $Y$ is in mean-deviation form.
\item Show that if the covariance matrix of $X$ is $S$, then the covariance matrix of $Y$ is $P^TSP$.
\item Show that the total variances of $X$ and $Y$ are the same.
\end{enumerate}
\end{exercise}
\begin{definition}\label{Def:PrincipalComponents}
Let $X$ be an $m\times N$ matrix of observations and let $S$ be its covariance matrix. Let $S$ have the eigenvalues:
\[
\lambda_1\ge \lambda_2 \ge ... \ge \lambda_m\ge 0
\]
with corresponding orthonormal eigenvectors $u_1, u_2, ..., u_m$. The eigenvector $u_i$ is called the {\bf $i^\text{th}$ principal component} of the data.
\end{definition}
\begin{remark}\label{Rem:PCA}
Principal component analysis consists of taking a matrix of observations $X$ and finding an orthogonal change of variables $Y=P^TX$ that makes the new variables uncorrelated. The reason for requiring orthogonality can be seen in Exercise \ref{Ex:PCAMotivation}. The new variables are put in order of decreasing variance as a matter of convention.
\end{remark}
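\begin{remark}
The change of variables of Remark \ref{Rem:PCA} can be sketched numerically as follows, assuming NumPy is available; the data are synthetic. Note that \texttt{np.linalg.eigh} returns the eigenvalues of a symmetric matrix in ascending order, so both outputs are reversed to get decreasing variances.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(2, 200))
X[1] += 0.8 * X[0]                      # introduce correlation between the rows
B = X - X.mean(axis=1, keepdims=True)   # mean-deviation form
S = B @ B.T / (B.shape[1] - 1)          # sample covariance matrix

w, P = np.linalg.eigh(S)                # eigenvalues in ascending order
w, P = w[::-1], P[:, ::-1]              # columns of P: principal components

Y = P.T @ B                             # orthogonal change of variables
S_Y = Y @ Y.T / (Y.shape[1] - 1)
print(np.round(S_Y, 10))                # diagonal matrix: the new variables
                                        # are uncorrelated, variances decreasing
\end{verbatim}
\end{remark}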
\begin{exercise}
Let $X$ be a matrix of observations in mean-deviation form and let $S$ be its covariance matrix. Use facts about symmetric matrices from Day 2 to show that there exists an orthogonal change of variables $Y=P^TX$ such that the matrix $P$ consists of the principal components and the new covariance matrix shows the new variables are uncorrelated.
\end{exercise}
\begin{exercise}
Consider the following matrix of observations:
\[
X=\left(\begin{array}{cccccc}
19 & 22 & 6 & 3 & 2 & 20 \\
12 & 6 & 9 & 15 & 13 & 5
\end{array}\right)
\]
\begin{enumerate}
\item Convert the matrix of observations to mean-deviation form.
\item Construct the sample covariance matrix.
\item Find the principal components of the data.
\item Perform a change of variables to principal components.
\end{enumerate}
\end{exercise}
\begin{remark}\label{Rem:DimRed}
Given a matrix of observations, dimensional reduction consists of performing a change of variables to principal components and then orthogonally projecting onto the subspace that carries the overwhelming share of the variance.
\end{remark}
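\begin{remark}
A sketch of dimensional reduction under the same assumptions (NumPy, synthetic data): after changing to principal components, only the first $k$ components are kept.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(2)
X = rng.normal(size=(3, 1000))
X[2] *= 0.05                            # third variable carries little variance
B = X - X.mean(axis=1, keepdims=True)
S = B @ B.T / (B.shape[1] - 1)

w, P = np.linalg.eigh(S)
w, P = w[::-1], P[:, ::-1]
print(w / w.sum())                      # proportion of the total variance
                                        # explained by each component
k = 2                                   # keep the dominant components
Y_reduced = P[:, :k].T @ B              # k x N matrix of reduced observations
\end{verbatim}
\end{remark}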
\begin{exercise}
Suppose a $3\times 1000$ matrix of observations $X$ has the following covariance matrix:
\[
S=\left(\begin{array}{ccc}
70 & 0 & 0 \\
0 & 20 & 5\sqrt{3} \\
0 & 5\sqrt{3} & 10
\end{array}\right)
\]
\begin{enumerate}
\item Obtain the principal components.
\item In the new variables, what are the proportions of each of the variances to the total variance?
\item Should we do dimensional reduction? To which subspace should we project?
\item Starting from the matrix of observations $X$, what does it mean to reduce dimensions as in Remark \ref{Rem:DimRed}? That is, what observations do we consider when we perform dimensional reduction on $X$?
\end{enumerate}
\end{exercise}
\begin{exercise}
Let $X$ be an $m\times N$ matrix of observations in mean-deviation form and let $S$ be its covariance matrix. Let $A:=\frac{1}{\sqrt{N-1}}X^T$. Suppose $A=U\Sigma V^T$ is a singular value decomposition of $A$. Identify the eigenvalues of the covariance matrix $S$ and the principal components from the singular value decomposition.
\end{exercise}
\begin{remark}
Computing an SVD of $A$ is numerically more stable than forming $S$ and computing its eigenvalues. Thus, reading off the principal components from an SVD, as in the previous exercise, is often done in practice.
\end{remark}
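\begin{remark}
A sketch of the previous exercise, assuming NumPy: the squared singular values of $A$ recover the eigenvalues of $S$, and the right singular vectors (the rows of \texttt{Vt} below) are the principal components.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(3)
X = rng.normal(size=(3, 400))
B = X - X.mean(axis=1, keepdims=True)      # mean-deviation form
A = B.T / np.sqrt(B.shape[1] - 1)          # the N x m matrix of the exercise

U, sigma, Vt = np.linalg.svd(A, full_matrices=False)
S = B @ B.T / (B.shape[1] - 1)
print(np.allclose(np.sort(sigma**2),
                  np.sort(np.linalg.eigvalsh(S))))   # True
\end{verbatim}
\end{remark}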
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{A brief look at Markov chains}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{definition}\label{Def:Convex}
Let $v_1,...,v_n\in\bbr^n$ be vectors. A {\bf convex combination} of them is a vector:
\[
c_1v_1+...+c_nv_n
\]
such that the scalars $c_i$ are nonnegative and sum to $1$. The {\bf convex hull} of the set $B=\{v_1,...,v_n\}$ is denoted by $\text{conv}(B)$ and consists of all convex combinations of $v_1,...,v_n$. A set is {\bf convex} if it contains the line segment between any two of its points.
\end{definition}
\begin{exercise}
Let $A$ and $B$ be sets in $\bbr^n$.
\begin{enumerate}
\item Show that a set $B$ is convex if and only if $B=\text{conv}(B)$.
\item Show that if $A\subseteq B$ and $B$ is convex, then $\text{conv}(A)\subseteq B$. Find a counterexample in $\bbr^2$ to show that equality need not hold.
\item Show that if $A\subseteq B$, then $\text{conv}(A)\subseteq \text{conv}(B)$.
\item Show that $\text{conv}(A)\cup\text{conv}(B)\subseteq \text{conv}(A\cup B)$.
\end{enumerate}
\end{exercise}
\begin{exercise}
Consider the standard basis vectors $B=\{e_1,...,e_n\}$ of $\bbr^n$. Describe geometrically the convex hull $\text{conv}(B)$. This is often denoted by $\Delta^{n-1}$.
\end{exercise}
\begin{definition}\label{Def:ProbVect}
A vector $x\in\bbr^n$ is a {\bf probability vector} or {\bf stochastic vector} if it consists of nonnegative entries that add up to $1$. That is, a stochastic vector is a vector in the convex hull of the standard basis vectors $e_1,...,e_n$. We'll denote by $\calm^n$ the set of stochastic vectors (that is, the standard simplex $\Delta^{n-1}$ in $\bbr^n$). A {\bf stochastic matrix} is an $n\times n$ matrix $P$ with columns that are stochastic vectors.
\end{definition}
\begin{exercise}
Let $P$ be an $n\times n$ stochastic matrix and let $S$ be the $1\times n$ matrix:
\[
S=\left(\begin{array}{cccc} 1 & 1 & ... & 1\end{array}\right)
\]
\begin{enumerate}
\item Show that $x\in\bbr^n$ is a stochastic vector if and only if $Sx=1$ and all entries are nonnegative.
\item Show that $SP=S$.
\end{enumerate}
\end{exercise}
\begin{exercise}
Let $P$ be a stochastic matrix. Then:
\begin{enumerate}
\item Show that all nonnegative powers of $P$ are also stochastic.
\item Show that if $x$ is a stochastic vector, then so is $Px$.
\item Show that the product of stochastic matrices is a stochastic matrix.
\end{enumerate}
\end{exercise}
\begin{exercise}
Let $P$ be an $n\times n$ stochastic matrix. Show that $1$ is an eigenvalue of $P$. (Hint: Instead of $P$ consider $P^T$ and the vector with every entry equal to $1$.)
\end{exercise}
\begin{exercise}
Give an example of a stochastic matrix where the algebraic multiplicity of the eigenvalue $1$ is greater than $1$.
\end{exercise}
\begin{exercise}
Let $P$ be an $n\times n$ stochastic matrix. Show that the eigenvalues $\lambda$ of $P$ are such that $|\lambda|\le1$. (Hint: Try a proof by contradiction.)
\end{exercise}
\begin{exercise}
Give an example of a stochastic matrix that has negative eigenvalues.
\end{exercise}
\begin{exercise}
Give an example of a stochastic matrix that is not invertible. Thus, a stochastic matrix may have eigenvalues equal to $0$.
\end{exercise}
\begin{exercise}
Give an example of a stochastic matrix that isn't diagonalizable. (Hint: Look for a $3\times 3$ stochastic matrix whose row-reduced form makes the geometric and algebraic multiplicities easy to determine.)
\end{exercise}
\begin{definition}\label{Def:MarkovChain}
A {\bf Markov chain} is a sequence of stochastic vectors $\{x_k\}_{k=0}^\infty$ and a stochastic matrix $P$ such that:
\[
x_{k+1}=Px_k
\]
for all $k\ge 0$. Equivalently, a Markov chain is a pair $\left(P,x_0\right)$ consisting of an $n\times n$ stochastic matrix $P$ and a stochastic vector $x_0\in \calm^n$.
\end{definition}
\begin{remark}
The entries in a stochastic vector are often viewed as the probabilities of the possible states of a system. Thus, it is often called a {\bf state vector}. In a Markov chain, the sequence of state vectors exhibits how the probabilities of being in each of the states change over time. This is essentially a discrete dynamical system with initial condition $x_0$ whose map is the stochastic matrix $P$. Thus, it is natural to be interested in its long-term behavior. We think of the entry of $P$ in position $(i,j)$ as the probability of transitioning from the state corresponding to position $j$ to the state corresponding to position $i$.
\end{remark}
\begin{definition}\label{Def:SteadyState}
Given a Markov chain $\left(P,x_0\right)$, a {\bf steady state vector} is a fixed point of the matrix $P$. That is, a steady state is a vector $q\in\calm^n$ such that $Pq=q$.
\end{definition}
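\begin{remark}
Numerically, a steady state vector can be found as an eigenvector of $P$ for the eigenvalue $1$, rescaled to be stochastic. A minimal sketch, assuming NumPy, on an illustrative $2\times 2$ stochastic matrix:
\begin{verbatim}
import numpy as np

P = np.array([[0.5, 0.3],
              [0.5, 0.7]])                     # columns sum to 1

w, V = np.linalg.eig(P)
q = np.real(V[:, np.argmin(np.abs(w - 1.0))])  # eigenvector for eigenvalue 1
q = q / q.sum()                                # rescale to a probability vector
print(q, P @ q)                                # P q = q, so q is a steady state
\end{verbatim}
\end{remark}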
\begin{exercise}\label{Ex:SteadyState}
Find the steady state vector of the stochastic matrix:
\[
P=\left(\begin{array}{ccc}
.7 & .1 & .3 \\
.2 & .8 & .3 \\
.1 & .1 & .4
\end{array}\right)
\]
\end{exercise}
\begin{exercise}
Show that every $2\times 2$ stochastic matrix has a steady state vector.
\end{exercise}
\begin{definition}\label{Def:Conv}
Given a Markov chain $\left(P,x_0\right)$ and a stochastic vector $q\in\calm^n$, we say the Markov chain {\bf converges} to the vector $q$ if:
\[
\lim_{k\to\infty}\left|\left|P^kx_0-q\right|\right|=0
\]
That is, if the sequence $\{P^kx_0\}_{k=1}^\infty$ converges to $q$ in the usual sense.
\end{definition}
\begin{definition}\label{Def:LongRunProbabilities}
Let $q$ be a steady state vector of a stochastic matrix $P$ such that for an initial condition $x_0\in\calm^n$, the Markov chain $(P,x_0)$ converges to $q$. The entries of $q$ are called {\bf long run probabilities} of the Markov chain.
\end{definition}
\begin{definition}
A stochastic matrix $P$ is {\bf regular} if there exists an integer $k\ge 1$ such that $P^k$ has only positive entries.
\end{definition}
\begin{exercise}
Give an example of a stochastic matrix that is regular but not invertible.
\end{exercise}
\begin{theorem}
Let $P$ be an $n\times n$ regular stochastic matrix. Then there exists a unique steady state vector $q$ of $P$. Furthermore, for any stochastic vector $x_0$, the Markov chain $(P,x_0)$ converges to the steady state $q$.
\end{theorem}
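\begin{remark}
The theorem can be observed numerically by iterating $x_{k+1}=Px_k$; a minimal sketch, assuming NumPy, with an illustrative regular stochastic matrix:
\begin{verbatim}
import numpy as np

P = np.array([[0.5, 0.2, 0.3],
              [0.3, 0.8, 0.3],
              [0.2, 0.0, 0.4]])          # P^2 has only positive entries

x = np.array([1.0, 0.0, 0.0])            # any initial stochastic vector
for _ in range(50):
    x = P @ x                            # the Markov chain (P, x_0)
print(x)                                 # approximates the steady state q
\end{verbatim}
Starting from a different stochastic vector produces the same limit.
\end{remark}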
\begin{exercise}
Show that the Markov chain given by the stochastic matrix $P$ of Exercise \ref{Ex:SteadyState} and the initial vector $x_0=\left(\begin{array}{ccc}0.1 & 0.8 & 0.1\end{array}\right)^T$ converges to the steady state vector you found.
\end{exercise}
\begin{theorem}\label{Thm:SteadyStateExistence}
Let $P$ be an $n\times n$ stochastic matrix. Then $P$ has a steady state stochastic vector $q\in\calm^n$.
\end{theorem}
\begin{exercise}
Prove Theorem \ref{Thm:SteadyStateExistence} as follows:
\begin{enumerate}
\item Let $P$ be a stochastic matrix. Let $x=(x_1,...,x_n)^T\in\bbr^n$ be any vector and let $Px=(y_1,...,y_n)^T$. Show that:
\[
|y_1|+...+|y_n|\le |x_1|+...+|x_n|
\]
with equality if all the nonzero entries of $x$ have the same sign.
\item Let $v$ be an eigenvector for the eigenvalue $1$. Apply part (1) and construct the steady state stochastic vector from $v$.
\end{enumerate}
\end{exercise}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{LU Factorizations}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{definition}\label{Def:EchelonForm}
A matrix $A$ is in {\bf echelon form} if it has the following three properties:
\begin{enumerate}
\item Any nonzero rows are above zero rows.
\item A row's leading entry is in a column to the right of the leading entry of the row above it.
\item All entries below a leading entry are zero.
\end{enumerate}
\end{definition}
\begin{definition}\label{Def:LU}
Let $A$ be an $m\times n$ matrix. An {\bf LU factorization} is a factorization of the form $A=LU$ where $L$ is an $m\times m$ lower triangular matrix with ones on the diagonal and $U$ is an $m\times n$ echelon form of $A$.
\end{definition}
\begin{example}The following is an LU factorization:
\[
\left(\begin{array}{cccc}
3 & -7 & -2 & 2 \\
-3 & 5 & 1 & 0 \\
6 & -4 & 0 & -5 \\
-9 & 5 & -5 & 12
\end{array}\right)
= \left(\begin{array}{cccc}
1&0&0&0\\
-1&1&0&0\\
2&-5&1&0\\
-3&8&3&1
\end{array}\right)
\left(\begin{array}{cccc}
3&-7&-2&2\\
0&-2&-1&2\\
0&0&-1&1\\
0&0&0&-1
\end{array}
\right)
\]
\end{example}
\begin{definition}\label{Def:ElMat}
An $n\times n$ {\bf elementary matrix} is a matrix obtained by doing only one of the following to the $n\times n$ identity matrix:
\begin{enumerate}
\item Adding a multiple of a row to another row.
\item Interchanging two rows.
\item Multiplying all entries in a row by a nonzero constant.
\end{enumerate}
\end{definition}
\begin{exercise}
Let $E$ be an $m\times m$ elementary matrix and $A$ be an $m\times n$ matrix. What is the relationship of $EA$ to $A$? What can you guarantee if $E$ is lower triangular?
\end{exercise}
\begin{theorem}\label{Thm:LUEx}
Suppose $A$ is an $m\times n$ matrix for which there exist unit lower-triangular elementary matrices $E_1,...,E_p$ such that:
\[
E_p...E_1A
\]
is an echelon form of $A$. Then $U:=E_p...E_1A$ and $L:=(E_p...E_1)^{-1}$ give an LU factorization of $A$.
\end{theorem}
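\begin{remark}
Theorem \ref{Thm:LUEx} translates directly into an algorithm: record the multipliers used to clear each column during row reduction. A minimal sketch, assuming NumPy is available and that no row interchanges are ever needed; the matrix is the one from the example above, so the output can be compared with the factors given there.
\begin{verbatim}
import numpy as np

def lu_no_pivot(A):
    """LU by row reduction, assuming no row swaps are ever needed."""
    m = A.shape[0]
    U = A.astype(float).copy()
    L = np.eye(m)
    for j in range(m - 1):
        for i in range(j + 1, m):
            L[i, j] = U[i, j] / U[j, j]   # multiplier clearing entry (i, j)
            U[i, :] -= L[i, j] * U[j, :]
    return L, U

A = np.array([[3., -7., -2., 2.],
              [-3., 5., 1., 0.],
              [6., -4., 0., -5.],
              [-9., 5., -5., 12.]])
L, U = lu_no_pivot(A)
print(np.allclose(L @ U, A))              # True
\end{verbatim}
\end{remark}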
\begin{exercise}
By row reducing the matrix $A$, use Theorem \ref{Thm:LUEx} to find an LU factorization of each of the following:
\begin{enumerate}
\item $A=\left(\begin{array}{ccc}3&-7&-2\\-3&5&1\\6&-4&0\end{array}\right)$
\item $A=\left(\begin{array}{ccc}2&-6&4\\-4&8&0\\0&-4&6\end{array}\right)$
\end{enumerate}
\end{exercise}
\begin{exercise}\label{Ex:LUsys}
Suppose $A$ is an $m\times n$ matrix and $b\in\bbr^m$ is a vector for which we want to solve the equation $Ax=b$. Suppose $A=LU$ is an LU factorization of $A$.
\begin{enumerate}
\item Turn the equation $Ax=b$ into a pair of equations, one that involves $L$ only and one that involves $U$ only.
\item Give an algorithm to solve the system using your answer in (1).
\item Why do you think this is a good approach? Lay has a good brief discussion of the utility of the LU factorization \cite[Sec.~2.5]{L94}.
\end{enumerate}
\end{exercise}
\begin{exercise}
Use the method of exercise \ref{Ex:LUsys} to solve the system $Ax=b$ for each of the matrices there, where $b$ is respectively:
\[
b=\left(\begin{array}{c}-7\\5\\2\end{array}\right)
\qquad
b=\left(\begin{array}{c}2\\-4\\6\end{array}\right)
\]
\end{exercise}
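\begin{remark}
Answers can be checked with SciPy, whose \texttt{lu\_factor}/\texttt{lu\_solve} pair implements the strategy of Exercise \ref{Ex:LUsys}: factor once, then perform two triangular solves. (SciPy uses partial pivoting, so its factors may differ from ones found by hand.) A minimal sketch, assuming SciPy is available:
\begin{verbatim}
import numpy as np
from scipy.linalg import lu_factor, lu_solve

A = np.array([[2., -6., 4.],
              [-4., 8., 0.],
              [0., -4., 6.]])
b = np.array([2., -4., 6.])

x = lu_solve(lu_factor(A), b)    # one factorization, two triangular solves
print(np.allclose(A @ x, b))     # True
\end{verbatim}
\end{remark}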
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Duals and annihilators}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{definition}\label{Def:Hom}
Let $V$ and $W$ be vector spaces. The vector space of linear maps from $V$ to $W$ is the space:
\[
\Hom(V,W):=\{T:V\to W \mid T \text{ is linear}\}
\]
\end{definition}
\begin{exercise}
Show that if $V$ and $W$ are vector spaces, then $\Hom(V,W)$ really is a vector space.
\end{exercise}
\begin{exercise}
Let $V$ and $W$ be finite-dimensional vector spaces. Show that $\Hom(V,W)$ is isomorphic (as a vector space) to the space $\text{Mat}(\dim W,\dim V)$ of $\dim W\times \dim V$ matrices. (Hint: Pick bases.)
\end{exercise}
\begin{exercise}
Let $V$ and $W$ be finite-dimensional vector spaces. What is the dimension of $\Hom(V,W)$?
\end{exercise}
\begin{definition}\label{Def:Dual}
Let $V$ be a finite-dimensional vector space. The {\bf dual} of $V$ is the vector space:
\[
V^*:=\Hom(V,\bbr)=\{T:V\to\bbr\mid T \text{ is linear}\}
\]
\end{definition}
\begin{exercise}\label{Ex:DualBasis}
Let $\{e_1,...,e_n\}$ be a basis of a finite-dimensional vector space $V$. Show there exists a basis $\{e^*_1,...,e^*_n\}$ of the dual $V^*$ such that:
\[
e_i^*(e_j)=\delta_{ij}
=\begin{cases}
1 & \text{ if }i=j\\
0 & \text{ if }i\not=j
\end{cases}
\]
Conclude that $V$ and $V^*$ are isomorphic as vector spaces and that $\dim V=\dim V^*$.
\end{exercise}
\begin{definition}\label{Def:DualBasis}
Let $V$ be a finite-dimensional vector space with a basis $\{e_i\}$. The basis $\{e^*_i\}$ of Exercise \ref{Ex:DualBasis} is called the {\bf dual basis} of $V^*$ dual to $\{e_i\}$.
\end{definition}
\begin{exercise}
What is the dual basis in the following cases:
\begin{enumerate}
\item $\bbr^n$ with the standard basis vectors $\{e_1,...,e_n\}$.
\item $\bbr[n]$, the space of polynomials of degree at most $n$, with the basis $\{1,t,t^2,...,t^n\}$.
\end{enumerate}
\end{exercise}
\begin{exercise}
Let $V$ and $W$ be finite-dimensional vector spaces and let $T:V\to W$ be a linear map. \begin{enumerate}
\item Show that for any linear map $\rho:W\to\bbr$, we get a linear map:
\[
T^*\rho:V\to\bbr, \qquad T^*\rho(v):=\rho (T(v))
\]
\item Show that the map:
\[
T^*:W^*\to V^*, \qquad \rho\mapsto T^*\rho
\]
is a linear map.
\end{enumerate}
\end{exercise}
\begin{exercise}
Show that if $T:U\to V$ and $S:V\to W$ are linear maps between finite-dimensional vector spaces, then $(S\circ T)^*=T^*\circ S^*$.
\end{exercise}
\begin{exercise}
Let $V$ be a finite-dimensional vector space. Consider the double dual $V^{**}=\Hom(\Hom(V,\bbr),\bbr)$.
\begin{enumerate}
\item Let $v\in V$ be a fixed vector. Prove that the evaluation map:
\[
\text{ev}_v:V^*\to\bbr, \qquad \text{ev}_v(\rho):=\rho(v)
\]
is an element of $V^{**}$. That is, prove that $\text{ev}_v$ is a linear map from $V^*$ to $\bbr$.
\item Prove that the map:
\[
N:V\to V^{**}, \qquad N(v):=\text{ev}_v
\]
is a linear isomorphism. (In fact, this map exhibits a natural isomorphism.)
\end{enumerate}
\end{exercise}
\begin{definition}\label{Def:Annih}
Let $V$ be a vector space and let $U\subseteq V$ be a subspace. The {\bf annihilator} of $U$ is the subspace:
\[
U^0:=\{\rho\in V^*\mid \rho|_U\equiv 0\}
\]
\end{definition}
\begin{exercise}
Let $V$ be a vector space with a subspace $U\subseteq V$. Show that the annihilator $U^0$ really is a subspace of $V^*$.
\end{exercise}
\begin{exercise}
Let $V$ be a vector space. What are the annihilators of $\{0\}$ and $V$ respectively?
\end{exercise}
\begin{exercise}
Let $V$ be a finite-dimensional vector space. Suppose $U\subseteq V$ is a subspace different from $\{0\}$. Show that $U^0\not=V^*$.
\end{exercise}
\begin{exercise}
Let $V$ be a finite-dimensional vector space and let $U\subseteq V$ be a subspace. Show that $\dim U+\dim U^0=\dim V$.
\end{exercise}
\begin{exercise}
Let $V$ be a finite-dimensional vector space and let $U$ and $W$ be subspaces of $V$. Show that if $U\subseteq W$, then $W^0\subseteq U^0$.
\end{exercise}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Some multilinear algebra}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{definition}\label{Def:MultiLin}
Let $V_1,...,V_n$ and $U$ be vector spaces. A map:
\[
T:V_1\times...\times V_n\to U
\]
is {\bf multilinear} (we sometimes say {\bf $n$-linear}) if for each $i$ and all fixed vectors $v_1\in V_1,...,v_{i-1}\in V_{i-1},v_{i+1}\in V_{i+1},...,v_n\in V_n$, the map:
\[
V_i\to U, \qquad w\mapsto T(v_1,...,v_{i-1},w,v_{i+1},...,v_n)
\]
is linear. That is, freezing all but one variable yields a linear map.
\end{definition}
\begin{exercise}
Show that the dot product on $\bbr^n$ is a bilinear (that is, a $2$-linear) map.
\end{exercise}
\begin{exercise}\label{Ex:Det}
Make the identification:
\[
\bbr^{n^2}\cong\underbrace{\bbr^n\times...\times\bbr^n}_{n\text{ times}}
\]
so that an element of $\bbr^{n^2}$ is viewed as $n$ column vectors. Show that the determinant:
\[
\det:\bbr^{n^2}\to\bbr, \qquad (v_1,...,v_n)\mapsto \det\big(v_1|...|v_n\big)
\]
is an $n$-linear map. The space of $n$-linear maps will be denoted by $\text{Mult}(V_1\times...\times V_n,U)$.
\end{exercise}
\begin{exercise}
Let $V,W,U$ be finite-dimensional vector spaces with bases $\{v_i\}_{i=1}^m$, $\{w_j\}_{j=1}^n$, $\{u_k\}_{k=1}^l$ and corresponding dual bases $\{v_i^*\}_{i=1}^m$, $\{w_j^*\}_{j=1}^n$, $\{u_k^*\}_{k=1}^l$. Show that the maps:
\[
\phi^k_{i,j}:V\times W\to U, \qquad \phi^k_{i,j}(v,w):=v_i^*(v)w^*_j(w)u_k
\]
form a basis of $\text{Mult}(V\times W,U)$. Conclude that $\dim\text{Mult}(V\times W,U)=\dim V\dim W\dim U$.
\end{exercise}
\begin{definition}
Let $V$ and $U$ be vector spaces and let $T:V\times...\times V\to U$ be a multilinear map. The map $T$ is {\bf alternating} if its value changes sign whenever two of its arguments are interchanged. That is, for all $v_1,...,v_i,...,v_j,...,v_n\in V$ we have:
\[
T(v_1,...,v_i,...,v_j,...,v_n)=-T(v_1,...,v_j,...,v_i,...,v_n)
\]
\end{definition}
\begin{exercise}
Show that the determinant is an alternating $n$-linear map when viewed as in Exercise \ref{Ex:Det}.
\end{exercise}
\begin{exercise}
Let $\omega:\bbr^n\times \bbr^n\to\bbr$ be a bilinear map. Show there exists a matrix $B$ such that for all $u,v\in\bbr^n$ we have:
\[
\omega(u,v)=v^TBu
\]
\end{exercise}
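\begin{example}
On $\bbr^2$, the bilinear map $\omega(u,v)=u_1v_2-u_2v_1$ (the $2\times 2$ determinant of Exercise \ref{Ex:Det}) is represented by the matrix:
\[
B=\left(\begin{array}{cc}0&-1\\1&0\end{array}\right)
\]
since $v^TBu=-v_1u_2+v_2u_1=\omega(u,v)$. In general, the entries of $B$ are determined by evaluating $\omega$ on pairs of standard basis vectors: $B_{pq}=\omega(e_q,e_p)$.
\end{example}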
\begin{thebibliography}{WWWW}
\bibitem[Hal58]{Hal58} P.R. Halmos. Finite-Dimensional Vector Spaces. Reprinting of the 1958 second edition. {\it Undergraduate Texts in Mathematics}. Springer-Verlag, New York-Heidelberg, 1974.
\bibitem[L94]{L94} D.C. Lay. Linear Algebra and its Applications. Fourth Edition. Addison-Wesley, 2012.
\bibitem[Rom08]{Rom08} S. Roman. Advanced Linear Algebra. Third Edition. {\it Graduate Texts in Mathematics, 135}. Springer, New York, 2008.
\end{thebibliography}
\end{document}