\documentclass{article}
\usepackage{myVignette}
\usepackage[authoryear,round]{natbib}
\bibliographystyle{plainnat}
%%\VignetteIndexEntry{Comparisons of Least Squares calculation speeds}
%%\VignetteDepends{Matrix,datasets,stats,utils}
\begin{document}
\SweaveOpts{engine=R,eps=FALSE,pdf=TRUE,width=5,height=3,strip.white=true,keep.source=TRUE}
\setkeys{Gin}{width=\textwidth}
\title{Comparing Least Squares Calculations}
\author{Douglas Bates\\R Development Core Team\\\email{Douglas.Bates@R-project.org}}
\date{\today}
\maketitle
\begin{abstract}
Many statistical methods require one or more least squares problems
to be solved.  There are several ways to perform this calculation,
using objects from the base R system and using objects in the
classes defined in the \code{Matrix} package.

We compare the speed of some of these methods on a very small
example and on an example for which the model matrix is large and
sparse.
\end{abstract}
<<preliminaries, echo=FALSE>>=
options(width=75)
library(stats) # for R_DEFAULT_PACKAGES=NULL
library(utils) # ditto
@

\section{Linear least squares calculations}
\label{sec:LeastSquares}

Many statistical techniques require least squares solutions
\begin{equation}
  \label{eq:LeastSquares}
  \widehat{\bm{\beta}}=
  \arg\min_{\bm{\beta}}\left\|\bm{y}-\bX\bm{\beta}\right\|^2
\end{equation}
where $\bX$ is an $n\times p$ model matrix ($p\leq n$), $\bm{y}$ is
$n$-dimensional and $\bm{\beta}$ is $p$-dimensional.  Most statistics
texts state that the solution to (\ref{eq:LeastSquares}) is
\begin{equation}
  \label{eq:XPX}
  \widehat{\bm{\beta}}=\left(\bX\trans\bX\right)^{-1}\bX\trans\bm{y}
\end{equation}
when $\bX$ has full column rank (i.e., the columns of $\bX$ are
linearly independent), and all too frequently it is calculated in
exactly this way.
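Beyond the extra arithmetic, forming the cross-product also squares the
condition number of the problem, which is why the literal use of
(\ref{eq:XPX}) can be numerically delicate for ill-conditioned $\bX$.
A minimal sketch of the effect, using a deliberately ill-conditioned
toy matrix rather than the examples timed below (the chunk is not
evaluated):
<<conditionSketch, eval=FALSE>>=
X <- cbind(1, 1:10, (1:10)^2)     # columns on very different scales
kappa(X, exact = TRUE)            # 2-norm condition number of X
kappa(crossprod(X), exact = TRUE) # approximately kappa(X)^2
@
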
\subsection{A small example}
\label{sec:smallLSQ}

As an example, let's create a model matrix, \code{m}, and corresponding
response vector, \code{yo}, for a simple linear regression model using
the \code{Formaldehyde} data.
<<modelMatrix>>=
data(Formaldehyde, package = "datasets")
str(Formaldehyde)
(m <- cbind(1, Formaldehyde$carb))
(yo <- Formaldehyde$optden)
@
Using \code{t} to evaluate the transpose, \code{solve} to take an
inverse, and the \code{\%*\%} operator for matrix multiplication, we
can translate (\ref{eq:XPX}) into the \Slang{} as
<<naiveCalc>>=
solve(t(m) %*% m) %*% t(m) %*% yo
@

On modern computers this calculation is performed so quickly that it
cannot be timed accurately in \RR{}%
\footnote{Since R version 2.2.0, \code{system.time()} has the default
  argument \code{gcFirst = TRUE}, which is assumed, and relevant, for
  all subsequent timings.}
<<timedNaive>>=
system.time(solve(t(m) %*% m) %*% t(m) %*% yo)
@
and it provides essentially the same results as the standard
\code{lm.fit} function that is called by \code{lm}.
<<catNaive>>=
dput(c(solve(t(m) %*% m) %*% t(m) %*% yo))
dput(unname(lm.fit(m, yo)$coefficients))
@
%$
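For an operation this fast, a measurable timing can be obtained by
repeating the calculation many times and timing the loop as a whole.
A small sketch (the replication count of 1000 is arbitrary, and the
chunk is not evaluated here):
<<timedNaiveRep, eval=FALSE>>=
## the elapsed time reported is for 1000 replications of the fit
system.time(for(i in 1:1000) solve(t(m) %*% m) %*% t(m) %*% yo)
@
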
\subsection{A large example}
\label{sec:largeLSQ}

For a large, ill-conditioned least squares problem, such as that
described in \citet{koen:ng:2003}, the literal translation of
(\ref{eq:XPX}) does not perform well.
<<KoenNg>>=
library(Matrix)
data(KNex, package = "Matrix")
y <- KNex$y
mm <- as(KNex$mm, "matrix") # full traditional matrix
dim(mm)
system.time(naive.sol <- solve(t(mm) %*% mm) %*% t(mm) %*% y)
@

Because the calculation of a ``cross-product'' matrix, such as
$\bX\trans\bX$ or $\bX\trans\bm{y}$, is a common operation in
statistics, the \code{crossprod} function has been provided to do
this efficiently.  In the single-argument form, \code{crossprod(mm)}
calculates $\bX\trans\bX$, taking advantage of the symmetry of the
product.  That is, instead of calculating the $712^2=506944$ elements
of $\bX\trans\bX$ separately, it only calculates the $(712\cdot
713)/2=253828$ elements in the upper triangle and replicates them in
the lower triangle.  Furthermore, there is no need to calculate the
inverse of a matrix explicitly when solving a linear system of
equations.  When the two-argument form of the \code{solve} function
is used, the linear system
\begin{equation}
  \label{eq:LSQsol}
  \left(\bX\trans\bX\right) \widehat{\bm{\beta}} = \bX\trans\by
\end{equation}
is solved directly.

Combining these optimizations we obtain
<<crossKoenNg>>=
system.time(cpod.sol <- solve(crossprod(mm), crossprod(mm, y)))
all.equal(naive.sol, cpod.sol)
@

On this computer (2.0 GHz Pentium-4, 1 GB Memory, Goto's BLAS, in
Spring 2004) the crossprod form of the calculation is about four times
as fast as the naive calculation.  In fact, the entire crossprod
solution is faster than simply calculating $\bX\trans\bX$ the naive
way.
<<xpxKoenNg>>=
system.time(t(mm) %*% mm)
@

Note that in newer versions of \RR{} and the BLAS library (as of summer
2007), \RR's \code{\%*\%} is able to detect the many zeros in \code{mm}
and shortcut many operations, and is hence much faster for such a
sparse matrix than \code{crossprod}, which currently does not make use
of such optimizations.  This is not the case when \RR{} is linked
against an optimized BLAS library such as GOTO or ATLAS.
%%
Also, for fully dense matrices, \code{crossprod()} indeed remains
faster (by a factor of two, typically) independently of the BLAS
library:
<<fullMatrix_crossprod>>=
fm <- mm
set.seed(11)
fm[] <- rnorm(length(fm))
system.time(c1 <- t(fm) %*% fm)
system.time(c2 <- crossprod(fm))
stopifnot(all.equal(c1, c2, tol = 1e-12))
@ % using stopifnot(.) to save output

\subsection{Least squares calculations with Matrix classes}
\label{sec:MatrixLSQ}

The \code{crossprod} function applied to a single matrix takes
advantage of symmetry when calculating the product but does not retain
the information that the product is symmetric (and positive
semidefinite).  As a result the solution of (\ref{eq:LSQsol}) is
performed using a general linear system solver based on an LU
decomposition when it would be faster, and more stable numerically, to
use a Cholesky decomposition.  The Cholesky decomposition could be
used explicitly but the code is rather awkward.
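In terms of the upper triangular Cholesky factor $\bm{R}$, where
$\bX\trans\bX=\bm{R}\trans\bm{R}$, equation~(\ref{eq:LSQsol}) is solved
by one forward and one backward substitution,
\[
  \bm{R}\trans\bm{c} = \bX\trans\bm{y}, \qquad
  \bm{R}\,\widehat{\bm{\beta}} = \bm{c},
\]
which is exactly what the \code{forwardsolve}/\code{backsolve} pair
below computes.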
<<naiveChol>>=
system.time(ch <- chol(crossprod(mm)))
system.time(chol.sol <-
            backsolve(ch, forwardsolve(ch, crossprod(mm, y),
                                       upper = TRUE, trans = TRUE)))
stopifnot(all.equal(chol.sol, naive.sol))
@

The \code{Matrix} package uses the S4 class system
\citep{R:Chambers:1998} to retain information on the structure of
matrices from the intermediate calculations.  A general matrix in
dense storage, created by the \code{Matrix} function, has class
\code{"dgeMatrix"} but its cross-product has class \code{"dpoMatrix"}.
The \code{solve} methods for the \code{"dpoMatrix"} class use the
Cholesky decomposition.
<<MatrixKoenNg>>=
mm <- as(KNex$mm, "denseMatrix")
class(crossprod(mm))
system.time(Mat.sol <- solve(crossprod(mm), crossprod(mm, y)))
stopifnot(all.equal(naive.sol, unname(as(Mat.sol, "matrix"))))
@

Furthermore, any method that calculates a decomposition or
factorization stores the resulting factorization with the original
object so that it can be reused without recalculation.
<<saveFactor>>=
xpx <- crossprod(mm)
xpy <- crossprod(mm, y)
system.time(solve(xpx, xpy))
system.time(solve(xpx, xpy)) # reusing factorization
@

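The cached decomposition can be inspected directly: it is stored in the
\code{factors} slot of the matrix object.  A small sketch (the slot
contents and their names are internals of the \code{Matrix} package and
may change between versions, so the chunk is not evaluated):
<<inspectFactor, eval=FALSE>>=
names(xpx@factors) # e.g., "Cholesky" after the first solve()
@
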
The model matrix \code{mm} is sparse; that is, most of the elements of
\code{mm} are zero.  The \code{Matrix} package incorporates special
methods for sparse matrices, which produce the fastest results of all.
<<SparseKoenNg>>=
mm <- KNex$mm
class(mm)
system.time(sparse.sol <- solve(crossprod(mm), crossprod(mm, y)))
stopifnot(all.equal(naive.sol, unname(as(sparse.sol, "matrix"))))
@

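Sparse storage pays off in memory as well as speed, since only the
non-zero elements (and their positions) are stored.  A quick comparison
using \code{object.size} (exact sizes vary by platform and package
version, so the chunk is not evaluated):
<<sparseSize, eval=FALSE>>=
object.size(KNex$mm)               # compressed sparse column form
object.size(as(KNex$mm, "matrix")) # dense form stores every element
@
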
As with other classes in the \code{Matrix} package, the
\code{dsCMatrix} class retains any factorization that has been
calculated, although in this case the decomposition is so fast that it
is difficult to determine the difference in the solution times.
<<SparseSaveFactor>>=
xpx <- crossprod(mm)
xpy <- crossprod(mm, y)
system.time(solve(xpx, xpy))
system.time(solve(xpx, xpy)) # reusing factorization
@

\subsection*{Session Info}

<<sessionInfo, results=tex>>=
toLatex(sessionInfo())
@

<<from_pkg_sfsmisc, echo=FALSE>>=
if(identical(1L, grep("linux", R.version[["os"]]))) { ##----- Linux - only ----

  ## Read a /proc file into a named character vector, one entry per
  ## "<name> : <value>" line:
  Sys.procinfo <- function(procfile)
  {
    l2 <- strsplit(readLines(procfile), "[ \t]*:[ \t]*")
    r <- sapply(l2[sapply(l2, length) == 2],
                function(c2) structure(c2[2], names = c2[1]))
    attr(r, "Name") <- procfile
    class(r) <- "simple.list"
    r
  }

  Scpu <- Sys.procinfo("/proc/cpuinfo")
  Smem <- Sys.procinfo("/proc/meminfo")
} # Linux only
@
<<Sys_proc_fake, eval=FALSE>>=
if(identical(1L, grep("linux", R.version[["os"]]))) { ## Linux - only ---
  Scpu <- sfsmisc::Sys.procinfo("/proc/cpuinfo")
  Smem <- sfsmisc::Sys.procinfo("/proc/meminfo")
  print(Scpu[c("model name", "cpu MHz", "cache size", "bogomips")])
  print(Smem[c("MemTotal", "SwapTotal")])
}
@
<<Sys_proc_out, echo=FALSE>>=
if(identical(1L, grep("linux", R.version[["os"]]))) { ## Linux - only ---
  print(Scpu[c("model name", "cpu MHz", "cache size", "bogomips")])
  print(Smem[c("MemTotal", "SwapTotal")])
}
@

\bibliography{Matrix}
\end{document}