390 lines
14 KiB
R
390 lines
14 KiB
R
# File src/library/stats/tests/nls.R
|
|
# Part of the R package, https://www.R-project.org
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# A copy of the GNU General Public License is available at
|
|
# https://www.R-project.org/Licenses/
|
|
|
|
## tests of nls, especially of weighted fits
|
|
|
|
library(stats)
|
|
options(digits = 5) # to avoid trivial printed differences
|
|
options(useFancyQuotes = FALSE) # avoid fancy quotes in o/p
|
|
options(show.nls.convergence = FALSE) # avoid non-diffable output
|
|
options(warn = 1)
|
|
|
|
have_MASS <- requireNamespace('MASS', quietly = TRUE)
|
|
|
|
pdf("nls-test.pdf")
|
|
|
|
## utility for comparing nls() results: [TODO: use more often below]
|
|
.n <- function(r) r[names(r) != "call"]
|
|
|
|
## selfStart.default() w/ no parameters:
|
|
logist <- deriv( ~Asym/(1+exp(-(x-xmid)/scal)), c("Asym", "xmid", "scal"),
|
|
function(x, Asym, xmid, scal){} )
|
|
logistInit <- function(mCall, LHS, data) {
|
|
xy <- sortedXyData(mCall[["x"]], LHS, data)
|
|
if(nrow(xy) < 3) stop("Too few distinct input values to fit a logistic")
|
|
Asym <- max(abs(xy[,"y"]))
|
|
if (Asym != max(xy[,"y"])) Asym <- -Asym # negative asymptote
|
|
xmid <- NLSstClosestX(xy, 0.5 * Asym)
|
|
scal <- NLSstClosestX(xy, 0.75 * Asym) - xmid
|
|
setNames(c(Asym, xmid, scal),
|
|
mCall[c("Asym", "xmid", "scal")])
|
|
}
|
|
logist <- selfStart(logist, initial = logistInit) ##-> Error in R 1.5.0
|
|
str(logist)
|
|
## with parameters and getInitial():
|
|
logist <- selfStart(logist, initial = logistInit,
|
|
parameters = c("Asym", "xmid", "scal"))
|
|
tools::assertWarning(verbose = TRUE,
|
|
in1 <- getInitial(circumference ~ logist(age, Asym, xmid, scal), Orange)
|
|
) # no warning previously
|
|
## but this then failed, now gives the same warning:
|
|
tools::assertWarning(verbose = TRUE,
|
|
fm <- nls(circumference ~ logist(age, Asym, xmid, scal), Orange)
|
|
)
|
|
## in R 4.1.{0,1,2} gave
|
|
## Error in (attr(object, "initial"))(mCall = mCall, data = data, LHS = LHS, :
|
|
## unused arguments (control = list(.......), trace = FALSE)
|
|
## IGNORE_RDIFF_BEGIN
|
|
coef(summary(fm))
|
|
## IGNORE_RDIFF_END
|
|
|
|
## getInitial.formula() gets selfStart function from environment(formula)
|
|
plogis <- stats::SSlogis
|
|
in2 <- getInitial(circumference ~ plogis(age, Asym, xmid, scal), Orange)
|
|
## in R <= 4.2.x gave Error:
|
|
## no 'getInitial' method found for "function" objects
|
|
rm(plogis)
|
|
|
|
|
|
## lower and upper in algorithm="port"
|
|
set.seed(123)
|
|
x <- runif(200)
|
|
a <- b <- 1; c <- -0.1
|
|
y <- a+b*x+c*x^2+rnorm(200, sd=0.05)
|
|
plot(x,y)
|
|
curve(a+b*x+c*x^2, add = TRUE)
|
|
## IGNORE_RDIFF_BEGIN
|
|
nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1), algorithm = "port")
|
|
(fm <- nls(y ~ a+b*x+c*I(x^2), start = c(a=1, b=1, c=0.1),
|
|
algorithm = "port", lower = c(0, 0, 0)))
|
|
## IGNORE_RDIFF_END
|
|
if(have_MASS) {
|
|
print(confint(fm))
|
|
} else message("skipping tests requiring the MASS package")
|
|
|
|
## weighted nls fit
|
|
set.seed(123)
|
|
y <- x <- 1:10
|
|
yeps <- y + rnorm(length(y), sd = 0.01)
|
|
wts <- rep(c(1, 2), length = 10); wts[5] <- 0
|
|
fit0 <- lm(yeps ~ x, weights = wts)
|
|
## IGNORE_RDIFF_BEGIN
|
|
summary(fit0, cor = TRUE)
|
|
cf0 <- coef(summary(fit0))[, 1:2]
|
|
fit <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321),
|
|
weights = wts, trace = TRUE)
|
|
summary(fit, cor = TRUE)
|
|
## IGNORE_RDIFF_END
|
|
stopifnot(all.equal(residuals(fit), residuals(fit0), tolerance = 1e-5,
|
|
check.attributes = FALSE))
|
|
stopifnot(df.residual(fit) == df.residual(fit0))
|
|
stopifnot(all.equal(logLik(fit), logLik(fit0), tolerance = 1e-8))
|
|
cf1 <- coef(summary(fit))[, 1:2]
|
|
## IGNORE_RDIFF_BEGIN
|
|
fit2 <- nls(yeps ~ a + b*x, start = list(a = 0.12345, b = 0.54321),
|
|
weights = wts, trace = TRUE, algorithm = "port")
|
|
summary(fit2, cor = TRUE)
|
|
## IGNORE_RDIFF_END
|
|
cf2 <- coef(summary(fit2))[, 1:2]
|
|
rownames(cf0) <- c("a", "b")
|
|
# expect relative errors ca 2e-08
|
|
stopifnot(all.equal(cf1, cf0, tolerance = 1e-6),
|
|
all.equal(cf1, cf0, tolerance = 1e-6))
|
|
stopifnot(all.equal(residuals(fit2), residuals(fit0), tolerance = 1e5,
|
|
check.attributes = FALSE))
|
|
stopifnot(all.equal(logLik(fit2), logLik(fit0), tolerance = 1e-8))
|
|
|
|
|
|
DNase1 <- subset(DNase, Run == 1)
|
|
DNase1$wts <- rep(8:1, each = 2)
|
|
fm1 <- nls(density ~ SSlogis(log(conc), Asym, xmid, scal),
|
|
data = DNase1, weights = wts)
|
|
summary(fm1)
|
|
|
|
## directly
|
|
fm2 <- nls(density ~ Asym/(1 + exp((xmid - log(conc))/scal)),
|
|
data = DNase1, weights = wts,
|
|
start = list(Asym = 3, xmid = 0, scal = 1))
|
|
summary(fm2)
|
|
stopifnot(all.equal(coef(summary(fm2)), coef(summary(fm1)), tolerance = 1e-6))
|
|
stopifnot(all.equal(residuals(fm2), residuals(fm1), tolerance = 1e-5))
|
|
stopifnot(all.equal(fitted(fm2), fitted(fm1), tolerance = 1e-6))
|
|
fm2a <- nls(density ~ Asym/(1 + exp((xmid - log(conc)))),
|
|
data = DNase1, weights = wts,
|
|
start = list(Asym = 3, xmid = 0))
|
|
anova(fm2a, fm2)
|
|
|
|
## and without using weights
|
|
fm3 <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))/scal))),
|
|
data = DNase1, start = list(Asym = 3, xmid = 0, scal = 1))
|
|
summary(fm3)
|
|
stopifnot(all.equal(coef(summary(fm3)), coef(summary(fm1)), tolerance = 1e-6))
|
|
ft <- with(DNase1, density - fitted(fm3)/sqrt(wts))
|
|
stopifnot(all.equal(ft, fitted(fm1), tolerance = 1e-6))
|
|
# sign of residuals is reversed
|
|
r <- with(DNase1, -residuals(fm3)/sqrt(wts))
|
|
all.equal(r, residuals(fm1), tolerance = 1e-5)
|
|
fm3a <- nls(~ sqrt(wts) * (density - Asym/(1 + exp((xmid - log(conc))))),
|
|
data = DNase1, start = list(Asym = 3, xmid = 0))
|
|
anova(fm3a, fm3)
|
|
|
|
## using conditional linearity
|
|
fm4 <- nls(density ~ 1/(1 + exp((xmid - log(conc))/scal)),
|
|
data = DNase1, weights = wts,
|
|
start = list(xmid = 0, scal = 1), algorithm = "plinear")
|
|
summary(fm4)
|
|
cf <- coef(summary(fm4))[c(3,1,2), ]
|
|
rownames(cf)[2] <- "Asym"
|
|
stopifnot(all.equal(cf, coef(summary(fm1)), tolerance = 1e-6,
|
|
check.attributes = FALSE))
|
|
stopifnot(all.equal(residuals(fm4), residuals(fm1), tolerance = 1e-5))
|
|
stopifnot(all.equal(fitted(fm4), fitted(fm1), tolerance = 1e-6))
|
|
fm4a <- nls(density ~ 1/(1 + exp((xmid - log(conc)))),
|
|
data = DNase1, weights = wts,
|
|
start = list(xmid = 0), algorithm = "plinear")
|
|
anova(fm4a, fm4)
|
|
|
|
## using 'port'
|
|
fm5 <- nls(density ~ Asym/(1 + exp((xmid - log(conc))/scal)),
|
|
data = DNase1, weights = wts,
|
|
start = list(Asym = 3, xmid = 0, scal = 1),
|
|
algorithm = "port")
|
|
summary(fm5)
|
|
stopifnot(all.equal(coef(summary(fm5)), coef(summary(fm1)), tolerance = 1e-6))
|
|
stopifnot(all.equal(residuals(fm5), residuals(fm1), tolerance = 1e-5))
|
|
stopifnot(all.equal(fitted(fm5), fitted(fm1), tolerance = 1e-6))
|
|
|
|
## check profiling
|
|
pfm1 <- profile(fm1)
|
|
pfm3 <- profile(fm3)
|
|
for(m in names(pfm1))
|
|
stopifnot(all.equal(pfm1[[m]], pfm3[[m]], tolerance = 1e-5))
|
|
pfm5 <- profile(fm5)
|
|
for(m in names(pfm1))
|
|
stopifnot(all.equal(pfm1[[m]], pfm5[[m]], tolerance = 1e-5))
|
|
if(have_MASS) {
|
|
print(c1 <- confint(fm1))
|
|
print(c4 <- confint(fm4, 1:2))
|
|
stopifnot(all.equal(c1[2:3, ], c4, tolerance = 1e-3))
|
|
}
|
|
|
|
## some low-dimensional examples
|
|
npts <- 1000
|
|
set.seed(1001)
|
|
x <- runif(npts)
|
|
b <- 0.7
|
|
y <- x^b+rnorm(npts, sd=0.05)
|
|
a <- 0.5
|
|
y2 <- a*x^b+rnorm(npts, sd=0.05)
|
|
c <- 1.0
|
|
y3 <- a*(x+c)^b+rnorm(npts, sd=0.05)
|
|
d <- 0.5
|
|
y4 <- a*(x^d+c)^b+rnorm(npts, sd=0.05)
|
|
m1 <- c(y ~ x^b, y2 ~ a*x^b, y3 ~ a*(x+exp(logc))^b)
|
|
s1 <- list(c(b=1), c(a=1,b=1), c(a=1,b=1,logc=0))
|
|
for(p in 1:3) {
|
|
fm <- nls(m1[[p]], start = s1[[p]])
|
|
print(fm)
|
|
if(have_MASS) print(confint(fm))
|
|
fm <- nls(m1[[p]], start = s1[[p]], algorithm = "port")
|
|
print(fm)
|
|
if(have_MASS) print(confint(fm))
|
|
}
|
|
|
|
if(have_MASS) {
|
|
fm <- nls(y2~x^b, start=c(b=1), algorithm="plinear")
|
|
print(confint(profile(fm)))
|
|
fm <- nls(y3 ~ (x+exp(logc))^b, start=c(b=1, logc=0), algorithm="plinear")
|
|
print(confint(profile(fm)))
|
|
}
|
|
|
|
|
|
## more profiling with bounds
|
|
op <- options(digits=3)
|
|
npts <- 10
|
|
set.seed(1001)
|
|
a <- 2
|
|
b <- 0.5
|
|
x <- runif(npts)
|
|
y <- a*x/(1+a*b*x) + rnorm(npts, sd=0.2)
|
|
gfun <- function(a,b,x) {
|
|
if(a < 0 || b < 0) stop("bounds violated")
|
|
a*x/(1+a*b*x)
|
|
}
|
|
m1 <- nls(y ~ gfun(a,b,x), algorithm = "port",
|
|
lower = c(0,0), start = c(a=1, b=1))
|
|
(pr1 <- profile(m1))
|
|
if(have_MASS) print(confint(pr1))
|
|
|
|
gfun <- function(a,b,x) {
|
|
if(a < 0 || b < 0 || a > 1.5 || b > 1) stop("bounds violated")
|
|
a*x/(1+a*b*x)
|
|
}
|
|
m2 <- nls(y ~ gfun(a,b,x), algorithm = "port",
|
|
lower = c(0, 0), upper=c(1.5, 1), start = c(a=1, b=1))
|
|
profile(m2)
|
|
if(have_MASS) print(confint(m2))
|
|
options(op)
|
|
|
|
## scoping problems
|
|
test <- function(trace=TRUE)
|
|
{
|
|
x <- seq(0,5,len=20)
|
|
n <- 1
|
|
y <- 2*x^2 + n + rnorm(x)
|
|
xy <- data.frame(x=x,y=y)
|
|
myf <- function(x,a,b,c) a*x^b+c
|
|
list(with.start=
|
|
nls(y ~ myf(x,a,b,n), data=xy, start=c(a=1,b=1), trace=trace),
|
|
no.start= ## cheap auto-init to 1
|
|
suppressWarnings(
|
|
nls(y ~ myf(x,A,B,n), data=xy)))
|
|
}
|
|
## IGNORE_RDIFF_BEGIN
|
|
t1 <- test()
|
|
## IGNORE_RDIFF_END
|
|
t1$with.start
|
|
##__with.start:
|
|
## failed to find n in 2.2.x
|
|
## found wrong n in 2.3.x
|
|
## finally worked in 2.4.0
|
|
##__no.start: failed in 3.0.2
|
|
## 2018-09 fails on macOS with Accelerate framework.
|
|
stopifnot(all.equal(.n(t1[[1]]), .n(t1[[2]]), check.environment = FALSE))
|
|
rm(a,b)
|
|
t2 <- test(FALSE)
|
|
stopifnot(all.equal(lapply(t1, .n),
|
|
lapply(t2, .n), tolerance = 0.16, # different random error
|
|
check.environment = FALSE))
|
|
|
|
|
|
## list 'start'
|
|
set.seed(101)# (remain independent of above)
|
|
getExpmat <- function(theta, t)
|
|
{
|
|
conc <- matrix(nrow = length(t), ncol = length(theta))
|
|
for(i in 1:length(theta)) conc[, i] <- exp(-theta[i] * t)
|
|
conc
|
|
}
|
|
expsum <- as.vector(getExpmat(c(.05,.005), 1:100) %*% c(1,1))
|
|
expsumNoisy <- expsum + max(expsum) *.001 * rnorm(100)
|
|
expsum.df <-data.frame(expsumNoisy)
|
|
|
|
## estimate decay rates, amplitudes with default Gauss-Newton
|
|
summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df,
|
|
start = list(k = c(.6,.02), sp = c(1,2))))
|
|
|
|
## didn't work with port in 2.4.1
|
|
summary (nls(expsumNoisy ~ getExpmat(k, 1:100) %*% sp, expsum.df,
|
|
start = list(k = c(.6,.02), sp = c(1,2)),
|
|
algorithm = "port"))
|
|
|
|
|
|
## PR13540
|
|
|
|
x <- runif(200)
|
|
b0 <- c(rep(0,100),runif(100))
|
|
b1 <- 1
|
|
fac <- as.factor(rep(c(0,1), each = 100))
|
|
y <- b0 + b1*x + rnorm(200, sd=0.05)
|
|
# next failed in 2.8.1
|
|
fit <- nls(y~b0[fac] + b1*x, start = list(b0=c(1,1), b1=1),
|
|
algorithm ="port", upper = c(100, 100, 100))
|
|
# next did not "fail" in proposed fix:
|
|
fiB <- nls(y~b0[fac] + b1*x, start = list(b0=c(1,1), b1=101),
|
|
algorithm ="port", upper = c(100, 100, 100),
|
|
control = list(warnOnly=TRUE))# warning ..
|
|
with(fiB$convInfo, ## start par. violates constraints
|
|
stopifnot(isConv == FALSE, stopCode == 300))
|
|
|
|
|
|
## PR#17367 -- nls() quoting non-syntactical variable names
|
|
##
|
|
op <- options(warn = 2)# no warnings allowed from here
|
|
##
|
|
dN <- data.frame('NO [µmol/l]' = c(1,3,8,17), t = 1:4, check.names=FALSE)
|
|
fnN <- `NO [µmol/l]` ~ a + k* exp(t)
|
|
## lm() works, nls() should too
|
|
lm.N <- lm(`NO [µmol/l]` ~ exp(t) , data = dN)
|
|
summary(lm.N) -> slmN
|
|
nm. <- nls(`NO [µmol/l]` ~ a + k*exp(t), start=list(a=0,k=1), data = dN)
|
|
## In R <= 3.4.x : Error in eval(predvars, data, env) : object 'NO' not found
|
|
nmf <- nls(fnN, start=list(a=0,k=1), data = dN)
|
|
## (ditto; gave identical error)
|
|
noC <- function(L) L[-match("call", names(L))]
|
|
stopifnot(all.equal(noC (nm.), noC (nmf)))
|
|
##
|
|
## with list for which as.data.frame() does not work [-> different branch, not using model.frame!]
|
|
## list version (has been valid "forever", still doubtful, rather give error [FIXME] ?)
|
|
lsN <- c(as.list(dN), list(foo="bar")); lsN[["t"]] <- 1:8
|
|
nmL <- nls(`NO [µmol/l]` ~ a + k*exp(t), start=list(a=0,k=1), data = lsN)
|
|
stopifnot(all.equal(coef(nmL), c(a = 5.069866, k = 0.003699669), tol = 4e-7))# seen 4.2e-8
|
|
|
|
## trivial RHS -- should work even w/o 'start='
|
|
fi1 <- nls(y ~ a, start = list(a=1))
|
|
## -> 2 deprecation warnings "length 1 in vector-arithmetic" from nlsModel() in R 3.4.x ..
|
|
options(op) # warnings about missing 'start' ok:
|
|
f.1 <- nls(y ~ a) # failed in R 3.4.x
|
|
stopifnot(all.equal(noC(f.1), noC(fi1)),
|
|
all.equal(coef(f.1), c(a = mean(y))))
|
|
|
|
|
|
##--- New option 'central' for numericDeriv() :
|
|
|
|
## Continuing the pnorm() example from example(numericDeriv):
|
|
|
|
mkEnv <- function(n, from = -3, to = 3) {
|
|
stopifnot(is.numeric(n), n >= 2)
|
|
E <- new.env()
|
|
E$mean <- 0.
|
|
E$sd <- 1.
|
|
E$x <- seq(from, to, length.out = n)
|
|
E
|
|
}
|
|
|
|
pnEnv <- mkEnv(65) # is used inside errE() :
|
|
|
|
## varying eps (very platform dependent?):
|
|
errE <- Vectorize(function(eps, central=FALSE) {
|
|
grad <- attr(numericDeriv(quote(pnorm(x, mean, sd)), c("mean", "sd"),
|
|
pnEnv, eps=eps, central=central), "gradient")
|
|
target <- with(pnEnv, -dnorm(x) * cbind(1, x, deparse.level=0L))
|
|
## return relative error {in the same sense as in all.equal()} :
|
|
sum(abs(target - grad)) / sum(abs(target))
|
|
})
|
|
|
|
curve(errE(x), 1e-9, 1e-4, log="xy", n=512, ylim = c(1.5e-11, 5e-7),
|
|
xlab = quote(epsilon), ylab=quote(errE(epsilon))) -> rex
|
|
axis(1, at = 2^-(52/2), label = quote(sqrt(epsilon[c])), col=4, col.axis=4, line=-1/2)
|
|
axis(1, at = 2^-(52/3), label = quote(epsilon[c]^{1/3}), col=4, col.axis=4, line=-1/2)
|
|
curve(errE(x, central=TRUE), n=512, col=2, add = TRUE) -> rexC
|
|
## IGNORE_RDIFF_BEGIN
|
|
str(xy1 <- approx(rex , xout= sqrt(2^-52)) )
|
|
str(xy2 <- approx(rexC, xout=(2^-52)^(1/3)))
|
|
## IGNORE_RDIFF_END
|
|
lines(xy1, type="h", col=4)
|
|
lines(xy2, type="h", col=4)
|