297 lines
11 KiB
R
Raw Normal View History

2025-01-12 00:52:51 +08:00
require("splines")
## Bug report PR#16549 - 'bad value from splineDesign'
## Date: Wed, 30 Sep 2015 12:12:47 +0000
## https://bugs.r-project.org/show_bug.cgi?id=16549
## Reporter: roconnor@health.usf.edu (extended original example code)
kn8.01 <- c(0,0,0,0,1,1,1,1)
m1 <- rbind(c(1, 0,0,0))
m.1 <- rbind(c(0,0,0, 1))
stopifnot(
## the first gave (0 0 0 0) instead of m1 :
all.equal(splineDesign(kn8.01, c(0,2), outer.ok = TRUE), rbind(m1, 0)),
all.equal(splineDesign(kn8.01, c( 0,1,2), outer.ok = TRUE), rbind(m1, m.1, 0)),
all.equal(splineDesign(kn8.01, c(-1,0,1), outer.ok = TRUE), rbind(0, m1, m.1)),
all.equal(splineDesign(kn8.01, 0, outer.ok = TRUE), m1),
all.equal(splineDesign(kn8.01, 0), m1),
TRUE)
## The original fix proposal introduced a new bug, visible here:
S <- splineDesign(c(-3, -3, -2, 0, 2, 3, 3), x= -3:3, outer.ok=TRUE)
## (had a NaN in the lower-right corner)
stopifnot(all.equal(S,
rbind(0, c(22,3,0)/45, c(193, 6*23, 9)/360,
c(1,3,1)/5,
c(9, 6*23, 193)/360,
c(0,3,22)/45, 0),
tolerance = 1e-14))
chkSum.Ok <- TRUE ## for check
chkSum <- function(knots, n = 1 + 2^9, ord = 4) {
stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
dk <- diff(rk <- range(uk <- unique(knots)))
if(dk == 0) dk <- uk[1]/4
d.x <- dk / (2*n.k)
## x: the unique knots
x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
is.x.in <- knots[ord] <= x & x <= knots[n.k-(ord-1)]
## ~~~~ ~~~~ same as in splineDesign(*, outer.ok=TRUE)
##
### no longer needed:
## work around "infelicity" (or not; not sure if to call "bug")
## n.RHk := #{duplicated RHS boundary knots}
## n.RHk <- match(FALSE, rev(duplicated(knots)))
## if(ord > 1 && n.RHk > ord) { ## the (ord == 1) case "works" via spike = 1
## ## <==> knots[n.k -j ] == knots[n.k] for j = 0,1,..,(n.RHk-1)
## ## then spl(x= RHS-knot, knots) == 0, even though "should" be 1
## ## "FIX it up" for here. FIXME: do in splineDesign() or it's C code ?!
## iR <- which(x == knots[n.k])
## ## ncol(bb) == n.k - ord
## j <- n.k - n.RHk # == ncol(bb) - (n.RHk - ord)
## if(any(bb[iR, j] == 0)) bb[iR, j] <- 1
## }
sumB <- rowSums(bb)
if(any(iBad <- !is.finite(sumB))) {
chkSum.Ok <<- FALSE ## for check
cat("** _FIXME_ NON-finite values in sumB: ord = ", ord, "; |knots| =", n.k,"\n")
cat("knots <- "); dput(knots)
cat("non-finite at x = "); dput(x[iBad])
} else if(length(bb)) { # only when bb[] is not 0-dimensional
eps <- 3*.Machine$double.eps
stopifnot(abs(1 - sumB[is.x.in]) <= 2*eps, 0 <= sumB+eps, sumB-2*eps <= 1,
allow.logical0=TRUE)
## TODO: now also check derivatives
}
invisible(bb)
}
plotSplD <- function(knots, n = 2^10, ord = 4, type = "l",
ylim = c(0,1), ylab = "B-splines", ...) {
stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
dk <- diff(range(uk <- unique(knots)))
if(dk == 0) dk <- uk[1]/4
d.x <- dk / (2*n.k)
## x: will contain the unique knots uk
x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
matplot(x, bb, type=type, ylim=ylim, ylab=ylab, ...)
sumB <- rowSums(bb)
abline(v = knots, lty = 3, col = "light gray")
abline(v = knots[c(ord, n.k-(ord-1))], lty = 3, col = "gray10")
lines(x, sumB, col = adjustcolor("red", 0.4), lwd = 3)
abline(h=1, lty=2, col = adjustcolor(1, 0.4), lwd = 2)
## lty, col,: from matplot()
legend(mean(x), 0.98, legend = paste0("B_", 1:ncol(bb)), lty=1:5, col=1:6,
bty = "n", xjust = 0.5)
invisible(list(x=x, splineDesign = bb))
}
if(!dev.interactive(orNone=TRUE)) pdf("spline-tst.pdf")
plotSplD(kn8.01)
chkSum (kn8.01)
## from ../man/splineDesign.Rd :
knots <- c(1,1.8,3:5,6.5,7,8.1,9.2,10) # 10 => 10-4 = 6 Basis splines
str(plotSplD(knots)) # cubic splines, adding to 1 in [ 4, 7 ]
str(plotSplD(knots, ord=3))# quadratic splines, adding to 1 in [ 3, 8.1]
str(plotSplD(knots, ord=2))# linear splines, adding to 1 in [1.8,9.2]
str(plotSplD(knots, ord=1))# constant splines, adding to 1 in [1, 10]
chkSum(knots)
chkSum(knots, ord=3)
chkSum(knots, ord=2)
chkSum(knots, ord=1)
## cases that failed too {Linux lynne 4.1.6 x86_64}
chkSum(c(1:5, 9,9), ord=2) # ok for ord in {1,3,4}
chkSum(c(1:4, 9,9,9), ord=3)
chkSum(c(1:3, 9,9,9,9), ord=4)
## These failed in R <= 3.2.2 (but not after first fix):
chkSum(c(0,0,0,0, 1:3), ord=4)
chkSum(c(0,0,0, 1:4), ord=3)
chkSum(c(0,0, 1:5), ord=2)
## This should be symmetric, but was not;
## the graphic is maybe most convincing:
k6.01 <- c(0,0,0, 1,1,1)
round(with(plotSplD(k6.01, ord=2, n=16), cbind(x, splineDesign)), 4)
x8 <- (0:8)/8
sp8 <- splineDesign(k6.01, x=x8, ord=2)
print.table(8*sp8, zero.print=".")
stopifnot(all.equal(8*sp8,
cbind(0, 8:0, 0:8, 0), tol = 1e-14))
## or just
splineDesign(k6.01, x=0:1, ord=2)
## 0 1 0 0
## 0 0 1 0 --- [finally !]
stopifnot(identical(cbind(0, diag(2), 0),
splineDesign(k6.01, x=0:1, ord=2)))
## Further:
for(k in 0:8)
chkSum(c(0:k, 9,9,9), ord=2)
for(k in 0:8)
chkSum(c(0:k, 9,9,9,9), ord=3)
for(k in 0:8)
chkSum(c(0:k, 9,9,9,9,9), ord=4)
## look at two examples
r <- plotSplD(c(0:4, 8.9,9,9,9), ord=3)# B_6 is narrow spike
r. <- plotSplD(c(0:4, 9, 9,9,9), ord=3)# B_6 diverged to all zero.
if(interactive())
round(with(r., cbind(x, splineDesign)), 4)
stopifnot(r.$splineDesign[, 6] == 0)
## one could argue that B_6 should also have finite area, and hence be "a delta".
## ==> sum_j B_j(x = 9) would then be Inf or undefined ..
## more sensible: B_6 should be omitted and should have B_5(9) == 1
## now implemented:
stopifnot(identical(c(0, 0, 0, 0, 1, 0),
with(r., splineDesign[x == 9,])))
## Everything is fine, if left-right mirrored / reversed :
r03 <- plotSplD(c(0,0,0,0, 1:5), ord=3)
## here, B_1 is "delta" and should rather be omitted
stopifnot(identical(c(0, 1, 0, 0, 0, 0),
with(r03, splineDesign[x == 0,])))
## 0 1 0 0 0 0 -- as it should be ..
round(with(r03, cbind(x, splineDesign)[50:60,]), 4)
r04 <- plotSplD(c(0,0,0,0, 1:5), ord=4)
## here, B_1 is "delta" and should rather be omitted
stopifnot(identical(c(1, 0, 0, 0, 0),
with(r04, splineDesign[x == 0,])))
round(with(r04, cbind(x, splineDesign)[50:60,]), 4)
r0.4 <- plotSplD(c(0,0,0, 1:5), ord=4) # basis is nice and correct
set.seed(17)
for(n in 1:1000) {
if(n %% 50 == 0) cat(sprintf("n = %4d\n",n))
kn <- sort.int(round(10* rnorm(4 + rpois(1, lambda=4))))
for(oo in 1:4)
## if(inherits(r <- tryCatch(chkSum(kn, ord=oo), error=identity), "error"))
## cat(r$message, "\n chkSum(", deparse(kn), ", ord = ", oo, ")\n")
chkSum(kn, ord = oo)
}
## One of the cases with NaN {when used ( . <= x & x <= . )}:
bb <- chkSum(c(-14, -4, 3, 5, 6, 15, 15))
stopifnot(is.finite(rowSums(bb)))
stopifnot(chkSum.Ok)
proc.time()
###---- "Bug report" (to R-core) from Trevor Hastie ---
###----> bs(*, Boundary.knots = .)
### needing boundary ajustment for correct extrapolation
## Trevor's Example, slightly modified and extended:
x <- seq(1.5, 8.5, by = 1/4)
set.seed(13)
y <- x + .01*(x - 5)^3 + rnorm(x)
fit0 <- lm(y ~ bs(x, degree=3, knots=4))
fit0.<- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,9)))# *NOT* outside
fit1 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,8)))# warning
fit2 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(2,8)))# warning "2 x"
jx <- seq(from=-2,to=12, by=0.1)
p0 <- predict(fit0, list(x=jx))
p0.<- predict(fit0, list(x=jx))
p1 <- predict(fit1, list(x=jx))
p2 <- predict(fit2, list(x=jx))
stopifnot(all.equal(p0, p0.,tol=1e-14),
all.equal(p0, p1, tol=1e-14),
all.equal(p0, p2, tol=1e-14))
## ^^ p1 and p2 differed from p0 in R <= 3.2.2
## See numerical fuzz:
all.equal(p0, p0., tol=0)
all.equal(p0, p1, tol=0)
all.equal(p0, p2, tol=0)
all.equal(p1, p2, tol=0)# interestingly almost the same
## formula ==> print for default method
ispl <- with(women, interpSpline( height, weight ))
stopifnot(identical(format(formula(ispl)),
"weight ~ height")) ## was wrongly .Primitive(\"~\")(wei...
###------------- Problems for small n ------- want at least good error messages ---
## This gives an error, but not a "human readable" one for k <= 3
## -- now done: the English error message is "must have at least 'ord'=4 points"
## FIXME: Would like to get degree=0 (constant), 1, 2 interpolation spline here
## (and could use degree = 0 for both n=0 and n=1) ==> would have *no* error here
for(n in 0:4) {
cat("n = ", n,": ")
x <- cumsum(pmax(1, round(10*runif(n)))) # pmax(1,*): x[] must be distinct
y <- (x - 2)^2 + round(8*rnorm(n))/4
if(!inherits(sp <- try(interpSpline(x,y)), "try-error")) print(sp)
cat("-------------------------------\n")
}
## for n=4:
stopifnot(inherits(sp, "polySpline"), is.matrix(sp$coefficients),
identical(dim(sp$coefficients), c(4L, 4L)))
## This also gives a "hard to read" error _FIXME_
try(ns(1[0])) # n = 0
## Error in splineDesign(Aknots, x, ord) :
## length of 'derivs' is larger than length of 'x' -- barely ok + 2 warnings
try(bs(1[0])) # n = 0 : same error etc as ns()
(b1 <- bs(pi)) # n = 1 : works fine
(n1 <- ns(pi)) # n = 1 : now ok; gave error: "qr.default(.. NA ...)"
##' keep {dim, dimnames} but nothing else :
noAttr <- function(x) `mostattributes<-`(x, list(dim=dim(x), dimnames=dimnames(x)))
stopifnot(
identical(noAttr(b1), rbind(c(`1`=0, `2`=0, `3`=0))),
all.equal(noAttr(n1), matrix(0.400891862868637, 1, dimnames=list(NULL,"1")),
tol = 5e-15))
d1 <- data.frame(u = 2, Y = 5) ## data set with 1 observation
summary(mbs <- lm(Y ~ bs(u), data=d1)) # fine though many coef etc are NA
summary(mbs1 <- lm(Y ~ bs(u, degree=1), data=d1)) # ok
stopifnot(
identical(coef(mbs), setNames(c(5, NA, NA, NA),
c("(Intercept)", paste0("bs(u)", 1:3))))
,
identical(coef(mbs1), c("(Intercept)" = 5, "bs(u, degree = 1)" = NA)))
if(FALSE)
summary(mbs0 <- lm(Y ~ bs(u, degree=0), data=d1)) # error: degree >= 1
## ns() has no 'degree' argument!
summary(mns <- lm(Y ~ ns(u), data=d1)) ## gave Error; now ok
stopifnot(identical(coef(mns), c("(Intercept)" = 5, "ns(u)" = NA))
## perfect prediction: all residuals == 0 :
, identical(residuals(mns), c(`1` = 0))
, identical(residuals(mbs), c(`1` = 0))
, identical(residuals(mbs1),c(`1` = 0))
)
## [Bug 18442] ns() fails when quantiles end up on the boundary (2022-12-05)
## ========= ---- 1st example ===> <R>/tests/reg-tests-1e.R 'ns(nn,4)'
##
## a (more extreme) example where bs() is affected similarly:
require(splines)
##
x <- c(rep(0L, 44), 1:10, 2L*(6:15), as.integer(round(1.25^(15:22))))
y <- 10L*x + c(-1L,1L)
B <- bs(x, df = 7)
summary(m <- lm(y ~ B))
stopifnot(exprs = {
qr(B)$rank == 7
all.equal(c(0, 0.03265, 20.53, 21.79, 73.27, 519.8, 964.3, 1361),
unname(coef(m)), tol = 1e-4)
})
## rank was 5, and coef(m) had 3 NA's in R <= 4.2.x