297 lines
11 KiB
R
297 lines
11 KiB
R
require("splines")
|
|
|
|
## Bug report PR#16549 - 'bad value from splineDesign'
|
|
## Date: Wed, 30 Sep 2015 12:12:47 +0000
|
|
## https://bugs.r-project.org/show_bug.cgi?id=16549
|
|
|
|
## Reporter: roconnor@health.usf.edu (extended original example code)
|
|
|
|
kn8.01 <- c(0,0,0,0,1,1,1,1)
|
|
m1 <- rbind(c(1, 0,0,0))
|
|
m.1 <- rbind(c(0,0,0, 1))
|
|
stopifnot(
|
|
## the first gave (0 0 0 0) instead of m1 :
|
|
all.equal(splineDesign(kn8.01, c(0,2), outer.ok = TRUE), rbind(m1, 0)),
|
|
all.equal(splineDesign(kn8.01, c( 0,1,2), outer.ok = TRUE), rbind(m1, m.1, 0)),
|
|
all.equal(splineDesign(kn8.01, c(-1,0,1), outer.ok = TRUE), rbind(0, m1, m.1)),
|
|
all.equal(splineDesign(kn8.01, 0, outer.ok = TRUE), m1),
|
|
all.equal(splineDesign(kn8.01, 0), m1),
|
|
TRUE)
|
|
|
|
## The original fix proposal introduced a new bug, visible here:
|
|
S <- splineDesign(c(-3, -3, -2, 0, 2, 3, 3), x= -3:3, outer.ok=TRUE)
|
|
## (had a NaN in the lower-right corner)
|
|
stopifnot(all.equal(S,
|
|
rbind(0, c(22,3,0)/45, c(193, 6*23, 9)/360,
|
|
c(1,3,1)/5,
|
|
c(9, 6*23, 193)/360,
|
|
c(0,3,22)/45, 0),
|
|
tolerance = 1e-14))
|
|
|
|
chkSum.Ok <- TRUE ## for check
|
|
chkSum <- function(knots, n = 1 + 2^9, ord = 4) {
|
|
stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
|
|
dk <- diff(rk <- range(uk <- unique(knots)))
|
|
if(dk == 0) dk <- uk[1]/4
|
|
d.x <- dk / (2*n.k)
|
|
## x: the unique knots
|
|
x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
|
|
bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
|
|
is.x.in <- knots[ord] <= x & x <= knots[n.k-(ord-1)]
|
|
## ~~~~ ~~~~ same as in splineDesign(*, outer.ok=TRUE)
|
|
##
|
|
### no longer needed:
|
|
## work around "infelicity" (or not; not sure if to call "bug")
|
|
## n.RHk := #{duplicated RHS boundary knots}
|
|
## n.RHk <- match(FALSE, rev(duplicated(knots)))
|
|
## if(ord > 1 && n.RHk > ord) { ## the (ord == 1) case "works" via spike = 1
|
|
## ## <==> knots[n.k -j ] == knots[n.k] for j = 0,1,..,(n.RHk-1)
|
|
## ## then spl(x= RHS-knot, knots) == 0, even though "should" be 1
|
|
## ## "FIX it up" for here. FIXME: do in splineDesign() or it's C code ?!
|
|
## iR <- which(x == knots[n.k])
|
|
## ## ncol(bb) == n.k - ord
|
|
## j <- n.k - n.RHk # == ncol(bb) - (n.RHk - ord)
|
|
## if(any(bb[iR, j] == 0)) bb[iR, j] <- 1
|
|
## }
|
|
sumB <- rowSums(bb)
|
|
if(any(iBad <- !is.finite(sumB))) {
|
|
chkSum.Ok <<- FALSE ## for check
|
|
cat("** _FIXME_ NON-finite values in sumB: ord = ", ord, "; |knots| =", n.k,"\n")
|
|
cat("knots <- "); dput(knots)
|
|
cat("non-finite at x = "); dput(x[iBad])
|
|
} else if(length(bb)) { # only when bb[] is not 0-dimensional
|
|
eps <- 3*.Machine$double.eps
|
|
stopifnot(abs(1 - sumB[is.x.in]) <= 2*eps, 0 <= sumB+eps, sumB-2*eps <= 1,
|
|
allow.logical0=TRUE)
|
|
## TODO: now also check derivatives
|
|
}
|
|
invisible(bb)
|
|
}
|
|
|
|
plotSplD <- function(knots, n = 2^10, ord = 4, type = "l",
|
|
ylim = c(0,1), ylab = "B-splines", ...) {
|
|
stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
|
|
dk <- diff(range(uk <- unique(knots)))
|
|
if(dk == 0) dk <- uk[1]/4
|
|
d.x <- dk / (2*n.k)
|
|
## x: will contain the unique knots uk
|
|
x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
|
|
bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
|
|
matplot(x, bb, type=type, ylim=ylim, ylab=ylab, ...)
|
|
sumB <- rowSums(bb)
|
|
abline(v = knots, lty = 3, col = "light gray")
|
|
abline(v = knots[c(ord, n.k-(ord-1))], lty = 3, col = "gray10")
|
|
lines(x, sumB, col = adjustcolor("red", 0.4), lwd = 3)
|
|
abline(h=1, lty=2, col = adjustcolor(1, 0.4), lwd = 2)
|
|
## lty, col,: from matplot()
|
|
legend(mean(x), 0.98, legend = paste0("B_", 1:ncol(bb)), lty=1:5, col=1:6,
|
|
bty = "n", xjust = 0.5)
|
|
invisible(list(x=x, splineDesign = bb))
|
|
}
|
|
|
|
if(!dev.interactive(orNone=TRUE)) pdf("spline-tst.pdf")
|
|
|
|
plotSplD(kn8.01)
|
|
chkSum (kn8.01)
|
|
|
|
## from ../man/splineDesign.Rd :
|
|
knots <- c(1,1.8,3:5,6.5,7,8.1,9.2,10) # 10 => 10-4 = 6 Basis splines
|
|
str(plotSplD(knots)) # cubic splines, adding to 1 in [ 4, 7 ]
|
|
str(plotSplD(knots, ord=3))# quadratic splines, adding to 1 in [ 3, 8.1]
|
|
str(plotSplD(knots, ord=2))# linear splines, adding to 1 in [1.8,9.2]
|
|
str(plotSplD(knots, ord=1))# constant splines, adding to 1 in [1, 10]
|
|
|
|
chkSum(knots)
|
|
chkSum(knots, ord=3)
|
|
chkSum(knots, ord=2)
|
|
chkSum(knots, ord=1)
|
|
|
|
## cases that failed too {Linux lynne 4.1.6 x86_64}
|
|
chkSum(c(1:5, 9,9), ord=2) # ok for ord in {1,3,4}
|
|
chkSum(c(1:4, 9,9,9), ord=3)
|
|
chkSum(c(1:3, 9,9,9,9), ord=4)
|
|
## These failed in R <= 3.2.2 (but not after first fix):
|
|
chkSum(c(0,0,0,0, 1:3), ord=4)
|
|
chkSum(c(0,0,0, 1:4), ord=3)
|
|
chkSum(c(0,0, 1:5), ord=2)
|
|
|
|
## This should be symmetric, but was not;
|
|
## the graphic is maybe most convincing:
|
|
k6.01 <- c(0,0,0, 1,1,1)
|
|
round(with(plotSplD(k6.01, ord=2, n=16), cbind(x, splineDesign)), 4)
|
|
x8 <- (0:8)/8
|
|
sp8 <- splineDesign(k6.01, x=x8, ord=2)
|
|
print.table(8*sp8, zero.print=".")
|
|
stopifnot(all.equal(8*sp8,
|
|
cbind(0, 8:0, 0:8, 0), tol = 1e-14))
|
|
## or just
|
|
splineDesign(k6.01, x=0:1, ord=2)
|
|
## 0 1 0 0
|
|
## 0 0 1 0 --- [finally !]
|
|
stopifnot(identical(cbind(0, diag(2), 0),
|
|
splineDesign(k6.01, x=0:1, ord=2)))
|
|
|
|
## Further:
|
|
for(k in 0:8)
|
|
chkSum(c(0:k, 9,9,9), ord=2)
|
|
for(k in 0:8)
|
|
chkSum(c(0:k, 9,9,9,9), ord=3)
|
|
for(k in 0:8)
|
|
chkSum(c(0:k, 9,9,9,9,9), ord=4)
|
|
|
|
## look at two examples
|
|
r <- plotSplD(c(0:4, 8.9,9,9,9), ord=3)# B_6 is narrow spike
|
|
r. <- plotSplD(c(0:4, 9, 9,9,9), ord=3)# B_6 diverged to all zero.
|
|
if(interactive())
|
|
round(with(r., cbind(x, splineDesign)), 4)
|
|
stopifnot(r.$splineDesign[, 6] == 0)
|
|
## one could argue that B_6 should also have finite area, and hence be "a delta".
|
|
## ==> sum_j B_j(x = 9) would then be Inf or undefined ..
|
|
## more sensible: B_6 should be omitted and should have B_5(9) == 1
|
|
## now implemented:
|
|
stopifnot(identical(c(0, 0, 0, 0, 1, 0),
|
|
with(r., splineDesign[x == 9,])))
|
|
|
|
## Everything is fine, if left-right mirrored / reversed :
|
|
r03 <- plotSplD(c(0,0,0,0, 1:5), ord=3)
|
|
## here, B_1 is "delta" and should rather be omitted
|
|
stopifnot(identical(c(0, 1, 0, 0, 0, 0),
|
|
with(r03, splineDesign[x == 0,])))
|
|
## 0 1 0 0 0 0 -- as it should be ..
|
|
round(with(r03, cbind(x, splineDesign)[50:60,]), 4)
|
|
|
|
r04 <- plotSplD(c(0,0,0,0, 1:5), ord=4)
|
|
## here, B_1 is "delta" and should rather be omitted
|
|
stopifnot(identical(c(1, 0, 0, 0, 0),
|
|
with(r04, splineDesign[x == 0,])))
|
|
round(with(r04, cbind(x, splineDesign)[50:60,]), 4)
|
|
|
|
r0.4 <- plotSplD(c(0,0,0, 1:5), ord=4) # basis is nice and correct
|
|
|
|
|
|
set.seed(17)
|
|
|
|
for(n in 1:1000) {
|
|
if(n %% 50 == 0) cat(sprintf("n = %4d\n",n))
|
|
kn <- sort.int(round(10* rnorm(4 + rpois(1, lambda=4))))
|
|
for(oo in 1:4)
|
|
## if(inherits(r <- tryCatch(chkSum(kn, ord=oo), error=identity), "error"))
|
|
## cat(r$message, "\n chkSum(", deparse(kn), ", ord = ", oo, ")\n")
|
|
chkSum(kn, ord = oo)
|
|
}
|
|
|
|
## One of the cases with NaN {when used ( . <= x & x <= . )}:
|
|
|
|
bb <- chkSum(c(-14, -4, 3, 5, 6, 15, 15))
|
|
stopifnot(is.finite(rowSums(bb)))
|
|
|
|
|
|
stopifnot(chkSum.Ok)
|
|
|
|
proc.time()
|
|
|
|
###---- "Bug report" (to R-core) from Trevor Hastie ---
|
|
###----> bs(*, Boundary.knots = .)
|
|
### needing boundary ajustment for correct extrapolation
|
|
|
|
## Trevor's Example, slightly modified and extended:
|
|
x <- seq(1.5, 8.5, by = 1/4)
|
|
set.seed(13)
|
|
y <- x + .01*(x - 5)^3 + rnorm(x)
|
|
|
|
fit0 <- lm(y ~ bs(x, degree=3, knots=4))
|
|
fit0.<- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,9)))# *NOT* outside
|
|
fit1 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,8)))# warning
|
|
fit2 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(2,8)))# warning "2 x"
|
|
|
|
jx <- seq(from=-2,to=12, by=0.1)
|
|
p0 <- predict(fit0, list(x=jx))
|
|
p0.<- predict(fit0, list(x=jx))
|
|
p1 <- predict(fit1, list(x=jx))
|
|
p2 <- predict(fit2, list(x=jx))
|
|
stopifnot(all.equal(p0, p0.,tol=1e-14),
|
|
all.equal(p0, p1, tol=1e-14),
|
|
all.equal(p0, p2, tol=1e-14))
|
|
## ^^ p1 and p2 differed from p0 in R <= 3.2.2
|
|
## See numerical fuzz:
|
|
all.equal(p0, p0., tol=0)
|
|
all.equal(p0, p1, tol=0)
|
|
all.equal(p0, p2, tol=0)
|
|
all.equal(p1, p2, tol=0)# interestingly almost the same
|
|
|
|
## formula ==> print for default method
|
|
ispl <- with(women, interpSpline( height, weight ))
|
|
stopifnot(identical(format(formula(ispl)),
|
|
"weight ~ height")) ## was wrongly .Primitive(\"~\")(wei...
|
|
|
|
###------------- Problems for small n ------- want at least good error messages ---
|
|
|
|
## This gives an error, but not a "human readable" one for k <= 3
|
|
## -- now done: the English error message is "must have at least 'ord'=4 points"
|
|
## FIXME: Would like to get degree=0 (constant), 1, 2 interpolation spline here
|
|
## (and could use degree = 0 for both n=0 and n=1) ==> would have *no* error here
|
|
for(n in 0:4) {
|
|
cat("n = ", n,": ")
|
|
x <- cumsum(pmax(1, round(10*runif(n)))) # pmax(1,*): x[] must be distinct
|
|
y <- (x - 2)^2 + round(8*rnorm(n))/4
|
|
if(!inherits(sp <- try(interpSpline(x,y)), "try-error")) print(sp)
|
|
cat("-------------------------------\n")
|
|
}
|
|
## for n=4:
|
|
stopifnot(inherits(sp, "polySpline"), is.matrix(sp$coefficients),
|
|
identical(dim(sp$coefficients), c(4L, 4L)))
|
|
|
|
## This also gives a "hard to read" error _FIXME_
|
|
try(ns(1[0])) # n = 0
|
|
## Error in splineDesign(Aknots, x, ord) :
|
|
## length of 'derivs' is larger than length of 'x' -- barely ok + 2 warnings
|
|
try(bs(1[0])) # n = 0 : same error etc as ns()
|
|
|
|
(b1 <- bs(pi)) # n = 1 : works fine
|
|
(n1 <- ns(pi)) # n = 1 : now ok; gave error: "qr.default(.. NA ...)"
|
|
|
|
##' keep {dim, dimnames} but nothing else :
|
|
noAttr <- function(x) `mostattributes<-`(x, list(dim=dim(x), dimnames=dimnames(x)))
|
|
stopifnot(
|
|
identical(noAttr(b1), rbind(c(`1`=0, `2`=0, `3`=0))),
|
|
all.equal(noAttr(n1), matrix(0.400891862868637, 1, dimnames=list(NULL,"1")),
|
|
tol = 5e-15))
|
|
|
|
d1 <- data.frame(u = 2, Y = 5) ## data set with 1 observation
|
|
summary(mbs <- lm(Y ~ bs(u), data=d1)) # fine though many coef etc are NA
|
|
summary(mbs1 <- lm(Y ~ bs(u, degree=1), data=d1)) # ok
|
|
stopifnot(
|
|
identical(coef(mbs), setNames(c(5, NA, NA, NA),
|
|
c("(Intercept)", paste0("bs(u)", 1:3))))
|
|
,
|
|
identical(coef(mbs1), c("(Intercept)" = 5, "bs(u, degree = 1)" = NA)))
|
|
if(FALSE)
|
|
summary(mbs0 <- lm(Y ~ bs(u, degree=0), data=d1)) # error: degree >= 1
|
|
|
|
## ns() has no 'degree' argument!
|
|
summary(mns <- lm(Y ~ ns(u), data=d1)) ## gave Error; now ok
|
|
stopifnot(identical(coef(mns), c("(Intercept)" = 5, "ns(u)" = NA))
|
|
## perfect prediction: all residuals == 0 :
|
|
, identical(residuals(mns), c(`1` = 0))
|
|
, identical(residuals(mbs), c(`1` = 0))
|
|
, identical(residuals(mbs1),c(`1` = 0))
|
|
)
|
|
|
|
|
|
## [Bug 18442] ns() fails when quantiles end up on the boundary (2022-12-05)
|
|
## ========= ---- 1st example ===> <R>/tests/reg-tests-1e.R 'ns(nn,4)'
|
|
##
|
|
## a (more extreme) example where bs() is affected similarly:
|
|
require(splines)
|
|
##
|
|
x <- c(rep(0L, 44), 1:10, 2L*(6:15), as.integer(round(1.25^(15:22))))
|
|
y <- 10L*x + c(-1L,1L)
|
|
B <- bs(x, df = 7)
|
|
summary(m <- lm(y ~ B))
|
|
stopifnot(exprs = {
|
|
qr(B)$rank == 7
|
|
all.equal(c(0, 0.03265, 20.53, 21.79, 73.27, 519.8, 964.3, 1361),
|
|
unname(coef(m)), tol = 1e-4)
|
|
})
|
|
## rank was 5, and coef(m) had 3 NA's in R <= 4.2.x
|