require("splines")

## Bug report PR#16549 - 'bad value from splineDesign'
## Date: Wed, 30 Sep 2015 12:12:47 +0000
## https://bugs.r-project.org/show_bug.cgi?id=16549

## Reporter: roconnor@health.usf.edu  (extended original example code)

kn8.01 <- c(0,0,0,0,1,1,1,1)
m1  <- rbind(c(1, 0,0,0))
m.1 <- rbind(c(0,0,0, 1))
stopifnot(
    ## the first gave (0 0 0 0) instead of m1 :
    all.equal(splineDesign(kn8.01, c(0,2),    outer.ok = TRUE), rbind(m1, 0)),
    all.equal(splineDesign(kn8.01, c( 0,1,2), outer.ok = TRUE), rbind(m1, m.1, 0)),
    all.equal(splineDesign(kn8.01, c(-1,0,1), outer.ok = TRUE), rbind(0, m1, m.1)),
    all.equal(splineDesign(kn8.01, 0, outer.ok = TRUE), m1),
    all.equal(splineDesign(kn8.01, 0), m1),
    TRUE)

## The original fix proposal introduced a new bug, visible here:
S <- splineDesign(c(-3, -3, -2, 0, 2, 3, 3), x= -3:3, outer.ok=TRUE)
## (had a NaN in the lower-right corner)
stopifnot(all.equal(S,
		    rbind(0, c(22,3,0)/45, c(193, 6*23, 9)/360,
			  c(1,3,1)/5,
			  c(9, 6*23, 193)/360,
			  c(0,3,22)/45, 0),
		    tolerance = 1e-14))

chkSum.Ok <- TRUE ## for check
chkSum <- function(knots, n = 1 + 2^9, ord = 4) {
    stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
    dk <- diff(rk <- range(uk <- unique(knots)))
    if(dk == 0) dk <- uk[1]/4
    d.x <- dk / (2*n.k)
    ## x: the unique knots
    x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
    bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
    is.x.in <- knots[ord] <= x & x <= knots[n.k-(ord-1)]
    ##                   ~~~~     ~~~~  same as in splineDesign(*, outer.ok=TRUE)
    ##
### no longer needed:
    ## work around "infelicity" (or not; not sure if to call "bug")
    ## n.RHk := #{duplicated RHS boundary knots}
    ## n.RHk <- match(FALSE, rev(duplicated(knots)))
    ## if(ord > 1 && n.RHk > ord) { ## the (ord == 1) case "works" via spike = 1
    ##     ## <==> knots[n.k -j ] == knots[n.k] for j = 0,1,..,(n.RHk-1)
    ##     ## then spl(x= RHS-knot, knots) == 0, even though "should" be 1
    ##     ## "FIX it up" for here. FIXME: do in splineDesign() or it's C code ?!
    ##     iR <- which(x == knots[n.k])
    ##     ## ncol(bb) == n.k - ord
    ##     j <- n.k - n.RHk # == ncol(bb) - (n.RHk - ord)
    ##     if(any(bb[iR, j] == 0)) bb[iR, j] <- 1
    ## }
    sumB <- rowSums(bb)
    if(any(iBad <- !is.finite(sumB))) {
	chkSum.Ok <<- FALSE ## for check
	cat("** _FIXME_ NON-finite values in sumB: ord = ", ord, "; |knots| =", n.k,"\n")
	cat("knots <- "); dput(knots)
	cat("non-finite at x = "); dput(x[iBad])
    } else if(length(bb)) { # only when bb[] is not 0-dimensional
	eps <- 3*.Machine$double.eps
	stopifnot(abs(1 - sumB[is.x.in]) <= 2*eps, 0 <= sumB+eps, sumB-2*eps <= 1,
		  allow.logical0=TRUE)
	## TODO: now also check derivatives
    }
    invisible(bb)
}

plotSplD <- function(knots, n = 2^10, ord = 4, type = "l",
		     ylim = c(0,1), ylab = "B-splines", ...) {
    stopifnot(is.numeric(knots), !is.unsorted(knots), (n.k <- length(knots)) >= ord)
    dk <- diff(range(uk <- unique(knots)))
    if(dk == 0) dk <- uk[1]/4
    d.x <- dk / (2*n.k)
    ## x: will contain the unique knots uk
    x <- sort(c(uk, seq.int(min(knots)-d.x, max(knots)+d.x, length.out = n)))
    bb <- splineDesign(knots, x = x, ord = ord, outer.ok = TRUE)
    matplot(x, bb, type=type, ylim=ylim, ylab=ylab, ...)
    sumB <- rowSums(bb)
    abline(v = knots, lty = 3, col = "light gray")
    abline(v = knots[c(ord, n.k-(ord-1))], lty = 3, col = "gray10")
    lines(x, sumB, col = adjustcolor("red", 0.4), lwd = 3)
    abline(h=1, lty=2, col = adjustcolor(1, 0.4), lwd = 2)
    ## lty, col,: from matplot()
    legend(mean(x), 0.98, legend = paste0("B_", 1:ncol(bb)), lty=1:5, col=1:6,
	   bty = "n", xjust = 0.5)
    invisible(list(x=x, splineDesign = bb))
}

if(!dev.interactive(orNone=TRUE)) pdf("spline-tst.pdf")

plotSplD(kn8.01)
chkSum  (kn8.01)

## from ../man/splineDesign.Rd :
knots <- c(1,1.8,3:5,6.5,7,8.1,9.2,10)  # 10 => 10-4 = 6 Basis splines
str(plotSplD(knots))       # cubic     splines, adding to 1 in [ 4, 7 ]
str(plotSplD(knots, ord=3))# quadratic splines, adding to 1 in [ 3, 8.1]
str(plotSplD(knots, ord=2))# linear    splines, adding to 1 in [1.8,9.2]
str(plotSplD(knots, ord=1))# constant  splines, adding to 1 in [1, 10]

chkSum(knots)
chkSum(knots, ord=3)
chkSum(knots, ord=2)
chkSum(knots, ord=1)

## cases that failed too {Linux lynne 4.1.6 x86_64}
chkSum(c(1:5, 9,9),     ord=2) # ok for ord in {1,3,4}
chkSum(c(1:4, 9,9,9),   ord=3)
chkSum(c(1:3, 9,9,9,9), ord=4)
## These failed in R <= 3.2.2 (but not after first fix):
chkSum(c(0,0,0,0, 1:3), ord=4)
chkSum(c(0,0,0,   1:4), ord=3)
chkSum(c(0,0,     1:5), ord=2)

## This should be symmetric, but was not;
## the graphic is maybe most convincing:
k6.01 <- c(0,0,0, 1,1,1)
round(with(plotSplD(k6.01, ord=2, n=16), cbind(x, splineDesign)), 4)
x8 <- (0:8)/8
sp8 <- splineDesign(k6.01, x=x8, ord=2)
print.table(8*sp8, zero.print=".")
stopifnot(all.equal(8*sp8,
		    cbind(0, 8:0, 0:8, 0), tol = 1e-14))
## or just
splineDesign(k6.01, x=0:1, ord=2)
##  0 1 0 0
##  0 0 1 0 --- [finally !]
stopifnot(identical(cbind(0, diag(2), 0),
		    splineDesign(k6.01, x=0:1, ord=2)))

## Further:
for(k in 0:8)
    chkSum(c(0:k, 9,9,9),     ord=2)
for(k in 0:8)
    chkSum(c(0:k, 9,9,9,9),   ord=3)
for(k in 0:8)
    chkSum(c(0:k, 9,9,9,9,9), ord=4)

## look at two examples
r  <- plotSplD(c(0:4, 8.9,9,9,9), ord=3)# B_6 is narrow spike
r. <- plotSplD(c(0:4,  9, 9,9,9), ord=3)# B_6 diverged to all zero.
if(interactive())
    round(with(r., cbind(x, splineDesign)), 4)
stopifnot(r.$splineDesign[, 6] == 0)
## one could argue that B_6 should also have finite area, and hence be "a delta".
## ==> sum_j B_j(x = 9) would then be Inf or undefined ..
## more sensible: B_6 should be omitted and should have B_5(9) == 1
## now implemented:
stopifnot(identical(c(0, 0, 0, 0, 1, 0),
		    with(r., splineDesign[x == 9,])))

## Everything is fine, if  left-right  mirrored / reversed :
r03 <- plotSplD(c(0,0,0,0,  1:5), ord=3)
## here, B_1 is "delta" and should rather be omitted
stopifnot(identical(c(0, 1, 0, 0, 0, 0),
		    with(r03, splineDesign[x == 0,])))
## 0 1 0 0 0 0 -- as it should be ..
round(with(r03, cbind(x, splineDesign)[50:60,]), 4)

r04 <- plotSplD(c(0,0,0,0,  1:5), ord=4)
## here, B_1 is "delta" and should rather be omitted
stopifnot(identical(c(1, 0, 0, 0, 0),
		    with(r04, splineDesign[x == 0,])))
round(with(r04, cbind(x, splineDesign)[50:60,]), 4)

r0.4 <- plotSplD(c(0,0,0, 1:5), ord=4) # basis is nice and correct


set.seed(17)

for(n in 1:1000) {
    if(n %% 50 == 0) cat(sprintf("n = %4d\n",n))
    kn <- sort.int(round(10* rnorm(4 + rpois(1, lambda=4))))
    for(oo in 1:4)
	## if(inherits(r <- tryCatch(chkSum(kn, ord=oo), error=identity), "error"))
	##     cat(r$message, "\n chkSum(", deparse(kn), ", ord = ", oo, ")\n")
	chkSum(kn, ord = oo)
}

## One of the cases with NaN {when used  ( . <= x & x <= . )}:

bb <- chkSum(c(-14, -4, 3, 5, 6, 15, 15))
stopifnot(is.finite(rowSums(bb)))


stopifnot(chkSum.Ok)

proc.time()

###---- "Bug report" (to R-core) from Trevor Hastie ---
###---->  bs(*, Boundary.knots = .)
### needing boundary ajustment for correct extrapolation

## Trevor's Example, slightly modified and extended:
x <- seq(1.5, 8.5, by = 1/4)
set.seed(13)
y <- x + .01*(x - 5)^3 + rnorm(x)

fit0 <- lm(y ~ bs(x, degree=3, knots=4))
fit0.<- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,9)))# *NOT* outside
fit1 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(1,8)))# warning
fit2 <- lm(y ~ bs(x, degree=3, knots=4, Boundary.knots=c(2,8)))# warning "2 x"

jx <- seq(from=-2,to=12, by=0.1)
p0 <- predict(fit0, list(x=jx))
p0.<- predict(fit0, list(x=jx))
p1 <- predict(fit1, list(x=jx))
p2 <- predict(fit2, list(x=jx))
stopifnot(all.equal(p0, p0.,tol=1e-14),
          all.equal(p0, p1, tol=1e-14),
          all.equal(p0, p2, tol=1e-14))
## ^^  p1 and p2 differed from p0 in R <= 3.2.2
## See numerical fuzz:
all.equal(p0, p0., tol=0)
all.equal(p0, p1,  tol=0)
all.equal(p0, p2,  tol=0)
all.equal(p1, p2,  tol=0)# interestingly almost the same

## formula ==> print for default method
ispl <- with(women, interpSpline( height, weight ))
stopifnot(identical(format(formula(ispl)),
		    "weight ~ height")) ## was wrongly .Primitive(\"~\")(wei...

###------------- Problems for small n ------- want at least good error messages ---

## This gives an error, but not a "human readable" one for k <= 3
## -- now done: the English error message is  "must have at least 'ord'=4 points"
## FIXME: Would like to get degree=0 (constant), 1, 2 interpolation spline here
##        (and could use degree = 0 for both n=0 and n=1) ==> would have *no* error here
for(n in 0:4) {
    cat("n = ", n,": ")
    x <- cumsum(pmax(1, round(10*runif(n)))) # pmax(1,*): x[] must be distinct
    y <- (x - 2)^2 + round(8*rnorm(n))/4
    if(!inherits(sp <- try(interpSpline(x,y)), "try-error")) print(sp)
    cat("-------------------------------\n")
}
## for n=4:
stopifnot(inherits(sp, "polySpline"), is.matrix(sp$coefficients),
          identical(dim(sp$coefficients), c(4L, 4L)))

## This also gives a "hard to read" error _FIXME_
try(ns(1[0])) # n = 0
## Error in splineDesign(Aknots, x, ord) :
##   length of 'derivs' is larger than length of 'x' -- barely ok  + 2 warnings
try(bs(1[0])) # n = 0 :  same error etc as  ns()

(b1 <- bs(pi)) # n = 1 :  works fine
(n1 <- ns(pi)) # n = 1 : now ok; gave error:  "qr.default(.. NA ...)"

##' keep {dim, dimnames} but nothing else :
noAttr <- function(x) `mostattributes<-`(x, list(dim=dim(x), dimnames=dimnames(x)))
stopifnot(
    identical(noAttr(b1), rbind(c(`1`=0, `2`=0, `3`=0))),
    all.equal(noAttr(n1), matrix(0.400891862868637, 1, dimnames=list(NULL,"1")),
	      tol = 5e-15))

d1 <- data.frame(u = 2, Y = 5) ## data set with 1 observation
summary(mbs  <- lm(Y ~ bs(u),           data=d1)) # fine though many coef etc are NA
summary(mbs1 <- lm(Y ~ bs(u, degree=1), data=d1)) # ok
stopifnot(
    identical(coef(mbs), setNames(c(5, NA, NA, NA),
                                  c("(Intercept)", paste0("bs(u)", 1:3))))
    ,
    identical(coef(mbs1), c("(Intercept)" = 5, "bs(u, degree = 1)" = NA)))
if(FALSE)
summary(mbs0 <- lm(Y ~ bs(u, degree=0), data=d1)) # error: degree >= 1

## ns() has no 'degree' argument!
summary(mns  <- lm(Y ~ ns(u),       data=d1)) ## gave Error; now ok
stopifnot(identical(coef(mns),  c("(Intercept)" = 5, "ns(u)" = NA))
	  ## perfect prediction: all residuals == 0 :
	, identical(residuals(mns), c(`1` = 0))
	, identical(residuals(mbs), c(`1` = 0))
	, identical(residuals(mbs1),c(`1` = 0))
	  )


## [Bug 18442] ns() fails when quantiles end up on the boundary  (2022-12-05)
##  =========  ---- 1st example ===> <R>/tests/reg-tests-1e.R  'ns(nn,4)'
##
## a (more extreme) example where bs() is affected similarly:
require(splines)
##
x <- c(rep(0L, 44), 1:10, 2L*(6:15), as.integer(round(1.25^(15:22))))
y <- 10L*x + c(-1L,1L)
B <- bs(x, df = 7)
summary(m <- lm(y ~ B))
stopifnot(exprs = {
    qr(B)$rank == 7
    all.equal(c(0, 0.03265, 20.53, 21.79, 73.27, 519.8, 964.3, 1361),
              unname(coef(m)), tol = 1e-4)
})
## rank was 5, and coef(m) had 3 NA's in R <= 4.2.x