## (file-viewer header: 67 lines, 2.9 KiB, R)
library(cluster)
## Small 17 x 2 example with NAs in both columns; row 6 is NA in *both*
## columns.  Column 1 ends in a second group of values shifted by 100.
x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1,
             100 + c(13, 0, 2, 4, 7, 8, 1)),
           c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13,
             47, 64, 48, NA, NA, 44, 65))
x
## Manhattan distances; printed (outer parentheses) as well as stored in 'd':
(d <- dist(x, "manhattan"))
summary(d, na.rm = TRUE) # max = 270
## First call with "trace" (seg.fault typically later ...):
## wrapped in try() so that the script carries on after the error.
try(clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3))
## Originally: already shows the problem:  nbest[] = c(0,0,...,0) must be WRONG!!
## Now: gives the proper error message.

## S-plus 6.1.2 (rel.2 for Linux, 2002) gives
##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10)
## Problem in .Fortran("clara",: Internal error: data for decrementing
## ref.count didn't point to a valid arena (0x0), while calling subroutine clara
## The large example from clara.R -- made small enough to still provoke
## the "** dysta2() ... OUT" problem {no longer!}
## 176 values filled column-wise into 2 columns; row 33 is NA in both columns.
x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10,
              51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA,
              9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38,
              54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59,
              -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56,
              51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3,
              -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1,
              12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15,
              -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2,
              23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1,
              9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8),
            ncol = 2)
str(x) # 88 x 2
## Both calls are wrapped in try() so the script continues if clara() errors.
try(clara(x, 2, samples = 20, trace = 3)) # 2nd sample did show dysta2() problem
## To see error message for > 1 missing:
## (rbind(NA, x) prepends one more row that is NA in every column)
try(clara(rbind(NA, x), 2))
## Drop row 33:
x <- x[-33, ]
## still had the "** dysta2() .. OUT" problem {no longer!}
c2  <- clara(x, 2, samples = 12, trace = 3)
## Same call, but with 'correct.d' given explicitly (and less tracing):
c2. <- clara(x, 2, samples = 12, trace = 1, correct.d = TRUE)
## pam()'s tracing argument is 'trace.lev'; spell it out instead of relying on
## partial matching of 'trace'.
p2g <- pam(daisy(x, "gower"), k = 2, trace.lev = 3)
if (FALSE) { ## disabled clara(*, "gower") for now (2023-11-30):
  c2g <- clara(x, 2, samples = 12, sampsize = nrow(x), trace = 2,
               metric = "gower", pamLike = TRUE, correct.d = TRUE)
  ## position of the "call" component, which is excluded from the comparison below
  (icall <- which(names(c2) == "call"))
  ## c2g and p2g are *quite* different !
  table(c2g$clustering,
        p2g$clustering)
  ##      1  2
  ##   1 40 32
  ##   2 15  0  << not *one* pair of {2,2} !?!

  ## NOTE(review): this check only compares 'c2' and 'c2.' (no "gower"
  ## involved), yet it is disabled together with the rest of this block --
  ## confirm whether it should run unconditionally.
  stopifnot(exprs = {
    all.equal(c2[-icall], c2.[-icall])
  })
} # no "gower" for now
|
|
data(xclara)
|
|
suppressWarnings(RNGversion("3.5.0")) # back compatibility of results
|
|
set.seed(123)
|
|
xclara[sample(nrow(xclara), 50),] <- NA
|
|
try( clara(xclara, k = 3) ) #-> "nice" error message {.. first 12 missing obs} :
|
|
## Error in clara(xclara, k = 3) :
|
|
## 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs
|
|
## --> omit them for clustering!
|