228 lines
9.5 KiB
R
Raw Permalink Normal View History

2025-01-12 00:52:51 +08:00
library(cluster)
## Compare on these:
nms <- c("clustering", "objective", "isolation", "clusinfo", "silinfo")
nm2 <- c("medoids", "id.med", nms)
nm3 <- nm2[- pmatch("obj", nm2)]
(x <- x0 <- cbind(V1 = (-3:4)^2, V2 = c(0:6,NA), V3 = c(1,2,NA,7,NA,8:9,8)))
(px <- pam(x,2, metric="manhattan"))
stopifnot(identical(x,x0))# DUP=FALSE ..
pd <- pam(dist(x,"manhattan"), 2)
px2 <- pam(x,2, metric="manhattan", keep.diss=FALSE, keep.data=FALSE)
pdC <- pam(x,2, metric="manhattan", cluster.only = TRUE)
p1 <- pam(x,1, metric="manhattan")
stopifnot(identical(px[nms], pd[nms]),
identical(px[nms], px2[nms]),
identical(pdC, px2$clustering),
## and for default dist "euclidean":
identical(pam(x, 2)[nms],
pam(dist(x),2)[nms]),
identical(p1[c("id.med", "objective", "clusinfo")],
list(id.med = 6L, objective = c(build=9.25, swap=9.25),
clusinfo = array(c(8, 18, 9.25, 45, 0), dim = c(1, 5),
dimnames=list(NULL, c("size", "max_diss", "av_diss",
"diameter", "separation"))))),
p1$clustering == 1, is.null(p1$silinfo)
)
set.seed(253)
## generate 250 objects, divided into 2 clusters.
x <- rbind(cbind(rnorm(120, 0,8), rnorm(120, 0,8)),
cbind(rnorm(130,50,8), rnorm(130,10,8)))
.proctime00 <- proc.time()
summary(px2 <- pam(x, 2))
pdx <- pam(dist(x), 2)
all.equal(px2[nms], pdx[nms], tol = 1e-12) ## TRUE
pdxK <- pam(dist(x), 2, keep.diss = TRUE)
stopifnot(identical(pdx[nm2], pdxK[nm2]))
spdx <- silhouette(pdx)
summary(spdx)
spdx
postscript("pam-tst.ps")
if(FALSE)
plot(spdx)# the silhouette
## is now identical :
plot(pdx)# failed in 1.7.0 -- now only does silhouette
par(mfrow = 2:1)
## new 'dist' argument for clusplot():
plot(pdx, dist=dist(x))
## but this should work automagically (via eval()) as well:
plot(pdx)
## or this
clusplot(pdx)
data(ruspini)
summary(pr4 <- pam(ruspini, 4))
(pr3 <- pam(ruspini, 3))
(pr5 <- pam(ruspini, 5))
data(votes.repub)
summary(pv3 <- pam(votes.repub, 3))
(pv4 <- pam(votes.repub, 4))
(pv6 <- pam(votes.repub, 6, trace = 3))
cat('Time elapsed: ', proc.time() - .proctime00,'\n')
## re-starting with medoids from pv6 shouldn't change:
pv6. <- pam(votes.repub, 6, medoids = pv6$id.med, trace = 3)
identical(pv6[nm3], pv6.[nm3])
## This example seg.faulted at some point:
d.st <- data.frame(V1= c(9, 12, 12, 15, 9, 9, 13, 11, 15, 10, 13, 13,
13, 15, 8, 13, 13, 10, 7, 9, 6, 11, 3),
V2= c(5, 9, 3, 5, 1, 1, 2, NA, 10, 1, 4, 7,
4, NA, NA, 5, 2, 4, 3, 3, 6, 1, 1),
V3 = c(63, 41, 59, 50, 290, 226, 60, 36, 32, 121, 70, 51,
79, 32, 42, 39, 76, 60, 56, 88, 57, 309, 254),
V4 = c(146, 43, 78, 88, 314, 149, 78, NA, 238, 153, 159, 222,
203, NA, NA, 74, 100, 111, 9, 180, 50, 256, 107))
dd <- daisy(d.st, stand = TRUE)
(r0 <- pam(dd, 5))# cluster 5 = { 23 } -- on single observation
## pam doing the "daisy" computation internally:
r0s <- pam(d.st, 5, stand=TRUE, keep.diss=FALSE, keep.data=FALSE)
(ii <- which(names(r0) %in% c("call","medoids")))
stopifnot(all.equal(r0[-ii], r0s[-ii], tol=1e-14),
identical(r0s$medoids, data.matrix(d.st)[r0$medoids, ]))
## This gave only 3 different medoids -> and seg.fault:
(r5 <- pam(dd, 5, medoids = c(1,3,20,2,5), trace = 2)) # now "fine"
dev.off()
##------------------------ Testing pam() with new "pamonce" argument:
## This is from "next version of Matrix" test-tools-1.R:
showSys.time <- function(expr) {
## prepend 'Time' for R CMD Rdiff
st <- system.time(expr)
writeLines(paste("Time", capture.output(print(st))))
invisible(st)
}
show6Ratios <- function(...) {
stopifnot(length(rgs <- list(...)) == 6,
nchar(ns <- names(rgs)) > 0)
r <- round(cbind(..1, ..2, ..3, ..4, ..5, ..6)[c(1,5),], 5)
dimnames(r) <- list(paste("Time ", rownames(r)), ns)
r
}
n <- 1000
## If not enough cases, all CPU times equals 0.
n <- 500 # for now, and automatic testing
sd <- 0.5
set.seed(13)
n2 <- as.integer(round(n * 1.5))
x <- rbind(cbind(rnorm( n,0,sd), rnorm( n,0,sd)),
cbind(rnorm(n2,5,sd), rnorm(n2,5,sd)),
cbind(rnorm(n2,7,sd), rnorm(n2,7,sd)),
cbind(rnorm(n2,9,sd), rnorm(n2,9,sd)))
## original algorithm
st0 <- showSys.time(pamx <- pam(x, 4, trace.lev=2))# 8.157 0.024 8.233
## bswapPamOnce algorithm
st1 <- showSys.time(pamxonce <- pam(x, 4, pamonce=TRUE, trace.lev=2))# 6.122 0.024 6.181
## bswapPamOnceDistIndice
st2 <- showSys.time(pamxonce2 <- pam(x, 4, pamonce = 2, trace.lev=2))# 4.101 0.024 4.151
## bswapPamSchubert FastPAM1
st3 <- showSys.time(pamxonce3 <- pam(x, 4, pamonce = 3, trace.lev=2))#
## bswapPamSchubert FastPAM2
st4 <- showSys.time(pamxonce4 <- pam(x, 4, pamonce = 4, trace.lev=2))#
## bswapPamSchubert FastPAM2 with linearized memory access
st5 <- showSys.time(pamxonce5 <- pam(x, 4, pamonce = 5, trace.lev=2))#
## bswapPamSchubert FasterPAM
st6 <- showSys.time(pamxonce6 <- pam(x, 4, pamonce = 6, trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)
## only call element is not equal
(icall <- which(names(pamx) == "call"))
pamx[[icall]]
stopifnot(all.equal(pamx [-icall], pamxonce [-icall]),
all.equal(pamxonce[-icall], pamxonce2[-icall]),
all.equal(pamxonce[-icall], pamxonce3[-icall]),
all.equal(pamxonce[-icall], pamxonce4[-icall]),
all.equal(pamxonce[-icall], pamxonce5[-icall]),
all.equal(pamxonce[-icall], pamxonce6[-icall]))
## Same using specified medoids
(med0 <- 1 + round(n* c(0,1, 2.5, 4)))# lynne (~ 2010, AMD Phenom II X4 925)
st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.071 0.024 13.177
st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.503 0.024 8.578
st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.587 0.025 5.647
st3 <- showSys.time(pamxonce3st <- pam(x, 4, medoids = med0, pamonce=3, trace.lev=2))#
st4 <- showSys.time(pamxonce4st <- pam(x, 4, medoids = med0, pamonce=4, trace.lev=2))#
st5 <- showSys.time(pamxonce5st <- pam(x, 4, medoids = med0, pamonce=5, trace.lev=2))#
st6 <- showSys.time(pamxonce6st <- pam(x, 4, medoids = med0, pamonce=6, trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)
## only call element is not equal
stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]),
all.equal(pamxoncest[-icall], pamxonce2st[-icall]),
all.equal(pamxoncest[-icall], pamxonce3st[-icall]),
all.equal(pamxoncest[-icall], pamxonce4st[-icall]),
all.equal(pamxoncest[-icall], pamxonce5st[-icall]),
all.equal(pamxoncest[-icall], pamxonce6st[-icall]))
## Different starting values
med0 <- 1:4 # lynne (~ 2010, AMD Phenom II X4 925)
st0 <- showSys.time(pamxst <- pam(x, 4, medoids = med0, trace.lev=2))# 13.416 0.023 13.529
st1 <- showSys.time(pamxoncest <- pam(x, 4, medoids = med0, pamonce=TRUE, trace.lev=2))# 8.384 0.024 8.459
st2 <- showSys.time(pamxonce2st <- pam(x, 4, medoids = med0, pamonce=2, trace.lev=2))# 5.455 0.030 5.520
st3 <- showSys.time(pamxonce3st <- pam(x, 4, medoids = med0, pamonce=3, trace.lev=2))#
st4 <- showSys.time(pamxonce4st <- pam(x, 4, medoids = med0, pamonce=4, trace.lev=2))#
st5 <- showSys.time(pamxonce5st <- pam(x, 4, medoids = med0, pamonce=5, trace.lev=2))#
st6 <- showSys.time(pamxonce6st <- pam(x, 4, medoids = med0, pamonce=6, trace.lev=2))#
show6Ratios('6:orig' = st6/st0, '5:orig' = st5/st0, '4:orig' = st4/st0, '3:orig' = st3/st0, '2:orig' = st2/st0, '1:orig' = st1/st0)
## only call element is not equal
stopifnot(all.equal(pamxst [-icall], pamxoncest [-icall]),
all.equal(pamxoncest[-icall], pamxonce2st[-icall]),
all.equal(pamxoncest[-icall], pamxonce3st[-icall]),
all.equal(pamxoncest[-icall], pamxonce4st[-icall]),
all.equal(pamxoncest[-icall], pamxonce5st[-icall]),
all.equal(pamxoncest[-icall], pamxonce6st[-icall]))
## Medoid bug --- MM: Fixed, well "0L+ hack", in my pam.q, on 2012-01-31
## ----------
med0 <- (1:6)
st0 <- showSys.time(pamxst <- pam(x, 6, medoids = med0 , trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st1 <- showSys.time(pamxst.1 <- pam(x, 6, medoids = med0 , pamonce=1, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st2 <- showSys.time(pamxst.2 <- pam(x, 6, medoids = med0 , pamonce=2, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st3 <- showSys.time(pamxst.3 <- pam(x, 6, medoids = med0 , pamonce=3, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st4 <- showSys.time(pamxst.4 <- pam(x, 6, medoids = med0 , pamonce=4, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st5 <- showSys.time(pamxst.5 <- pam(x, 6, medoids = med0 , pamonce=5, trace.lev=2))
stopifnot(identical(med0, 1:6))
med0 <- (1:6)
st6 <- showSys.time(pamxst.6 <- pam(x, 6, medoids = med0 , pamonce=6, trace.lev=2))
stopifnot(identical(med0, 1:6))
stopifnot(all.equal(pamxst[-icall], pamxst.1 [-icall]),
all.equal(pamxst[-icall], pamxst.2 [-icall]),
all.equal(pamxst[-icall], pamxst.3 [-icall]),
all.equal(pamxst[-icall], pamxst.4 [-icall]),
all.equal(pamxst[-icall], pamxst.5 [-icall]))
# FasterPAM finds a better solution here, by chance
stopifnot(pamxst$objective >= pamxst.6$objective)
## Last Line:
cat('Time elapsed: ', proc.time() - .proctime00,'\n')