638 lines
19 KiB
Plaintext
638 lines
19 KiB
Plaintext
|
|
R Under development (unstable) (2023-03-07 r83950) -- "Unsuffered Consequences"
|
|
Copyright (C) 2023 The R Foundation for Statistical Computing
|
|
Platform: x86_64-pc-linux-gnu (64-bit)
|
|
|
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
|
You are welcome to redistribute it under certain conditions.
|
|
Type 'license()' or 'licence()' for distribution details.
|
|
|
|
R is a collaborative project with many contributors.
|
|
Type 'contributors()' for more information and
|
|
'citation()' on how to cite R or R packages in publications.
|
|
|
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
|
'help.start()' for an HTML browser interface to help.
|
|
Type 'q()' to quit R.
|
|
|
|
> ## This came from a bug report on R-help by ge yreyt <tothri2000@yahoo.ca>
|
|
> ## Date: Mon, 9 Jun 2003 16:06:53 -0400 (EDT)
|
|
> library(cluster)
|
|
> if(FALSE) # manual testing
|
|
+ library(cluster, lib="~/R/Pkgs/cluster.Rcheck")
|
|
>
|
|
> data(iris)
|
|
>
|
|
> .proctime00 <- proc.time()
|
|
>
|
|
> mdist <- as.dist(1 - cor(t(iris[,1:4])))# dissimilarity
|
|
> ## this is always the same:
|
|
> hc <- diana(mdist, diss = TRUE, stand = FALSE)
|
|
>
|
|
> maxk <- 15 # at most 15 clusters
|
|
> silh.wid <- numeric(maxk) # silh.wid[k] := the average silhouette width for k clusters
|
|
> silh.wid[1] <- NA # 1-cluster: silhouette not defined
|
|
>
|
|
> op <- par(mfrow = c(4,4), mar = .1+ c(2,1,2,1), mgp=c(1.5, .6,0))
|
|
> for(k in 2:maxk) {
|
|
+ cat("\n", k,":\n==\n")
|
|
+ k.gr <- cutree(as.hclust(hc), k = k)
|
|
+ cat("grouping table: "); print(table(k.gr))
|
|
+ si <- silhouette(k.gr, mdist)
|
|
+ cat("silhouette:\n"); print(summary(si))
|
|
+ plot(si, main = paste("k =",k),
|
|
+ col = 2:(k+1), do.n.k=FALSE, do.clus.stat=FALSE)
|
|
+ silh.wid[k] <- summary(si)$avg.width
|
|
+ ## ===
|
|
+ }
|
|
|
|
2 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2
|
|
50 100
|
|
silhouette:
|
|
Silhouette of 150 units in 2 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
50 100
|
|
0.9829965 0.9362626
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
0.5884 0.9437 0.9611 0.9518 0.9815 0.9918
|
|
|
|
3 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3
|
|
50 50 50
|
|
silhouette:
|
|
Silhouette of 150 units in 3 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
50 50 50
|
|
0.9773277 0.6926798 0.7467236
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
0.03353 0.76937 0.86121 0.80558 0.97564 0.98919
|
|
|
|
4 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4
|
|
35 15 50 50
|
|
silhouette:
|
|
Silhouette of 150 units in 4 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 15 50 50
|
|
0.5653722 0.5226372 0.6926798 0.7467236
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
0.03353 0.56621 0.75102 0.66399 0.84240 0.89390
|
|
|
|
5 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5
|
|
35 15 29 21 50
|
|
silhouette:
|
|
Silhouette of 150 units in 5 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 15 29 21 50
|
|
0.5653722 0.5226372 0.5776362 0.4625437 0.5296735
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.5404 0.3937 0.6252 0.5372 0.7392 0.8136
|
|
|
|
6 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6
|
|
35 15 29 21 29 21
|
|
silhouette:
|
|
Silhouette of 150 units in 6 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 15 29 21 29 21
|
|
0.5653722 0.5226372 0.5776362 0.3732981 0.3383135 0.5945444
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.1094 0.3351 0.5257 0.4968 0.6938 0.8136
|
|
|
|
7 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7
|
|
35 14 1 29 21 29 21
|
|
silhouette:
|
|
Silhouette of 150 units in 7 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 14 1 29 21 29 21
|
|
0.4165289 0.6671435 0.0000000 0.5776362 0.3732981 0.3383135 0.5945444
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.3264 0.3001 0.5234 0.4720 0.6970 0.8301
|
|
|
|
8 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8
|
|
35 14 1 29 10 11 29 21
|
|
silhouette:
|
|
Silhouette of 150 units in 8 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 14 1 29 10 11 29 21
|
|
0.4165289 0.6671435 0.0000000 0.4209012 0.6943265 0.7262601 0.2053018 0.5945444
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.6258 0.2576 0.5842 0.4633 0.7132 0.8887
|
|
|
|
9 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9
|
|
35 14 1 26 10 11 3 29 21
|
|
silhouette:
|
|
Silhouette of 150 units in 9 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 14 1 26 10 11 3 29
|
|
0.4165289 0.6671435 0.0000000 0.5318152 0.6673269 0.6944652 0.7957279 0.2053018
|
|
21
|
|
0.5945444
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.6258 0.3150 0.5896 0.4859 0.7263 0.8870
|
|
|
|
10 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10
|
|
35 14 1 26 10 11 3 16 13 21
|
|
silhouette:
|
|
Silhouette of 150 units in 10 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 14 1 26 10 11 3 16
|
|
0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4640123
|
|
13 21
|
|
0.6615431 0.4228530
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.5870 0.3535 0.6068 0.5208 0.7349 0.8803
|
|
|
|
11 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11
|
|
35 14 1 26 10 11 3 16 13 11 10
|
|
silhouette:
|
|
Silhouette of 150 units in 11 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 14 1 26 10 11 3 16
|
|
0.4165289 0.6671435 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279 0.4064279
|
|
13 11 10
|
|
0.5866228 0.4297258 0.6590274
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.3264 0.3730 0.5984 0.5244 0.7302 0.8505
|
|
|
|
12 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12
|
|
35 11 3 1 26 10 11 3 16 13 11 10
|
|
silhouette:
|
|
Silhouette of 150 units in 12 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
35 11 3 1 26 10 11 3
|
|
0.2883758 0.7044155 0.4092330 0.0000000 0.5318152 0.6319149 0.6145837 0.7957279
|
|
16 13 11 10
|
|
0.4064279 0.5866228 0.4297258 0.6590274
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.6007 0.3395 0.5817 0.4921 0.7216 0.8700
|
|
|
|
13 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13
|
|
28 11 3 7 1 26 10 11 3 16 13 11 10
|
|
silhouette:
|
|
Silhouette of 150 units in 13 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
28 11 3 7 1 26 10 11
|
|
0.3783869 0.6827810 0.4092330 0.4285753 0.0000000 0.5318152 0.6319149 0.6145837
|
|
3 16 13 11 10
|
|
0.7957279 0.4064279 0.5866228 0.4297258 0.6590274
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.4013 0.3314 0.5704 0.5138 0.7274 0.8531
|
|
|
|
14 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14
|
|
19 11 3 9 7 1 26 10 11 3 16 13 11 10
|
|
silhouette:
|
|
Silhouette of 150 units in 14 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
19 11 3 9 7 1 26 10
|
|
0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.5318152 0.6319149
|
|
11 3 16 13 11 10
|
|
0.6145837 0.7957279 0.4064279 0.5866228 0.4297258 0.6590274
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.5929 0.3795 0.5875 0.5217 0.7263 0.8505
|
|
|
|
15 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
|
19 11 3 9 7 1 18 10 11 8 3 16 13 11 10
|
|
silhouette:
|
|
Silhouette of 150 units in 15 clusters from silhouette.default(x = k.gr, dist = mdist) :
|
|
Cluster sizes and average silhouette widths:
|
|
19 11 3 9 7 1 18 10
|
|
0.5419530 0.6171802 0.3959926 0.4525348 0.1669077 0.0000000 0.6616381 0.5871805
|
|
11 8 3 16 13 11 10
|
|
0.5171407 0.6705138 0.7444822 0.4064279 0.5866228 0.4297258 0.6590274
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.5929 0.3859 0.6211 0.5335 0.7478 0.8551
|
|
> par(op)
|
|
>
|
|
> summary(si.p <- silhouette(50 - k.gr, mdist))
|
|
Silhouette of 150 units in 15 clusters from silhouette.default(x = 50 - k.gr, dist = mdist) :
|
|
Cluster sizes, ids = (35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49), and average silhouette widths:
|
|
10 11 13 16 3 8 11 10
|
|
0.6590274 0.4297258 0.5866228 0.4064279 0.7444822 0.6705138 0.5171407 0.5871805
|
|
18 1 7 9 3 11 19
|
|
0.6616381 0.0000000 0.1669077 0.4525348 0.3959926 0.6171802 0.5419530
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.5929 0.3859 0.6211 0.5335 0.7478 0.8551
|
|
> stopifnot(identical(si.p[,3], si[,3]),
|
|
+ identical(si.p[, 1:2], 50 - si[, 1:2]))
|
|
>
|
|
> # the widths:
|
|
> silh.wid
|
|
[1] NA 0.9518406 0.8055770 0.6639850 0.5371742 0.4967654 0.4720384
|
|
[8] 0.4633064 0.4858965 0.5207776 0.5243911 0.4920638 0.5138220 0.5217026
|
|
[15] 0.5335255
|
|
> # select the number of clusters k with the smallest average silhouette width (which.min(); the largest is at k = 2):
|
|
> (myk <- which.min(silh.wid)) # -> 8 (here)
|
|
[1] 8
|
|
>
|
|
> postscript(file="silhouette-ex.ps")
|
|
> ## MM: plot to see how the decision is made
|
|
> plot(silh.wid, type = 'b', col= "blue", xlab = "k")
|
|
> axis(1, at=myk, col.axis= "red", font.axis= 2)
|
|
>
|
|
> ##--- PAM()'s silhouette should give same as silh*.default()!
|
|
> Eq <- function(x,y, tol = 1e-12) x == y | abs(x - y) < tol * abs((x+y)/2)
|
|
>
|
|
> for(k in 2:40) {
|
|
+ cat("\n", k,":\n==\n")
|
|
+ p.k <- pam(mdist, k = k)
|
|
+ k.gr <- p.k$clustering
|
|
+ si.p <- silhouette(p.k)
|
|
+ si.g <- silhouette(k.gr, mdist)
|
|
+ ## since the obs.order may differ (within cluster):
|
|
+ si.g <- si.g[ as.integer(rownames(si.p)), ]
|
|
+ cat("grouping table: "); print(table(k.gr))
|
|
+ if(!isTRUE(all.equal(c(si.g), c(si.p)))) {
|
|
+ cat("silhouettes differ:")
|
|
+ if(any(neq <- !Eq(si.g[,3], si.p[,3]))) {
|
|
+ cat("\n")
|
|
+ print( cbind(si.p[], si.g[,2:3])[ neq, ] )
|
|
+ } else cat(" -- but not in col.3 !\n")
|
|
+ }
|
|
+ }
|
|
|
|
2 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2
|
|
50 100
|
|
|
|
3 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3
|
|
50 50 50
|
|
|
|
4 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4
|
|
50 43 37 20
|
|
|
|
5 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5
|
|
50 25 35 20 20
|
|
|
|
6 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6
|
|
33 17 25 35 20 20
|
|
|
|
7 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7
|
|
33 17 17 14 18 31 20
|
|
|
|
8 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8
|
|
21 13 16 17 14 18 31 20
|
|
|
|
9 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9
|
|
21 13 16 12 20 11 19 17 21
|
|
|
|
10 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10
|
|
21 13 16 18 10 15 14 7 16 20
|
|
|
|
11 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11
|
|
21 13 16 19 10 14 7 6 15 13 16
|
|
|
|
12 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12
|
|
21 13 16 17 10 12 9 3 5 15 13 16
|
|
|
|
13 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13
|
|
21 12 16 1 18 11 12 9 3 15 13 4 15
|
|
|
|
14 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14
|
|
20 10 7 13 18 10 12 9 3 7 10 13 4 14
|
|
|
|
15 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
|
20 11 5 13 1 18 10 12 9 3 7 10 13 4 14
|
|
|
|
16 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
|
|
20 11 5 13 1 12 8 9 11 9 3 7 10 13 4 14
|
|
|
|
17 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
|
|
20 11 5 13 1 12 8 7 9 10 3 3 9 13 4 13 9
|
|
|
|
18 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
|
20 11 5 9 4 1 12 8 7 9 10 3 3 9 13 4 13 9
|
|
|
|
19 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
|
|
20 11 5 9 4 1 10 8 8 9 8 3 3 9 13 3 4 13 9
|
|
|
|
20 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
|
|
20 11 5 9 4 1 10 8 8 9 8 3 3 9 12 3 4 6 9 8
|
|
|
|
21 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
|
|
20 11 5 9 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5
|
|
|
|
22 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
|
|
9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 9 8 5
|
|
|
|
23 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
|
|
9 11 5 9 11 4 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1
|
|
|
|
24 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
|
|
15 10 5 10 3 3 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1
|
|
|
|
25 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
|
|
8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 11 3 4 6 8 8 5 1
|
|
|
|
26 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 5 9 11 7 2 3 1 10 8 8 7 8 3 3 7 7 3 4 6 8 8 4 5 1
|
|
|
|
27 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 5 9 11 7 2 3 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4 5
|
|
27
|
|
1
|
|
|
|
28 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 3 2 7 7 3 2 4 6 8 8 4
|
|
27 28
|
|
5 1
|
|
|
|
29 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 4 9 11 7 2 3 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8 8
|
|
27 28 29
|
|
4 5 1
|
|
|
|
30 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 8
|
|
27 28 29 30
|
|
8 4 5 1
|
|
|
|
31 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
8 4 11 10 6 3 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6 7
|
|
27 28 29 30 31
|
|
7 4 5 1 2
|
|
|
|
32 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 4 6
|
|
27 28 29 30 31 32
|
|
7 7 4 5 1 2
|
|
|
|
33 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 10 10 6 2 2 3 1 1 1 10 8 7 7 8 2 2 7 7 3 2 1 1 6
|
|
27 28 29 30 31 32 33
|
|
7 3 7 4 5 1 2
|
|
|
|
34 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 7 7 3 2 1 1
|
|
27 28 29 30 31 32 33 34
|
|
6 7 3 7 4 5 1 2
|
|
|
|
35 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 7 8 2 2 5 7 3 2 1 1
|
|
27 28 29 30 31 32 33 34 35
|
|
6 5 3 7 4 4 5 1 2
|
|
|
|
36 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 7 6 8 2 2 5 7 1 3 2 1
|
|
27 28 29 30 31 32 33 34 35 36
|
|
1 6 5 3 7 4 4 5 1 2
|
|
|
|
37 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 8 2 2 5 7 1 3 2
|
|
27 28 29 30 31 32 33 34 35 36 37
|
|
1 1 6 5 3 7 4 5 1 3 2
|
|
|
|
38 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 10 8 3 5 6 5 2 2 5 3 7 1 3
|
|
27 28 29 30 31 32 33 34 35 36 37 38
|
|
2 1 1 6 5 3 7 4 5 1 3 2
|
|
|
|
39 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
7 4 3 8 9 6 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7 1
|
|
27 28 29 30 31 32 33 34 35 36 37 38 39
|
|
3 2 1 1 6 5 3 7 4 5 1 3 2
|
|
|
|
40 :
|
|
==
|
|
grouping table: k.gr
|
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
|
5 4 3 7 10 6 2 2 3 3 1 1 2 1 7 3 8 3 5 6 5 2 2 5 3 7
|
|
27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
|
1 3 2 1 1 6 5 3 7 4 5 1 3 2
|
|
>
|
|
>
|
|
> ## "pathological" case where a_i == b_i == 0 :
|
|
> D6 <- structure(c(0, 0, 0, 0.4, 1, 0.05, 1, 1, 0, 1, 1, 0, 0.25, 1, 1),
|
|
+ Labels = LETTERS[1:6], Size = 6, call = as.name("manually"),
|
|
+ class = "dist", Diag = FALSE, Upper = FALSE)
|
|
> D6
|
|
A B C D E
|
|
B 0.00
|
|
C 0.00 0.05
|
|
D 0.00 1.00 1.00
|
|
E 0.40 1.00 1.00 0.25
|
|
F 1.00 0.00 0.00 1.00 1.00
|
|
> kl6 <- c(1,1, 2,2, 3,3)
|
|
> (skD6 <- silhouette(kl6, D6))# had one NaN
|
|
cluster neighbor sil_width
|
|
[1,] 1 2 0.000
|
|
[2,] 1 3 1.000
|
|
[3,] 2 1 -0.975
|
|
[4,] 2 1 -0.500
|
|
[5,] 3 2 -0.375
|
|
[6,] 3 1 -0.500
|
|
attr(,"Ordered")
|
|
[1] FALSE
|
|
attr(,"call")
|
|
silhouette.default(x = kl6, dist = D6)
|
|
attr(,"class")
|
|
[1] "silhouette"
|
|
> summary(skD6)
|
|
Silhouette of 6 units in 3 clusters from silhouette.default(x = kl6, dist = D6) :
|
|
Cluster sizes and average silhouette widths:
|
|
2 2 2
|
|
0.5000 -0.7375 -0.4375
|
|
Individual silhouette widths:
|
|
Min. 1st Qu. Median Mean 3rd Qu. Max.
|
|
-0.97500 -0.50000 -0.43750 -0.22500 -0.09375 1.00000
|
|
> plot(silhouette(kl6, D6))# gave an error in earlier cluster versions
|
|
> dev.off()
|
|
pdf
|
|
2
|
|
>
|
|
> ## checking compatibility with R-only version
|
|
> silhouetteR <- asNamespace("cluster")$silhouetteR
|
|
> noCall <- function(si) `attr<-`(si, "call", NULL) # only 'call' is different:
|
|
> stopifnot(all.equal(noCall(skD6), noCall(silhouetteR(kl6, D6))))
|
|
>
|
|
> ## k=1 : pam(*, k=1) works, but silhouette() is not defined;
|
|
> ## --- FIXME: silhouette.partition() fails: "invalid partition .."; (which is not strictly true)
|
|
> ## ------ -> give something like NA (or a *different* error message)
|
|
> ## the other methods just give NA (no object!)
|
|
> ## drop "call" *and* "iOrd"
|
|
> noCliO <- function(si) noCall(`attr<-`(si, "iOrd", NULL))
|
|
> for(k in 2:7) {
|
|
+ p.k <- pam(ruspini, k=k)
|
|
+ ## order the silhouette to be *as* the default:
|
|
+ ## spk <- silhouette(p.k); opk <- spk[order(as.numeric(rownames(spk))), ]
|
|
+ ## rather sort*() the other:
|
|
+ stopifnot(all.equal(noCall(silhouette(p.k)),
|
|
+ noCliO(sortSilhouette(silhouetteR(p.k$clustering, p.k$diss)))))
|
|
+ }
|
|
>
|
|
> ## Last Line:
|
|
> cat('Time elapsed: ', proc.time() - .proctime00,'\n')
|
|
Time elapsed: 1.607 0.036 1.651 0 0
|
|
>
|
|
>
|
|
> proc.time()
|
|
user system elapsed
|
|
1.722 0.084 1.859
|