686 lines
32 KiB
Plaintext
Raw Normal View History

2025-01-12 00:52:51 +08:00
R Under development (unstable) (2023-11-29 r85646) -- "Unsuffered Consequences"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> library(cluster)
>
> x <- cbind(c(0, -4, -22, -14, 0, NA, -28, 1, 10, -1,
+ 100 + c(13, 0, 2, 4, 7, 8, 1)),
+ c(-5, -14, NA, -35, -30, NA, 7, 2, -18, 13,
+ 47, 64, 48, NA, NA, 44, 65))
> x
[,1] [,2]
[1,] 0 -5
[2,] -4 -14
[3,] -22 NA
[4,] -14 -35
[5,] 0 -30
[6,] NA NA
[7,] -28 7
[8,] 1 2
[9,] 10 -18
[10,] -1 13
[11,] 113 47
[12,] 100 64
[13,] 102 48
[14,] 104 NA
[15,] 107 NA
[16,] 108 44
[17,] 101 65
> (d <- dist(x,'manhattan'))
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
2 13
3 44 36
4 44 31 16
5 25 20 44 19
6 NA NA NA NA NA
7 40 45 12 56 65 NA
8 8 21 46 52 33 NA 34
9 23 18 64 41 22 NA 63 29
10 19 30 42 61 44 NA 33 13 42
11 165 178 270 209 190 NA 181 157 168 148
12 169 182 244 213 194 NA 185 161 172 152 30
13 155 168 248 199 180 NA 171 147 158 138 12 18
14 208 216 252 236 208 NA 264 206 188 210 18 8 4
15 214 222 258 242 214 NA 270 212 194 216 12 14 10 6
16 157 170 260 201 182 NA 173 149 160 140 8 28 10 8 2
17 171 184 246 215 196 NA 187 163 174 154 30 2 18 6 12 28
> summary(d, na.rm = TRUE) # max = 270
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
2.00 27.25 147.50 114.55 188.50 270.00 16
> ## First call with "trace" (seg.fault typically later ...):
> try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) )
C clara(): (nsam,nran,n) = (10,5,17); 'large_sample',
- clara sample 1 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 1
[ntt=7, nunfs=0] .. nsel[1:10]= 6 7 8 9 10 12 13 14 16 17 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 2 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 3
[ntt=7, nunfs=1] .. nsel[1:10]= 1 4 7 9 11 12 13 15 16 17 -> dysta2();
clara -> s:= max{dys[1..45]} = 270;
bswap2(*, s=270), 1. BUILD: new repr. 7
new repr. 1
after build: medoids are 1 7
and min.dist dysma[1:n] are
0 44 40 23 12 18 0 10 10 18
--> sky = sum_j D_j= 175
swp new 8 <-> 7 old; decreasing diss. by -18
Last swap: new 8 <-> 7 old; decreasing diss. by 1
end{bswap2}: sky = 157
selec() -> 'NAfs' obj= 7.41176
- clara sample 3 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 6
[ntt=7, nunfs=2] .. nsel[1:10]= 1 2 3 5 8 9 10 11 13 17 -> dysta2();
clara -> s:= max{dys[1..45]} = 270;
bswap2(*, s=270), 1. BUILD: new repr. 5
new repr. 9
after build: medoids are 5 9
and min.dist dysma[1:n] are
8 21 46 33 0 29 13 12 0 18
--> sky = sum_j D_j= 180
swp new 1 <-> 5 old; decreasing diss. by -18
Last swap: new 1 <-> 5 old; decreasing diss. by 1
end{bswap2}: sky = 162
selec() -> 'NAfs' obj= 7.41176
- clara sample 4 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 5
[ntt=7, nunfs=3] .. nsel[1:10]= 1 2 3 4 7 8 9 10 13 14 -> dysta2();
clara -> s:= max{dys[1..45]} = 264;
bswap2(*, s=264), 1. BUILD: new repr. 1
new repr. 10
after build: medoids are 1 10
and min.dist dysma[1:n] are
0 13 44 44 40 8 23 19 4 0
--> sky = sum_j D_j= 195
Last swap: new 9 <-> 10 old; decreasing diss. by 0
end{bswap2}: sky = 195
selec() -> 'NAfs' obj= 7.41176
- clara sample 5 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 16
[ntt=7, nunfs=4] .. nsel[1:10]= 2 3 4 6 7 8 9 10 11 17 -> dysta2() gave dyst_toomany_NA --> new sample.
C clara() -> best sample _found_ ; nbest[1:10] =
c(0,0,0,0,0,0,0,0,0,0)
Error in clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) :
Observation 6 has *only* NAs --> omit it for clustering
In addition: Warning message:
In clara(x, k = 2, metric = "manhattan", sampsize = 10, trace = 3) :
Distance computations with NAs: using correct instead of pre-2016 wrong formula.
Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
to suppress this warning.
> ## Originally:already shows the problem: nbest[] = c(0,0,...,0) must be WRONG!!
> ## Now: gives the proper error message.
>
> ## S-plus 6.1.2 (rel.2 for Linux, 2002) gives
> ##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10)
> ## Problem in .Fortran("clara",: Internal error: data for decrementing
> ## ref.count didn't point to a valid arena (0x0), while calling subroutine clara
>
> ## The large example from clara.R -- made small enough to still provoke
> ## the "** dysta2() ... OUT" problem {no longer!}
> x <- matrix(c(0, 3, -4, 62, 1, 3, -7, 45, 36, 46, 45, 54, -10,
+ 51, 49, -5, 13, -6, 49, 52, 57, 39, -1, 55, 68, -3, 51, 11, NA,
+ 9, -3, 50, NA, 58, 9, 52, 12, NA, 47, -12, -6, -9, 5, 30, 38,
+ 54, -5, 39, 50, 50, 54, 43, 7, 64, 55, 4, 0, 72, 54, 37, 59,
+ -1, 8, 43, 50, -2, 56, -8, 43, 6, 4, 48, -2, 14, 45, 49, 56,
+ 51, 45, 11, 10, 42, 50, 2, -12, 3, 1, 2, 2, -14, -4, 8, 0, 3,
+ -11, 8, 5, 14, -1, 9, 0, 19, 10, -2, -9, 9, 2, 16, 10, 4, 1,
+ 12, 7, -4, 27, -8, -9, -9, 2, 8, NA, 13, -23, -3, -5, 1, 15,
+ -3, 5, -9, -5, 14, 8, 7, -4, 26, 20, 10, 8, 17, 4, 14, 23, -2,
+ 23, 2, 16, 5, 5, -3, 12, 5, 14, -2, 4, 2, -2, 7, 9, 1, -15, -1,
+ 9, 23, 1, 7, 13, 2, -11, 16, 12, -11, -14, 2, 6, -8),
+ ncol = 2)
> str(x) # 88 x 2
num [1:88, 1:2] 0 3 -4 62 1 3 -7 45 36 46 ...
> try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem
C clara(): (nsam,nran,n) = (44,20,88);
- clara sample 1 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 2
[ntt=44, nunfs=0] .. nsel[1:44]= 2 3 4 6 9 10 12 14 15 18 19 20 24 25 26 28 31 35 38 42 47 48 51 53 54 57 60 61 64 66 68 70 71 73 74 75 76 77 78 79 80 81 82 88 -> dysta2();
clara -> s:= max{dys[1..946]} = 78.6448;
bswap2(*, s=78.6448), 1. BUILD: new repr. 19
new repr. 9
after build: medoids are 9 19
and min.dist dysma[1:n] are
21.2 7.07 9.9 2.83 5.66 5 5.1 9.22 0 11.3
1.41 6.71 6.32 8.49 7.07 12.7 1.41 33.9 0 14.1
7.07 18.9 5.39 4.24 15.5 31.1 5.66 5.66 5.66 4.24
1.41 8.49 11.3 22.6 2.83 4.12 13 0 3.61 5
1.41 17 9.22 12.7
--> sky = sum_j D_j= 385.677
Last swap: new 6 <-> 9 old; decreasing diss. by 0.939294
end{bswap2}: sky = 385.677
selec() -> 'NAfs' obj= 2.59347
- clara sample 2 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 12
[ntt=44, nunfs=1] .. nsel[1:44]= 1 2 3 5 6 7 9 12 17 19 26 27 28 29 30 38 39 42 43 45 47 50 52 54 55 56 58 59 60 61 62 64 67 68 71 74 75 76 77 79 80 81 83 84 -> dysta2();
clara -> s:= max{dys[1..946]} = 81.7435;
bswap2(*, s=81.7435), 1. BUILD: new repr. 16
new repr. 43
after build: medoids are 16 43
and min.dist dysma[1:n] are
1.41 21.2 7.07 1.41 2.83 17 5.66 5 14.1 1.41
7.07 15 12.7 14.1 14.1 0 4.24 14.1 8.49 9.9
7.07 2 8.6 14.1 12.1 4.24 1.41 5.66 5.66 5.66
5.66 5.66 4.24 1.41 11.3 2.83 5.83 11 0 5.1
1.41 17 0 17
--> sky = sum_j D_j= 331.98
Last swap: new 17 <-> 43 old; decreasing diss. by 0.492109
end{bswap2}: sky = 331.98
selec() -> 'NAfs' obj= 2.55701
- clara sample 3 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 14
[ntt=44, nunfs=2] .. nsel[1:44]= 1 3 4 6 7 9 12 14 15 16 18 19 20 29 30 31 32 36 38 39 40 44 46 47 48 49 51 53 54 56 57 60 62 64 65 66 67 73 75 77 79 81 82 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 77.8781;
bswap2(*, s=77.8781), 1. BUILD: new repr. 19
new repr. 35
after build: medoids are 19 35
and min.dist dysma[1:n] are
1.41 7.07 9.9 2.83 17 5.66 6.4 5.1 4.12 4.24
11.3 1.41 2.83 14.1 14.1 1.41 6 5.66 0 3.16
5.66 18.4 8.06 7.07 16.3 6 7.21 4.24 14 4.24
31.1 5.66 5.66 5.66 0 4.24 4.24 22.6 7.07 0
5.1 17 8.25 7.07
--> sky = sum_j D_j= 338.587
end{bswap2}: sky = 338.587
selec() -> 'NAfs' obj= 2.57726
- clara sample 4 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 70
[ntt=44, nunfs=3] .. nsel[1:44]= 1 3 8 9 14 15 16 17 19 20 22 23 28 30 31 32 34 35 36 37 38 39 40 41 45 46 47 49 54 56 57 65 66 67 69 70 71 74 76 77 78 84 86 88 -> dysta2();
clara -> s:= max{dys[1..946]} = 77.8781;
bswap2(*, s=77.8781), 1. BUILD: new repr. 21
new repr. 32
after build: medoids are 21 32
and min.dist dysma[1:n] are
1.41 7.07 7.81 5.66 5.1 4.12 4.24 14.1 1.41 2.83
4.24 0 12.7 14.1 1.41 6 8.06 33.9 5.66 8.49
0 3.16 5.66 5.66 9.9 8.06 7.07 6 14 4.24
31.1 0 4.24 4.24 4.24 8.49 11.3 2.83 9.06 0
7.07 17 1.41 12.7
--> sky = sum_j D_j= 325.933
end{bswap2}: sky = 325.933
selec() -> 'NAfs' obj= 2.57726
- clara sample 5 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 80
[ntt=44, nunfs=4] .. nsel[1:44]= 1 2 3 5 7 8 11 13 14 20 22 23 26 28 30 31 33 34 37 38 39 41 45 46 47 50 51 52 57 59 61 64 67 71 76 77 79 80 81 82 85 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 6 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 5
[ntt=44, nunfs=5] .. nsel[1:44]= 2 3 4 5 6 8 10 12 19 20 21 23 24 25 29 30 31 32 33 37 39 41 42 45 46 48 50 53 54 59 61 66 68 69 71 72 73 79 80 82 84 85 86 87 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 7 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 17
[ntt=44, nunfs=6] .. nsel[1:44]= 2 3 6 7 8 12 14 16 17 18 20 22 26 27 29 30 31 32 33 35 36 37 42 44 45 46 49 52 54 58 59 61 62 63 65 67 70 74 75 77 78 79 87 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 8 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 67
[ntt=44, nunfs=7] .. nsel[1:44]= 2 3 6 7 8 9 11 13 14 18 30 31 32 34 35 37 38 40 43 44 47 48 49 52 54 55 56 58 59 60 66 67 68 70 71 72 73 75 80 83 84 85 87 88 -> dysta2();
clara -> s:= max{dys[1..946]} = 85.5102;
bswap2(*, s=85.5102), 1. BUILD: new repr. 17
new repr. 9
after build: medoids are 9 17
and min.dist dysma[1:n] are
21.2 7.07 2.83 17 9.9 5.66 2.83 1.41 0 11.3
14.1 1.41 9.9 9.22 33.9 8.49 0 5.66 8.49 18.4
7.07 13.9 1.41 8.25 13.9 5.66 4.24 1.41 4.24 5.66
4.24 4.24 1.41 8.49 11.3 0 22.6 11.3 1.41 7.07
17 21.2 7.07 12.7
--> sky = sum_j D_j= 384.698
end{bswap2}: sky = 384.698
selec() -> 'NAfs' obj= 2.73432
- clara sample 9 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 67
[ntt=44, nunfs=8] .. nsel[1:44]= 2 4 6 7 8 11 12 13 14 15 17 19 20 21 24 27 29 30 31 33 34 35 36 39 42 45 46 48 50 51 52 54 55 58 60 61 62 65 67 78 80 84 86 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 10 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 5
[ntt=44, nunfs=9] .. nsel[1:44]= 1 3 5 6 7 9 10 14 15 16 17 18 19 20 21 23 28 29 30 32 33 36 37 39 40 44 46 47 51 53 54 55 56 57 65 69 70 74 76 81 82 84 86 87 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 11 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 66
[ntt=44, nunfs=10] .. nsel[1:44]= 1 3 4 5 6 11 13 14 15 18 19 21 28 30 31 32 33 34 39 40 41 42 43 46 47 57 58 59 63 65 66 67 71 72 73 74 75 78 79 80 83 84 87 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 12 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 21
[ntt=44, nunfs=11] .. nsel[1:44]= 4 5 6 8 9 10 13 14 15 16 17 21 23 25 27 28 30 35 36 41 44 46 47 49 50 54 55 56 57 59 61 62 64 65 66 68 71 72 74 75 76 81 83 84 -> dysta2();
clara -> s:= max{dys[1..946]} = 78.3135;
bswap2(*, s=78.3135), 1. BUILD: new repr. 5
new repr. 2
after build: medoids are 2 5
and min.dist dysma[1:n] are
26.2 0 3.61 9.49 0 13.5 11 20.5 13.9 6.32
15 21.6 2.24 32.1 26.6 12.8 12 24.4 17.9 8.6
10.8 18.1 7.21 20.5 14.9 29.4 26.2 3.61 23 21.1
23 3.61 7 16.6 3.61 9.22 9.49 12.6 13 9.85
22.2 14.2 15.7 11
--> sky = sum_j D_j= 623.732
swp new 43 <-> 5 old; decreasing diss. by -205.358
Last swap: new 43 <-> 5 old; decreasing diss. by 1
end{bswap2}: sky = 418.375
selec() -> 'NAfs' obj= 3.17257
- clara sample 13 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 4
[ntt=44, nunfs=12] .. nsel[1:44]= 3 4 5 14 15 16 17 19 20 21 24 25 26 29 30 31 34 35 38 40 41 43 47 49 50 52 55 57 58 60 61 63 64 65 66 68 72 73 74 79 81 83 86 88 -> dysta2();
clara -> s:= max{dys[1..946]} = 84.1487;
bswap2(*, s=84.1487), 1. BUILD: new repr. 19
new repr. 34
after build: medoids are 19 34
and min.dist dysma[1:n] are
7.07 9.9 1.41 5.1 4.12 4.24 14.1 1.41 2.83 8.06
5.39 8.49 7.07 14.1 14.1 1.41 8.06 33.9 0 5.66
5.66 8.49 7.07 6 4 7.62 10.3 31.1 1.41 5.66
5.66 15.6 5.66 0 4.24 1.41 0 22.6 2.83 5.1
17 2 1.41 12.7
--> sky = sum_j D_j= 340.1
end{bswap2}: sky = 340.1
selec() -> 'NAfs' obj= 2.57726
- clara sample 14 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 78
[ntt=44, nunfs=13] .. nsel[1:44]= 5 7 8 9 10 11 12 18 19 21 23 27 28 29 31 32 33 35 36 38 39 46 50 51 52 56 57 58 59 60 64 65 66 68 72 73 75 77 78 80 84 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 15 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 73
[ntt=44, nunfs=14] .. nsel[1:44]= 2 3 8 10 18 25 26 27 29 31 33 34 35 41 42 43 44 46 47 48 49 53 54 56 57 58 59 60 63 69 70 71 72 73 75 76 77 79 81 84 85 86 87 88 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 16 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 43
[ntt=44, nunfs=15] .. nsel[1:44]= 5 6 7 8 9 12 14 16 18 22 23 24 27 28 29 30 34 35 36 39 40 41 43 45 56 57 59 60 62 64 65 67 69 70 71 73 74 75 79 81 83 85 86 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 75.1665;
bswap2(*, s=75.1665), 1. BUILD: new repr. 15
new repr. 41
after build: medoids are 15 41
and min.dist dysma[1:n] are
12.7 17 2.83 6.4 15.7 5 7.07 9.9 25.5 13.6
14.1 5 15 1.41 0 0 8.06 19.8 8.49 4.24
8.49 19.8 5.66 12.6 9.9 45.3 5.66 14.8 8.49 9.9
2 10 9.9 22.6 25.5 8.49 11.3 5.83 5.1 2.83
0 7.07 15.6 21.2
--> sky = sum_j D_j= 479.721
end{bswap2}: sky = 479.721
selec() -> 'NAfs' obj= 3.31146
- clara sample 17 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 22
[ntt=44, nunfs=16] .. nsel[1:44]= 4 6 9 10 11 12 13 16 19 22 26 27 29 30 33 34 37 38 39 42 43 48 51 54 55 57 60 61 62 63 64 66 69 72 73 75 76 77 78 81 82 85 86 87 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 18 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 39
[ntt=44, nunfs=17] .. nsel[1:44]= 1 4 8 13 15 19 20 23 25 26 27 28 30 31 33 34 36 37 39 41 42 43 44 45 46 47 50 54 55 57 59 60 62 64 65 66 67 72 73 78 79 81 82 85 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 19 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 72
[ntt=44, nunfs=18] .. nsel[1:44]= 1 4 5 6 10 12 13 14 17 18 19 22 23 25 27 30 31 32 33 38 39 40 41 42 44 45 46 48 55 57 58 59 60 61 66 67 69 70 72 74 83 84 85 86 -> dysta2() gave dyst_toomany_NA --> new sample.
- clara sample 20 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 48
[ntt=44, nunfs=19] .. nsel[1:44]= 1 3 4 5 7 10 11 13 14 20 22 23 31 32 33 34 35 36 37 40 41 42 43 44 48 50 52 53 55 56 62 63 68 71 72 73 74 75 81 82 83 84 86 88 -> dysta2() gave dyst_toomany_NA --> new sample.
C clara() -> best sample _found_ ; nbest[1:44] =
c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0)
Error in clara(x, 2, samples = 20, trace = 3) :
Observation 33 has *only* NAs --> omit it for clustering
In addition: Warning message:
In clara(x, 2, samples = 20, trace = 3) :
Distance computations with NAs: using correct instead of pre-2016 wrong formula.
Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
to suppress this warning.
> ## To see error message for > 1 missing:
> try(clara(rbind(NA,x), 2))
Error in clara(rbind(NA, x), 2) :
Observations 1,34 have *only* NAs --> omit them for clustering!
In addition: Warning message:
In clara(rbind(NA, x), 2) :
Distance computations with NAs: using correct instead of pre-2016 wrong formula.
Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
to suppress this warning.
>
> x <- x[-33,]
> ## still had the ** dysta2() .. OUT" problem {no longer!}
> c2 <- clara(x, 2, samples = 12, trace = 3)
C clara(): (nsam,nran,n) = (44,12,87); 'large_sample',
- clara sample 1 finding 1st... new k{ran}:
.. kall: FALSE,
... nrx [0:1]= 0 0
... nsel[0:0]= 2
[ntt=43, nunfs=0] .. nsel[1:44]= 1 7 8 11 14 16 17 21 22 26 29 30 32 33 34 36 37 39 40 41 43 44 45 46 48 49 51 52 54 55 56 58 62 64 66 68 69 71 74 82 83 84 85 86 -> dysta2();
clara -> s:= max{dys[1..946]} = 76.5376;
bswap2(*, s=76.5376), 1. BUILD: new repr. 17
new repr. 40
after build: medoids are 17 40
and min.dist dysma[1:n] are
1.41 17 6.4 2.83 7.07 4.24 14.1 7.28 4.24 7.07
14.1 14.1 4 8.06 33.9 8.49 0 5.66 5.66 14.1
18.4 9.9 6.4 7.07 8 2 8.6 4.24 12.1 4.24
31.1 5.66 15.6 2 4.24 4.24 8.49 0 5.83 0
17 21.2 1.41 7.07
--> sky = sum_j D_j= 384.62
end{bswap2}: sky = 384.62
1st proper sample obj= 7.92464
- clara sample 2 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 82
... nsel[0:0]= 70
[ntt=43, nunfs=0] .. nsel[1:44]= 4 8 10 11 13 15 16 18 20 21 22 23 24 25 31 32 33 34 35 36 37 40 41 43 45 46 48 50 52 55 62 64 65 68 69 71 72 77 81 82 84 85 86 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 82.7103;
bswap2(*, s=82.7103), 1. BUILD: new repr. 21
new repr. 40
after build: medoids are 21 40
and min.dist dysma[1:n] are
9.9 6.4 4.47 2.83 1.41 2.24 4.24 11.3 4.47 7.28
4.24 0 5 8.49 1.41 4 8.06 33.9 5.66 8.49
0 5.66 14.1 18.4 6.4 7.07 8 5.66 4.24 4.24
15.6 2 4.24 4.24 8.49 0 22.6 5.1 8.94 0
21.2 1.41 7.07 12.7
--> sky = sum_j D_j= 321.274
end{bswap2}: sky = 321.274
obj= 7.92464
- clara sample 3 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 82
... nsel[0:0]= 38
[ntt=43, nunfs=0] .. nsel[1:44]= 2 5 10 11 14 18 21 22 23 24 25 26 27 29 33 34 36 37 39 41 42 43 45 47 49 50 55 57 58 60 62 64 67 68 71 73 75 77 79 82 83 84 85 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 85.5102;
bswap2(*, s=85.5102), 1. BUILD: new repr. 18
new repr. 32
after build: medoids are 18 32
and min.dist dysma[1:n] are
21.2 1.41 4 2.83 5.1 11.3 8.06 4.24 0 5.39
8.49 7.07 13 14.1 8.06 33.9 8.49 0 5.66 14.1
8.49 18.4 8.06 16.3 4 7.21 4.24 1.41 4.47 5.66
15.6 0 1.41 4.24 0 2.83 9.06 7.07 1.41 2
17 21.2 1.41 12.7
--> sky = sum_j D_j= 350.699
Last swap: new 40 <-> 32 old; decreasing diss. by 0.7867
end{bswap2}: sky = 350.699
new best obj= 7.91099
- clara sample 4 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 85
[ntt=43, nunfs=0] .. nsel[1:44]= 2 4 5 6 8 10 11 12 15 18 21 24 25 26 28 31 33 37 40 42 43 47 49 50 52 54 55 57 58 59 60 61 62 63 64 71 72 73 74 78 79 81 82 86 -> dysta2();
clara -> s:= max{dys[1..946]} = 78.3135;
bswap2(*, s=78.3135), 1. BUILD: new repr. 18
new repr. 43
after build: medoids are 18 43
and min.dist dysma[1:n] are
21.2 9.9 1.41 2.83 6.4 4.47 2.83 5 2.24 11.3
7.28 5 8.49 7.07 12.7 1.41 8.06 0 5.66 8.49
18.4 17.8 2 5.66 4.24 12.1 4.24 1.41 5.66 5.66
5.66 5.66 15.6 5.66 2 0 22.6 2.83 5.83 5.1
1.41 8.94 0 7.07
--> sky = sum_j D_j= 297.276
end{bswap2}: sky = 297.276
obj= 7.92464
- clara sample 5 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 84
[ntt=43, nunfs=0] .. nsel[1:44]= 4 6 9 10 12 13 15 16 17 18 20 21 24 26 27 31 34 35 36 37 40 41 42 43 45 51 52 53 54 55 56 62 64 65 67 68 69 71 72 73 74 77 79 82 -> dysta2();
clara -> s:= max{dys[1..946]} = 76.5376;
bswap2(*, s=76.5376), 1. BUILD: new repr. 20
new repr. 44
after build: medoids are 20 44
and min.dist dysma[1:n] are
9.9 2.83 5.66 4.47 5 1.41 2.24 4.24 14.1 11.3
4.47 7.28 5 7.07 15 1.41 33.9 5.66 8.49 0
5.66 14.1 8.49 18.4 6.4 8.6 4.24 14.1 12.1 4.24
31.1 15.6 2 4.24 1.41 4.24 8.49 0 22.6 2.83
5.83 5.1 1.41 0
--> sky = sum_j D_j= 350.799
end{bswap2}: sky = 350.799
obj= 7.92464
- clara sample 6 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 33
[ntt=43, nunfs=0] .. nsel[1:44]= 1 5 7 9 11 13 14 15 17 18 22 26 27 28 34 35 36 37 38 43 46 47 48 50 51 54 55 56 57 61 63 64 66 69 71 73 74 75 76 77 78 80 81 82 -> dysta2();
clara -> s:= max{dys[1..946]} = 82.0244;
bswap2(*, s=82.0244), 1. BUILD: new repr. 18
new repr. 19
after build: medoids are 18 19
and min.dist dysma[1:n] are
1.41 1.41 17 5.66 2.83 1.41 5.66 5.39 14.1 11.3
4.24 7.07 12.6 12.7 33.9 5.66 8.49 0 0 17
7.07 13.6 5.83 9.9 4.47 11.3 4.24 31.1 1.41 5.66
5.66 3.16 4.24 8.49 0 2.83 6.32 8.25 0 8.49
2.83 17 5.1 4.24
--> sky = sum_j D_j= 339.187
end{bswap2}: sky = 339.187
obj= 8.0873
- clara sample 7 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 26
[ntt=43, nunfs=0] .. nsel[1:44]= 1 3 4 5 9 10 11 18 19 21 23 24 25 30 37 38 39 40 46 47 50 54 55 56 59 62 64 65 66 67 68 70 71 72 75 76 79 80 81 82 83 84 85 86 -> dysta2();
clara -> s:= max{dys[1..946]} = 82.7103;
bswap2(*, s=82.7103), 1. BUILD: new repr. 15
new repr. 27
after build: medoids are 15 27
and min.dist dysma[1:n] are
1.41 7.07 9.9 1.41 5.66 4 2.83 11.3 1.41 8.06
0 5.39 8.49 14.1 0 3.16 5.66 5.66 7.07 16.3
7.21 10.3 4.24 31.1 5.66 15.6 0 4.24 4.24 1.41
4.24 11.3 0 22.6 9.06 0 1.41 17 8.25 2
17 21.2 1.41 7.07
--> sky = sum_j D_j= 325.427
end{bswap2}: sky = 325.427
obj= 7.91099
- clara sample 8 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 3
[ntt=43, nunfs=0] .. nsel[1:44]= 4 5 6 10 12 15 16 17 19 20 21 22 23 24 25 26 27 28 29 31 37 39 41 42 44 45 46 49 50 51 56 57 61 62 63 64 68 73 75 77 78 80 81 85 -> dysta2();
clara -> s:= max{dys[1..946]} = 84.1487;
bswap2(*, s=84.1487), 1. BUILD: new repr. 21
new repr. 36
after build: medoids are 21 36
and min.dist dysma[1:n] are
9.9 1.41 2.83 4 6.4 4.12 4.24 14.1 1.41 2.83
8.06 4.24 0 5.39 8.49 7.07 13 12.7 14.1 1.41
0 5.66 14.1 8.49 9.9 8.06 7.07 4 7.21 7.62
31.1 1.41 5.66 15.6 5.66 0 4.24 2.83 9.06 7.07
5.1 17 8.25 1.41
--> sky = sum_j D_j= 312.333
end{bswap2}: sky = 312.333
obj= 7.91099
- clara sample 9 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 59
[ntt=43, nunfs=0] .. nsel[1:44]= 1 3 5 9 11 13 17 22 23 25 29 31 32 34 37 38 40 42 43 44 47 49 53 55 56 58 62 63 64 66 68 69 70 71 72 73 74 75 76 78 81 82 84 86 -> dysta2();
clara -> s:= max{dys[1..946]} = 82.7103;
bswap2(*, s=82.7103), 1. BUILD: new repr. 15
new repr. 29
after build: medoids are 15 29
and min.dist dysma[1:n] are
1.41 7.07 1.41 5.66 2.83 1.41 14.1 4.24 0 8.49
14.1 1.41 6 33.9 0 3.16 5.66 8.49 18.4 9.9
16.3 4 14 4.24 31.1 4.47 15.6 5.66 0 4.24
4.24 8.49 11.3 0 22.6 2.83 7.07 9.06 0 5.1
8.25 2 21.2 7.07
--> sky = sum_j D_j= 356.571
Last swap: new 16 <-> 29 old; decreasing diss. by 0.311473
end{bswap2}: sky = 356.571
obj= 7.91099
- clara sample 10 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 5
[ntt=43, nunfs=0] .. nsel[1:44]= 2 4 8 11 12 13 22 24 25 26 27 29 31 34 36 37 39 41 42 44 47 48 49 50 53 57 58 59 60 61 62 63 64 66 67 70 71 72 75 77 79 82 84 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 85.5102;
bswap2(*, s=85.5102), 1. BUILD: new repr. 16
new repr. 33
after build: medoids are 16 33
and min.dist dysma[1:n] are
21.2 9.9 7.81 2.83 6.4 1.41 4.24 5.39 8.49 7.07
13 14.1 1.41 33.9 8.49 0 5.66 14.1 8.49 9.9
16.3 6 4 7.21 14 1.41 4.47 5.66 5.66 5.66
15.6 5.66 0 4.24 1.41 11.3 0 22.6 9.06 7.07
1.41 2 21.2 12.7
--> sky = sum_j D_j= 368.598
Last swap: new 42 <-> 33 old; decreasing diss. by 0.115684
end{bswap2}: sky = 368.598
obj= 7.91099
- clara sample 11 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 30
[ntt=43, nunfs=0] .. nsel[1:44]= 2 7 8 9 10 16 17 20 22 23 24 26 27 29 34 35 36 37 38 41 44 47 48 49 50 51 52 53 54 59 60 61 63 64 67 68 69 75 76 78 80 81 84 85 -> dysta2();
clara -> s:= max{dys[1..946]} = 80.9938;
bswap2(*, s=80.9938), 1. BUILD: new repr. 18
new repr. 19
after build: medoids are 18 19
and min.dist dysma[1:n] are
21.2 17 7.28 5.66 1.41 4.24 14.1 5.1 4.24 0
8.54 7.07 12.6 14.1 33.9 5.66 8.49 0 0 14.1
9.9 13.6 5.83 5.83 9.9 4.47 4.24 17 11.3 5.66
5.66 5.66 5.66 3.16 1.41 4.24 8.49 8.25 0 2.83
17 5.1 21.2 1.41
--> sky = sum_j D_j= 362.717
end{bswap2}: sky = 362.717
obj= 8.0873
- clara sample 12 finding 1st... new k{ran}:
.. kall: T,
... nrx [0:1]= 37 64
... nsel[0:0]= 44
[ntt=43, nunfs=0] .. nsel[1:44]= 1 2 3 7 11 12 14 18 19 20 21 28 30 31 32 33 34 37 38 39 40 42 49 51 52 53 55 59 63 64 68 69 70 72 74 76 77 78 79 81 84 85 86 87 -> dysta2();
clara -> s:= max{dys[1..946]} = 80.9938;
bswap2(*, s=80.9938), 1. BUILD: new repr. 18
new repr. 30
after build: medoids are 18 30
and min.dist dysma[1:n] are
1.41 21.2 7.07 17 2.83 6.4 5.1 11.3 1.41 2.83
8.06 12.7 14.1 1.41 6 8.06 33.9 0 3.16 5.66
5.66 8.49 4 7.62 4.24 14 4.24 5.66 5.66 0
4.24 8.49 11.3 22.6 7.07 0 7.07 5.1 1.41 8.25
21.2 1.41 7.07 12.7
--> sky = sum_j D_j= 347.279
end{bswap2}: sky = 347.279
obj= 7.91099
C clara() -> best sample _found_ ; nbest[1:44] =
c(2,5,10,11,14,18,21,22,23,24,25,26,27,29,33,34,36,37,39,41,
42,43,45,47,49,50,55,57,58,60,62,64,67,68,71,73,75,77,79,82,
83,84,85,87)
resul(), black() and return() from C.
Warning message:
In clara(x, 2, samples = 12, trace = 3) :
Distance computations with NAs: using correct instead of pre-2016 wrong formula.
Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
to suppress this warning.
> c2. <- clara(x, 2, samples = 12, trace = 1, correct.d=TRUE)
C clara(): (nsam,nran,n) = (44,12,87); 'large_sample',
- clara sample 1 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464
- clara sample 2 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464
- clara sample 3 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
- clara sample 4 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464
- clara sample 5 [ntt=43, nunfs=0] -> dysta2(); obj= 7.92464
- clara sample 6 [ntt=43, nunfs=0] -> dysta2(); obj= 8.0873
- clara sample 7 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
- clara sample 8 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
- clara sample 9 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
- clara sample 10 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
- clara sample 11 [ntt=43, nunfs=0] -> dysta2(); obj= 8.0873
- clara sample 12 [ntt=43, nunfs=0] -> dysta2(); obj= 7.91099
C clara() -> best sample _found_ resul(), black() and return() from C.
> p2g <- pam(daisy(x,"gower"), k=2, trace = 3)
pam()'s bswap(*, s=0.785, pamonce=0): build 2 medoids:
new repr. 37
new repr. 64
after build: medoids are 37 64
and min.dist dysma[1:n] are
0.02 0.3 0.1 0.131 0.02 0.04 0.24 0.0898 0.08 0.0238
0.04 0.0738 0.02 0.056 0.046 0.06 0.2 0.16 0.02 0.0319
0.0817 0.06 0 0.0498 0.12 0.1 0.136 0.18 0.2 0.2
0.02 0.06 0.0576 0.48 0.08 0.12 0 0.0279 0.08 0.08
0.2 0.12 0.119 0.131 0.0938 0.1 0.185 0.06 0.04 0.0838
0.0717 0.06 0.0833 0.12 0.06 0.388 0.02 0.0438 0.08 0.08
0.08 0.22 0.08 0 0.06 0.06 0.02 0.06 0.12 0.16
0 0.32 0.04 0.0798 0.096 0 0.076 0.0398 0.02 0.24
0.0676 0.02 0.24 0.3 0.02 0.1 0.18
end{bswap()}, end{cstat()}
> if(FALSE) { ## disabled clara(*, "gower") for now (2023-11-30):
+ c2g <- clara(x, 2, samples = 12, sampsize=nrow(x), trace = 2, metric = "gower", pamLike=TRUE, correct.d=TRUE)
+ (icall <- which(names(c2) == "call"))
+ ## c2g and p2g are *quite* different !
+ table(c2g$clustering,
+ p2g$clustering)
+ ## 1 2
+ ## 1 40 32
+ ## 2 15 0 << not *one* pair of {2,2} !?!
+
+ stopifnot(exprs = {
+ all.equal(c2[-icall], c2.[-icall])
+ })
+ }# no "gower" for now
>
> data(xclara)
> suppressWarnings(RNGversion("3.5.0")) # back compatibility of results
> set.seed(123)
> xclara[sample(nrow(xclara), 50),] <- NA
> try( clara(xclara, k = 3) ) #-> "nice" error message {.. first 12 missing obs} :
Error in clara(xclara, k = 3) :
50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs --> omit them for clustering!
In addition: Warning message:
In clara(xclara, k = 3) :
Distance computations with NAs: using correct instead of pre-2016 wrong formula.
Use 'correct.d=FALSE' to get previous results or set 'correct.d=TRUE' explicitly
to suppress this warning.
> ## Error in clara(xclara, k = 3) :
> ## 50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs
> ## --> omit them for clustering!
>
> proc.time()
user system elapsed
0.149 0.039 0.205