492 lines
22 KiB
R
492 lines
22 KiB
R
|
## ----echo = FALSE, message = FALSE, warning=FALSE-----------------------------
|
||
|
library(dendextend)
|
||
|
library(knitr)
|
||
|
knitr::opts_chunk$set(
|
||
|
cache = TRUE,
|
||
|
dpi = 75,
|
||
|
fig.width = 6, fig.height = 6,
|
||
|
# dev = "svg",
|
||
|
# comment = "#>",
|
||
|
tidy = FALSE)
|
||
|
|
||
|
# https://stackoverflow.com/questions/24091735/why-pandoc-does-not-retrieve-the-image-file
|
||
|
# < ! -- rmarkdown v1 -->
|
||
|
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
iris <- datasets::iris
|
||
|
iris2 <- iris[,-5]
|
||
|
species_labels <- iris[,5]
|
||
|
library(colorspace) # get nice colors
|
||
|
species_col <- rev(rainbow_hcl(3))[as.numeric(species_labels)]
|
||
|
|
||
|
## ----fig.width=9, fig.height=9, fig.show='hold'-------------------------------
|
||
|
# Plot a SPLOM:
|
||
|
pairs(iris2, col = species_col,
|
||
|
lower.panel = NULL,
|
||
|
cex.labels=2, pch=19, cex = 1.2)
|
||
|
|
||
|
# Add a legend
|
||
|
par(xpd = TRUE)
|
||
|
legend(x = 0.05, y = 0.4, cex = 2,
|
||
|
legend = as.character(levels(species_labels)),
|
||
|
fill = unique(species_col))
|
||
|
par(xpd = NA)
|
||
|
|
||
|
|
||
|
## ----fig.height=3-------------------------------------------------------------
|
||
|
# http://blog.safaribooksonline.com/2014/03/31/mastering-parallel-coordinate-charts-r/
|
||
|
par(las = 1, mar = c(4.5, 3, 3, 2) + 0.1, cex = .8)
|
||
|
MASS::parcoord(iris2, col = species_col, var.label = TRUE, lwd = 2)
|
||
|
|
||
|
# Add Title
|
||
|
title("Parallel coordinates plot of the Iris data")
|
||
|
# Add a legend
|
||
|
par(xpd = TRUE)
|
||
|
legend(x = 1.75, y = -.25, cex = 1,
|
||
|
legend = as.character(levels(species_labels)),
|
||
|
fill = unique(species_col), horiz = TRUE)
|
||
|
par(xpd = NA)
|
||
|
|
||
|
|
||
|
## ----fig.height = 10, fig.width=7---------------------------------------------
|
||
|
|
||
|
d_iris <- dist(iris2) # method="man" # is a bit better
|
||
|
hc_iris <- hclust(d_iris, method = "complete")
|
||
|
iris_species <- rev(levels(iris[,5]))
|
||
|
|
||
|
library(dendextend)
|
||
|
dend <- as.dendrogram(hc_iris)
|
||
|
# order it the closest we can to the order of the observations:
|
||
|
dend <- rotate(dend, 1:150)
|
||
|
|
||
|
# Color the branches based on the clusters:
|
||
|
dend <- color_branches(dend, k=3) #, groupLabels=iris_species)
|
||
|
|
||
|
# Manually match the labels, as much as possible, to the real classification of the flowers:
|
||
|
labels_colors(dend) <-
|
||
|
rainbow_hcl(3)[sort_levels_values(
|
||
|
as.numeric(iris[,5])[order.dendrogram(dend)]
|
||
|
)]
|
||
|
|
||
|
# We shall add the flower type to the labels:
|
||
|
labels(dend) <- paste(as.character(iris[,5])[order.dendrogram(dend)],
|
||
|
"(",labels(dend),")",
|
||
|
sep = "")
|
||
|
# We hang the dendrogram a bit:
|
||
|
dend <- hang.dendrogram(dend,hang_height=0.1)
|
||
|
# reduce the size of the labels:
|
||
|
# dend <- assign_values_to_leaves_nodePar(dend, 0.5, "lab.cex")
|
||
|
dend <- set(dend, "labels_cex", 0.5)
|
||
|
# And plot:
|
||
|
par(mar = c(3,3,3,7))
|
||
|
plot(dend,
|
||
|
main = "Clustered Iris data set
|
||
|
(the labels give the true flower species)",
|
||
|
horiz = TRUE, nodePar = list(cex = .007))
|
||
|
legend("topleft", legend = iris_species, fill = rainbow_hcl(3))
|
||
|
|
||
|
#### BTW, notice that:
|
||
|
# labels(hc_iris) # no labels, because "iris" has no row names
|
||
|
# is.integer(labels(dend)) # this could cause problems...
|
||
|
# is.character(labels(dend)) # labels are no longer "integer"
|
||
|
|
||
|
## ----fig.width=7, fig.height=7------------------------------------------------
|
||
|
# Requires that the circlize package will be installed
|
||
|
par(mar = rep(0,4))
|
||
|
circlize_dendrogram(dend)
|
||
|
|
||
|
## ----echo=FALSE, eval=FALSE---------------------------------------------------
|
||
|
# # some_col_func <- function(n, top_color = "red4") {
|
||
|
# # seq_cols <- c("#F7FCFD", "#E0ECF4", "#BFD3E6", "#9EBCDA", "#8C96C6", "#8C6BB1",
|
||
|
# # "#88419D", "#810F7C")
|
||
|
# # c(colorRampPalette(seq_cols, bias =1)(n-1), top_color)
|
||
|
# # }
|
||
|
#
|
||
|
|
||
|
## ----fig.width=9, fig.height=9------------------------------------------------
|
||
|
|
||
|
some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))
|
||
|
|
||
|
# scaled_iris2 <- iris2 %>% as.matrix %>% scale
|
||
|
# library(gplots)
|
||
|
gplots::heatmap.2(as.matrix(iris2),
|
||
|
main = "Heatmap for the Iris data set",
|
||
|
srtCol = 20,
|
||
|
dendrogram = "row",
|
||
|
Rowv = dend,
|
||
|
Colv = "NA", # this to make sure the columns are not ordered
|
||
|
trace="none",
|
||
|
margins =c(5,0.1),
|
||
|
key.xlab = "Cm",
|
||
|
denscol = "grey",
|
||
|
density.info = "density",
|
||
|
RowSideColors = rev(labels_colors(dend)), # to add nice colored strips
|
||
|
col = some_col_func
|
||
|
)
|
||
|
|
||
|
|
||
|
|
||
|
## ----cache = FALSE, eval = FALSE----------------------------------------------
|
||
|
# heatmaply::heatmaply(as.matrix(iris2),
|
||
|
# dendrogram = "row",
|
||
|
# Rowv = dend)
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
|
||
|
hclust_methods <- c("ward.D", "single", "complete", "average", "mcquitty",
|
||
|
"median", "centroid", "ward.D2")
|
||
|
iris_dendlist <- dendlist()
|
||
|
for(i in seq_along(hclust_methods)) {
|
||
|
hc_iris <- hclust(d_iris, method = hclust_methods[i])
|
||
|
iris_dendlist <- dendlist(iris_dendlist, as.dendrogram(hc_iris))
|
||
|
}
|
||
|
names(iris_dendlist) <- hclust_methods
|
||
|
iris_dendlist
|
||
|
|
||
|
## ----fig.width=8, fig.height=8------------------------------------------------
|
||
|
iris_dendlist_cor <- cor.dendlist(iris_dendlist)
|
||
|
iris_dendlist_cor
|
||
|
corrplot::corrplot(iris_dendlist_cor, "pie", "lower")
|
||
|
|
||
|
## ----fig.width=8, fig.height=8------------------------------------------------
|
||
|
iris_dendlist_cor_spearman <- cor.dendlist(iris_dendlist, method_coef = "spearman")
|
||
|
corrplot::corrplot(iris_dendlist_cor_spearman, "pie", "lower")
|
||
|
|
||
|
## ----fig.height=5-------------------------------------------------------------
|
||
|
# The `which` parameter allows us to pick the elements in the list to compare
|
||
|
iris_dendlist %>% dendlist(which = c(1,8)) %>% ladderize %>%
|
||
|
set("branches_k_color", k=3) %>%
|
||
|
# untangle(method = "step1side", k_seq = 3:20) %>%
|
||
|
# set("clear_branches") %>% #otherwise the single lines are not black, since they retain the previous color from the branches_k_color.
|
||
|
tanglegram(faster = TRUE) # (common_subtrees_color_branches = TRUE)
|
||
|
|
||
|
## ----fig.height=5-------------------------------------------------------------
|
||
|
# The `which` parameter allows us to pick the elements in the list to compare
|
||
|
iris_dendlist %>% dendlist(which = c(1,4)) %>% ladderize %>%
|
||
|
set("branches_k_color", k=2) %>%
|
||
|
# untangle(method = "step1side", k_seq = 3:20) %>%
|
||
|
tanglegram(faster = TRUE) # (common_subtrees_color_branches = TRUE)
|
||
|
|
||
|
## ----fig.height=5-------------------------------------------------------------
|
||
|
# The `which` parameter allows us to pick the elements in the list to compare
|
||
|
iris_dendlist %>% dendlist(which = c(1,4)) %>% ladderize %>%
|
||
|
# untangle(method = "step1side", k_seq = 3:20) %>%
|
||
|
set("rank_branches") %>%
|
||
|
tanglegram(common_subtrees_color_branches = TRUE)
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
length(unique(common_subtrees_clusters(iris_dendlist[[1]], iris_dendlist[[4]]))[-1])
|
||
|
# -1 at the end is because we are ignoring the "0" subtree, which indicates leaves that are singletons.
|
||
|
|
||
|
## ----fig.height=5-------------------------------------------------------------
|
||
|
iris_dendlist %>% dendlist(which = c(3,4)) %>% ladderize %>%
|
||
|
untangle(method = "step1side", k_seq = 2:6) %>%
|
||
|
set("branches_k_color", k=2) %>%
|
||
|
tanglegram(faster = TRUE) # (common_subtrees_color_branches = TRUE)
|
||
|
|
||
|
## ----fig.height=15------------------------------------------------------------
|
||
|
par(mfrow = c(4,2))
|
||
|
for(i in 1:8) {
|
||
|
iris_dendlist[[i]] %>% set("branches_k_color", k=2) %>% plot(axes = FALSE, horiz = TRUE)
|
||
|
title(names(iris_dendlist)[i])
|
||
|
}
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
iris_dendlist_cor2 <- cor.dendlist(iris_dendlist, method = "common")
|
||
|
iris_dendlist_cor2
|
||
|
|
||
|
## ----fig.width=5, fig.height=5------------------------------------------------
|
||
|
# corrplot::corrplot(iris_dendlist_cor2, "pie", "lower")
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
|
||
|
get_ordered_3_clusters <- function(dend) {
|
||
|
cutree(dend, k = 3)[order.dendrogram(dend)]
|
||
|
}
|
||
|
|
||
|
dend_3_clusters <- lapply(iris_dendlist, get_ordered_3_clusters)
|
||
|
|
||
|
compare_clusters_to_iris <- function(clus) {FM_index(clus, rep(1:3, each = 50), assume_sorted_vectors = TRUE)}
|
||
|
|
||
|
clusters_performance <- sapply(dend_3_clusters, compare_clusters_to_iris)
|
||
|
dotchart(sort(clusters_performance), xlim = c(0.7,1),
|
||
|
xlab = "Fowlkes-Mallows Index (from 0 to 1)",
|
||
|
main = "Perormance of clustering algorithms \n in detecting the 3 species",
|
||
|
pch = 19)
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
train <- dendextend::khan$train
|
||
|
test <- dendextend::khan$test
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
d_train <- train %>% dist %>% hclust %>% as.dendrogram
|
||
|
d_test <- test %>% dist %>% hclust %>% as.dendrogram
|
||
|
d_train_test <- dendlist(train = d_train, test = d_test)
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
d_train_test %>% cor.dendlist
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
d_train_test %>% cor.dendlist(method_coef = "spearman")
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
Bk_plot(d_train, d_test, k = 2:30, xlim = c(2,30))
|
||
|
|
||
|
## ----fig.width=8, fig.height=5------------------------------------------------
|
||
|
pre_tang_d_train_test <- d_train_test %>% ladderize %>% # untangle %>%
|
||
|
set("branches_k_color", k = 7)
|
||
|
train_branches_colors <- get_leaves_branches_col(pre_tang_d_train_test$train)
|
||
|
pre_tang_d_train_test %>% tanglegram(fast = TRUE, color_lines = train_branches_colors)
|
||
|
|
||
|
## ----echo = FALSE-------------------------------------------------------------
|
||
|
# dput(d_train_test_common)
|
||
|
d_train_test_common <- structure(list(train = structure(list(structure(list(structure(171L, label = "491565", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(178L, label = "505491", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 7.1369942952198),
|
||
|
structure(list(structure(list(structure(8L, label = "283315", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(9L, label = "897177", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 2.55936539399907),
|
||
|
structure(list(structure(list(structure(106L, label = "345553", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(112L, label = "307660", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 5.17910461856101),
|
||
|
structure(list(structure(list(structure(268L, label = "504791", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(306L, label = "782503", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 4.27052507661529),
|
||
|
structure(list(structure(list(structure(246L, label = "81518", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(290L, label = "280837", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 1.37572388944875),
|
||
|
structure(list(structure(list(structure(266L, label = "866694", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(277L, label = "811956", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 3.31301518861595),
|
||
|
structure(list(structure(273L, label = "842918", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(274L, label = "626555", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 2.71864544948399)), members = 4, midpoint = 1.5, height = 6.35097701381449)), members = 6, midpoint = 2, height = 8.7097033164167)), members = 8, midpoint = 2.25, height = 9.23807936424017)), members = 10, midpoint = 2.375, height = 11.6573350998416)), members = 12, midpoint = 2.4375, height = 17.5620766260713)), members = 14, midpoint = 2.46875, height = 30.2363452779928, class = "dendrogram"),
|
||
|
test = structure(list(structure(list(structure(list(structure(171L, label = "491565", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(178L, label = "505491", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 3.96666017450449),
|
||
|
structure(list(structure(list(structure(list(structure(268L, label = "504791", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(306L, label = "782503", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 2.31497882927685),
|
||
|
structure(list(structure(list(structure(266L, label = "866694", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(277L, label = "811956", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 1.75475236429532),
|
||
|
structure(list(structure(273L, label = "842918", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(274L, label = "626555", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 1.34617375921535)), members = 4, midpoint = 1.5, height = 2.76465021476497)), members = 6, midpoint = 2, height = 4.52927251774499),
|
||
|
structure(list(structure(list(structure(246L, label = "81518", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(290L, label = "280837", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 0.714433271901582),
|
||
|
structure(list(structure(8L, label = "283315", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(9L, label = "897177", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 1.71895552589356)), members = 4, midpoint = 1.5, height = 6.44143803354499)), members = 10, midpoint = 4.75, height = 7.736516720075)), members = 12, midpoint = 3.625, height = 11.0066972375913),
|
||
|
structure(list(structure(106L, label = "345553", members = 1L, height = 0, leaf = TRUE),
|
||
|
structure(112L, label = "307660", members = 1L, height = 0, leaf = TRUE)), members = 2L, midpoint = 0.5, height = 3.6486307417989)), members = 14, midpoint = 8.0625, height = 18.2331742971431, class = "dendrogram")), class = "dendlist", .Names = c("train",
|
||
|
"test"))
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
# This was calculated before
|
||
|
# d_train_test_common <- d_train_test %>% prune_common_subtrees.dendlist
|
||
|
# d_train_test_common
|
||
|
d_train_test_common %>% untangle %>% tanglegram(common_subtrees_color_branches = TRUE)
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
d_train_test %>% nleaves
|
||
|
d_train_test_common %>% nleaves
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
votes.repub <- cluster::votes.repub
|
||
|
|
||
|
## ----fig.height=5-------------------------------------------------------------
|
||
|
years <- as.numeric(gsub("X", "", colnames(votes.repub)))
|
||
|
|
||
|
par(las = 2, mar = c(4.5, 3, 3, 2) + 0.1, cex = .8)
|
||
|
# MASS::parcoord(votes.repub, var.label = FALSE, lwd = 1)
|
||
|
matplot(1L:ncol(votes.repub), t(votes.repub), type = "l", col = 1, lty = 1,
|
||
|
axes = F, xlab = "", ylab = "")
|
||
|
axis(1, at = seq_along(years), labels = years)
|
||
|
axis(2)
|
||
|
# Add Title
|
||
|
title("Votes for Republican Candidate\n in Presidential Elections \n (each line is a country - over the years)")
|
||
|
|
||
|
## ----fig.width=9, fig.height=9------------------------------------------------
|
||
|
arcsin_transformation <- function(x) asin(x/100)
|
||
|
|
||
|
dend_NA <- votes.repub %>% is.na %>%
|
||
|
dist %>% hclust %>% as.dendrogram %>% ladderize
|
||
|
|
||
|
dend <- votes.repub %>% arcsin_transformation %>%
|
||
|
dist %>% hclust(method = "com") %>% as.dendrogram %>%
|
||
|
rotate(labels(dend_NA)) %>%
|
||
|
color_branches(k=3)
|
||
|
|
||
|
# some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))
|
||
|
some_col_func <- colorspace::diverge_hcl
|
||
|
|
||
|
|
||
|
# par(mar = c(3,3,3,3))
|
||
|
# library(gplots)
|
||
|
gplots::heatmap.2(as.matrix(votes.repub),
|
||
|
main = "Votes for\n Republican Presidential Candidate\n (clustered using complete)",
|
||
|
srtCol = 60,
|
||
|
dendrogram = "row",
|
||
|
Rowv = dend,
|
||
|
Colv = "NA", # this to make sure the columns are not ordered
|
||
|
trace="none",
|
||
|
margins =c(3,6),
|
||
|
key.xlab = "% Votes for Republican\n Presidential Candidate",
|
||
|
labCol = years,
|
||
|
denscol = "grey",
|
||
|
density.info = "density",
|
||
|
col = some_col_func
|
||
|
)
|
||
|
# RowSideColors = rev(labels_colors(dend)), # to add nice colored strips
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
|
||
|
hclust_methods <- c("ward.D", "single", "complete", "average", "mcquitty",
|
||
|
"median", "centroid", "ward.D2")
|
||
|
votes.repub_dendlist <- dendlist()
|
||
|
|
||
|
for(i in seq_along(hclust_methods)) {
|
||
|
tmp_dend <- votes.repub %>% arcsin_transformation %>% dist %>% hclust(method = hclust_methods[i]) %>% as.dendrogram
|
||
|
votes.repub_dendlist <- dendlist(votes.repub_dendlist, tmp_dend)
|
||
|
}
|
||
|
names(votes.repub_dendlist) <- hclust_methods
|
||
|
# votes.repub_dendlist
|
||
|
|
||
|
## ----fig.width=8, fig.height=8------------------------------------------------
|
||
|
corrplot::corrplot(cor.dendlist(votes.repub_dendlist), "pie", "lower")
|
||
|
|
||
|
## ----echo=FALSE, fig.width=9, fig.height=9------------------------------------
|
||
|
arcsin_transformation <- function(x) asin(x/100)
|
||
|
|
||
|
dend_NA <- votes.repub %>% is.na %>%
|
||
|
dist %>% hclust %>% as.dendrogram %>% ladderize
|
||
|
|
||
|
dend <- votes.repub %>% arcsin_transformation %>%
|
||
|
dist %>% hclust(method = "ave") %>% as.dendrogram %>%
|
||
|
rotate(labels(dend_NA)) %>%
|
||
|
color_branches(k=3)
|
||
|
|
||
|
# some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))
|
||
|
some_col_func <- colorspace::diverge_hcl
|
||
|
|
||
|
|
||
|
# par(mar = c(3,3,3,3))
|
||
|
# library(gplots)
|
||
|
gplots::heatmap.2(as.matrix(votes.repub),
|
||
|
main = "Votes for\n Republican Presidential Candidate\n (clustered using average)",
|
||
|
srtCol = 60,
|
||
|
dendrogram = "row",
|
||
|
Rowv = dend,
|
||
|
Colv = "NA", # this to make sure the columns are not ordered
|
||
|
trace="none",
|
||
|
margins =c(3,6),
|
||
|
key.xlab = "% Votes for Republican\n Presidential Candidate",
|
||
|
labCol = years,
|
||
|
denscol = "grey",
|
||
|
density.info = "density",
|
||
|
col = some_col_func
|
||
|
)
|
||
|
# RowSideColors = rev(labels_colors(dend)), # to add nice colored strips
|
||
|
|
||
|
## ----echo=FALSE---------------------------------------------------------------
|
||
|
|
||
|
ord1 <- c("North Carolina", "Virginia", "Tennessee", "Kentucky", "Maryland",
|
||
|
"Delaware", "Oklahoma", "Missouri", "New Mexico", "Oregon", "Washington",
|
||
|
"California", "West Virginia", "Hawaii", "Nevada", "Arizona",
|
||
|
"Montana", "Idaho", "Wyoming", "Utah", "Colorado", "Alaska",
|
||
|
"Illinois", "New York", "Indiana", "Ohio", "Connecticut", "New Hampshire",
|
||
|
"New Jersey", "Pennsylvania", "Iowa", "South Dakota", "North Dakota",
|
||
|
"Wisconsin", "Minnesota", "Nebraska", "Kansas", "Maine", "Michigan",
|
||
|
"Massachusetts", "Rhode Island", "Vermont", "Alabama", "Georgia",
|
||
|
"Louisiana", "Arkansas", "Florida", "Texas", "South Carolina",
|
||
|
"Mississippi")
|
||
|
|
||
|
ord2 <- c("North Carolina", "Virginia", "Tennessee", "Oklahoma", "Kentucky",
|
||
|
"Maryland", "Delaware", "Missouri", "New Mexico", "West Virginia",
|
||
|
"Oregon", "Washington", "California", "Nevada", "Arizona", "Montana",
|
||
|
"Colorado", "Alaska", "Idaho", "Wyoming", "Utah", "Hawaii", "Maine",
|
||
|
"Illinois", "New York", "New Jersey", "Indiana", "Ohio", "Connecticut",
|
||
|
"New Hampshire", "Pennsylvania", "Michigan", "Iowa", "South Dakota",
|
||
|
"North Dakota", "Wisconsin", "Minnesota", "Massachusetts", "Rhode Island",
|
||
|
"Nebraska", "Kansas", "Vermont", "Alabama", "Georgia", "Louisiana",
|
||
|
"Arkansas", "Florida", "Texas", "South Carolina", "Mississippi"
|
||
|
)
|
||
|
|
||
|
# dput(lapply(dends, labels)[[2]])
|
||
|
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
dend_com <- votes.repub %>% arcsin_transformation %>%
|
||
|
dist %>% hclust(method = "com") %>% as.dendrogram %>%
|
||
|
rotate(labels(dend_NA)) %>%
|
||
|
color_branches(k=3) # %>% ladderize
|
||
|
dend_ave <- votes.repub %>% arcsin_transformation %>%
|
||
|
dist %>% hclust(method = "ave") %>% as.dendrogram %>%
|
||
|
rotate(labels(dend_NA)) %>%
|
||
|
color_branches(k=3) # %>% ladderize
|
||
|
|
||
|
# The orders were predefined after using untangle("step2side")
|
||
|
# They are omitted here to save running time.
|
||
|
dend_com <- rotate(dend_com, ord1)
|
||
|
dend_ave <- rotate(dend_ave, ord2)
|
||
|
|
||
|
dends <- dendlist(complete = dend_com, average = dend_ave) # %>% untangle("step2side")
|
||
|
dends %>% tanglegram(margin_inner = 7)
|
||
|
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
animals <- cluster::animals
|
||
|
|
||
|
colnames(animals) <- c("warm-blooded",
|
||
|
"can fly",
|
||
|
"vertebrate",
|
||
|
"endangered",
|
||
|
"live in groups",
|
||
|
"have hair")
|
||
|
|
||
|
## ----fig.width=9, fig.height=9------------------------------------------------
|
||
|
|
||
|
dend_r <- animals %>% dist(method = "man") %>% hclust(method = "ward.D") %>% as.dendrogram %>% ladderize %>%
|
||
|
color_branches(k=4)
|
||
|
|
||
|
dend_c <- t(animals) %>% dist(method = "man") %>% hclust(method = "com") %>% as.dendrogram %>% ladderize%>%
|
||
|
color_branches(k=3)
|
||
|
|
||
|
|
||
|
# some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))
|
||
|
# some_col_func <- colorspace::diverge_hcl
|
||
|
# some_col_func <- colorspace::sequential_hcl
|
||
|
some_col_func <- function(n) (colorspace::diverge_hcl(n, h = c(246, 40), c = 96, l = c(65, 90)))
|
||
|
|
||
|
|
||
|
|
||
|
# par(mar = c(3,3,3,3))
|
||
|
# library(gplots)
|
||
|
gplots::heatmap.2(as.matrix(animals-1),
|
||
|
main = "Attributes of Animals",
|
||
|
srtCol = 35,
|
||
|
Rowv = dend_r,
|
||
|
Colv = dend_c,
|
||
|
trace="row", hline = NA, tracecol = "darkgrey",
|
||
|
margins =c(6,3),
|
||
|
key.xlab = "no / yes",
|
||
|
denscol = "grey",
|
||
|
density.info = "density",
|
||
|
col = some_col_func
|
||
|
)
|
||
|
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
|
||
|
hclust_methods <- c("ward.D", "single", "complete", "average", "mcquitty",
|
||
|
"median", "centroid", "ward.D2")
|
||
|
animals_dendlist <- dendlist()
|
||
|
|
||
|
for(i in seq_along(hclust_methods)) {
|
||
|
tmp_dend <- animals %>% dist(method = "man") %>%
|
||
|
hclust(method = hclust_methods[i]) %>% as.dendrogram
|
||
|
animals_dendlist <- dendlist(animals_dendlist, tmp_dend)
|
||
|
}
|
||
|
names(animals_dendlist) <- hclust_methods
|
||
|
# votes.repub_dendlist
|
||
|
|
||
|
## ----fig.width=8, fig.height=8------------------------------------------------
|
||
|
cophenetic_cors <- cor.dendlist(animals_dendlist)
|
||
|
corrplot::corrplot(cophenetic_cors, "pie", "lower")
|
||
|
|
||
|
## -----------------------------------------------------------------------------
|
||
|
remove_median <- dendlist(animals_dendlist, which = c(1:8)[-6] )
|
||
|
FM_cors <- cor.dendlist(remove_median, method = "FM_index", k = 4)
|
||
|
corrplot::corrplot(FM_cors, "pie", "lower")
|
||
|
|