2025-01-12 04:36:52 +08:00

865 lines
34 KiB
R

## ----echo = FALSE, message = FALSE--------------------------------------------
library(dendextend)
library(knitr)
knitr::opts_chunk$set(
cache = TRUE,
dpi = 75,
fig.width = 6, fig.height = 6,
comment = "#>",
tidy = FALSE)
# https://stackoverflow.com/questions/24091735/why-pandoc-does-not-retrieve-the-image-file
# < ! -- rmarkdown v1 -->
## ----echo=FALSE, warning=FALSE, fig.align='center', fig.width=7, fig.height=7----
suppressMessages(library(dendextend))
library(colorspace)
dend1 <- c(1:5) %>% # take some data
dist %>% # calculate a distance matrix,
hclust(method = "average") %>% # on it compute hierarchical clustering using the "average" method,
as.dendrogram # and lastly, turn that object into a dendrogram.
dend2 <- c(1:5) %>% # take some data
dist %>% # calculate a distance matrix,
hclust(method = "single") %>% # on it compute hierarchical clustering using the "average" method,
as.dendrogram # and lastly, turn that object into a dendrogram.
dend1 <- rotate(dend1, order = c(as.character(5:1)))
labels(dend1) <- rev(c("dendextend", "let's u easily", "control","(and compare) your", "dendrograms"))
labels(dend2) <- rev(c("let's u easily","dendextend" ,"control","(and compare) your", "dendrograms"))
dend1 <- dend1 %>%
color_labels %>%
set("labels_cex", c(2.2,1.4)) %>%
set("branches_lwd", c(4,1,4)) %>%
set("branches_lty", c(1,2,1)) %>%
set("nodes_pch", c(NA,19,NA)) %>%
set("nodes_cex", c(NA,2.5,NA)) %>%
set("nodes_col", c(NA,3,NA)) %>%
hang.dendrogram # %>% plot
dend2 <- color_branches(dend2)
# dend2 <- color_labels(dend2)
tanglegram(dendlist(dend1, dend2), margin_inner = 9,
color_lines = c(rep("darkgreen", 3) , rep("darkred",2)),
sub= paste("Entanglement:", round(entanglement(dendlist(dend1, dend2)),2)), cex_sub = 1.5
)
# dend2 %>% color_branches %>% plot
# dend2 %>% color_branches(k=3) %>% plot # nice, returns the tree as is...
# dend2 %>% color_labels %>% plot
# cutree(dend2,3)
## ----echo=FALSE---------------------------------------------------------------
c(person("Tal", "Galili", role = c("aut", "cre", "cph"), email =
"tal.galili@gmail.com", comment = "https://www.r-statistics.com"),
person("Gavin", "Simpson", role = "ctb"), person("Gregory","Jefferis", role
= "ctb", email = "jefferis@gmail.com",
comment ="imported code from his dendroextras package"),
person("Marco", "Gallotta", role = "ctb", comment =
"a.k.a: marcog") , person("Johan", "Renaudie", role = "ctb", comment =
"https://github.com/plannapus"), person("R core team", role = "ctb",
comment = "Thanks for the Infastructure, and code in the examples"),
person("Kurt", "Hornik", role = "ctb"), person("Uwe", "Ligges",
role = "ctb"), person("Andrej-Nikolai", "Spiess", role = "ctb"),
person("Steve", "Horvath",email = "SHorvath@mednet.ucla.edu", role =
"ctb"), person("Peter", "Langfelder",email = "Peter.Langfelder@gmail.com",
role = "ctb"), person("skullkey", role = "ctb"), person("Mark",
"Van Der Loo", email = "mark.vanderloo@gmail.com", comment =
"https://github.com/markvanderloo d3dendrogram", role = "ctb"),
person("Yoav", "Benjamini", role = "ths"))
## ----eval = FALSE-------------------------------------------------------------
# d1 <- c(1:5) # some data
# d2 <- dist(d1)
# d3 <- hclust(d2, method = "average")
# dend <- as.dendrogram(d3)
## ----eval=FALSE---------------------------------------------------------------
# dend <- as.dendrogram(hclust(dist(c(1:5)), method = "average"))
## ----eval = FALSE-------------------------------------------------------------
# dend <- c(1:5) %>% # take the a vector from 1 to 5
# dist %>% # calculate a distance matrix,
# hclust(method = "average") %>% # on it compute hierarchical clustering using the "average" method,
# as.dendrogram # and lastly, turn that object into a dendrogram.
## ----fig.width=4, fig.height=3------------------------------------------------
# Create a dend:
dend <- 1:2 %>% dist %>% hclust %>% as.dendrogram
# and plot it:
dend %>% plot
## -----------------------------------------------------------------------------
dend %>% unclass %>% str
dend %>% class
## ----fig.width=4, fig.height=3------------------------------------------------
# Create a dend:
dend <- 1:5 %>% dist %>% hclust %>% as.dendrogram
# Plot it:
dend %>% plot
## -----------------------------------------------------------------------------
dend %>% labels # get the labels of the tree
dend %>% nleaves # get the number of leaves of the tree
dend %>% nnodes # get the number of nodes in the tree (including leaves)
dend %>% head # A combination of "str" with "head"
## ----echo=FALSE, fig.height=5-------------------------------------------------
# Create a dend:
dend <- 1:5 %>% dist %>% hclust %>% as.dendrogram
# get_nodes_xy(dend)
# polygon(get_nodes_xy(dend), col = 2)
plot(dend,
leaflab = "none", # axes = FALSE, # no labels or y axis
main = "Nodes order when using \nDepth-first search in a dendrogram")
xy <- get_nodes_xy(dend)
for(i in 1:(nrow(xy)-1)) {
arrows(xy[i,1], xy[i,2], angle = 17,
length = .3,
xy[i+1,1], xy[i+1,2],
lty = 1, col = 3, lwd = 1.5)
}
points(xy, pch = 19, cex = 3)
text(xy, labels = 1:nnodes(dend),cex = 1.2, col = "white") #, adj = c(0.4,0.4))
## -----------------------------------------------------------------------------
# Create a dend:
dend <- 1:5 %>% dist %>% hclust %>% as.dendrogram
# Get various attributes
dend %>% get_nodes_attr("height") # node's height
dend %>% hang.dendrogram %>% get_nodes_attr("height") # node's height (after raising the leaves)
dend %>% get_nodes_attr("members") # number of members (leaves) under that node
dend %>% get_nodes_attr("members", id = c(2,5)) # number of members for nodes 2 and 5
dend %>% get_nodes_attr("midpoint") # how much "left" is this node from its left-most child's location
dend %>% get_nodes_attr("leaf") # is this node a leaf
dend %>% get_nodes_attr("label") # what is the label on this node
dend %>% get_nodes_attr("nodePar") # empty (for now...)
dend %>% get_nodes_attr("edgePar") # empty (for now...)
## ----fig.show='hold', fig.width=8, fig.height=3-------------------------------
dend13 <- c(1:3) %>% # take some data
dist %>% # calculate a distance matrix,
hclust(method = "average") %>% # on it compute hierarchical clustering using the "average" method,
as.dendrogram # and lastly, turn that object into a dendrogram.
# same, but for 5 leaves:
dend15 <- c(1:5) %>% dist %>% hclust(method = "average") %>% as.dendrogram
par(mfrow = c(1,2))
dend13 %>% plot(main="dend13")
dend15 %>% plot(main="dend15")
# we could have also used plot(dend)
## -----------------------------------------------------------------------------
# get the labels:
dend15 %>% labels
# this is just like labels(dend)
## -----------------------------------------------------------------------------
# change the labels, and then print them:
dend15 %>% set("labels", c(111:115)) %>% labels
# could also be done using:
# labels(dend) <- c(111:115)
## -----------------------------------------------------------------------------
dend15 %>% labels
dend15 %>% set("labels_to_char") %>% labels
## ----fig.width=8, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("labels_col", "blue") %>% plot(main = "Change label's color") # change color
dend15 %>% set("labels_cex", 2) %>% plot(main = "Change label's size") # change color
## ----fig.width=8, fig.height=3------------------------------------------------
# Produce a more complex dendrogram:
dend15_2 <- dend15 %>%
set("labels", c(111:115)) %>% # change labels
set("labels_col", c(1,2,3)) %>% # change color
set("labels_cex", c(2,1)) # change size
par(mfrow = c(1,2))
dend15 %>% plot(main = "Before")
dend15_2 %>% plot(main = "After")
## -----------------------------------------------------------------------------
# looking at only the left-most node of the "after tree":
dend15_2[[1]][[1]] %>% unclass %>% str
# looking at only the nodePar attributes in this sub-tree:
dend15_2[[1]][[1]] %>% get_nodes_attr("nodePar")
## ----fig.width=8, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("labels_cex", 2) %>% set("labels_col", value = c(3,4)) %>%
plot(main = "Recycles color \nfrom left to right")
dend15 %>% set("labels_cex", 2) %>% set("labels_col", value = c(3,4), k=2) %>%
plot(main = "Color labels \nper cluster")
abline(h = 2, lty = 2)
## ----fig.width=10, fig.height=6-----------------------------------------------
par(mfrow = c(2,3))
dend13 %>% set("nodes_pch", 19) %>% plot(main = "(1) Show the\n nodes (as a dot)") #1
dend13 %>% set("nodes_pch", 19) %>% set("nodes_cex", 2) %>%
plot(main = "(2) Show (larger)\n nodes") #2
dend13 %>% set("nodes_pch", 19) %>% set("nodes_cex", 2) %>% set("nodes_col", 3) %>%
plot(main = "(3) Show (larger+colored)\n nodes") #3
dend13 %>% set("leaves_pch", 21) %>% plot(main = "(4) Show the leaves\n (as empty circles)") #4
dend13 %>% set("leaves_pch", 21) %>% set("leaves_cex", 2) %>%
plot(main = "(5) Show (larger)\n leaf circles") #5
dend13 %>%
set("leaves_pch", 21) %>%
set("leaves_bg", "gold") %>%
set("leaves_cex", 2) %>%
set("leaves_col", "darkred") %>%
plot(main = "(6) Show (larger+colored+filled)\n leaves") #6
## ----fig.width=8, fig.height=4------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("nodes_pch", c(19,1,4)) %>% set("nodes_cex", c(2,1,2)) %>% set("nodes_col", c(3,4)) %>%
plot(main = "Adjust nodes")
dend15 %>% set("leaves_pch", c(19,1,4)) %>% set("leaves_cex", c(2,1,2)) %>% set("leaves_col", c(3,4)) %>%
plot(main = "Adjust nodes\n(but only for leaves)")
## -----------------------------------------------------------------------------
dend15 %>% set("nodes_pch", c(19,1,4)) %>%
set("nodes_cex", c(2,1,2)) %>% set("nodes_col", c(3,4)) %>% get_nodes_attr("nodePar")
## ----fig.width=10, fig.height=3-----------------------------------------------
par(mfrow = c(1,3))
dend13 %>% set("leaves_pch", 19) %>% set("leaves_cex", 2) %>% set("leaves_col", 2) %>% # adjust the leaves
hang.dendrogram %>% # hang the leaves
plot(main = "Hanging a tree")
dend13 %>% set("leaves_pch", 19) %>% set("leaves_cex", 2) %>% set("leaves_col", 2) %>% # adjust the leaves
hang.dendrogram(hang_height = .6) %>% # hang the leaves (at some height)
plot(main = "Hanging a tree (but lower)")
dend13 %>% set("leaves_pch", 19) %>% set("leaves_cex", 2) %>% set("leaves_col", 2) %>% # adjust the leaves
hang.dendrogram %>% # hang the leaves
hang.dendrogram(hang = -1) %>% # un-hanging the leaves
plot(main = "Not hanging a tree")
## -----------------------------------------------------------------------------
dend13 %>% get_leaves_attr("height")
dend13 %>% hang.dendrogram %>% get_leaves_attr("height")
## ----fig.width=10, fig.height=3-----------------------------------------------
par(mfrow = c(1,3))
dend13 %>% plot(main = "First tree", ylim = c(0,3))
dend13 %>%
raise.dendrogram (-1) %>%
plot(main = "One point lower", ylim = c(0,3))
dend13 %>%
raise.dendrogram (1) %>%
plot(main = "One point higher", ylim = c(0,3))
## ----fig.width=10, fig.height=3-----------------------------------------------
par(mfrow = c(1,3))
dend13 %>% set("branches_lwd", 4) %>% plot(main = "Thick branches")
dend13 %>% set("branches_lty", 3) %>% plot(main = "Dashed branches")
dend13 %>% set("branches_col", 2) %>% plot(main = "Red branches")
## ----fig.width=4, fig.height=3------------------------------------------------
# Produce a more complex dendrogram:
dend15 %>%
set("branches_lwd", c(4,1)) %>%
set("branches_lty", c(1,1,3)) %>%
set("branches_col", c(1,2,3)) %>%
plot(main = "Complex branches", edge.root = TRUE)
## ----fig.width=8, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("branches_k_color", k = 3) %>% plot(main = "Nice defaults")
dend15 %>% set("branches_k_color", value = 3:1, k = 3) %>%
plot(main = "Controlling branches' colors\n(via clustering)")
# This is like using the `color_branches` function
## ----fig.width=8, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("by_labels_branches_col", value = c(1,4)) %>%
plot(main = "Adjust the branch\n if ALL (default) of its\n labels are in the list")
dend15 %>% set("by_labels_branches_col", value = c(1,4), type = "any") %>%
plot(main = "Adjust the branch\n if ANY of its\n labels are in the list")
## ----fig.width=10, fig.height=3-----------------------------------------------
# Using "Inf" in "TF_values" means to let the parameters stay as they are.
par(mfrow = c(1,3))
dend15 %>% set("by_labels_branches_col", value = c(1,4), TF_values = c(3,Inf)) %>%
plot(main = "Change colors")
dend15 %>% set("by_labels_branches_lwd", value = c(1,4), TF_values = c(8,1)) %>%
plot(main = "Change line width")
dend15 %>% set("by_labels_branches_lty", value = c(1,4), TF_values = c(3,Inf)) %>%
plot(main = "Change line type")
## ----fig.width=8, fig.height=3------------------------------------------------
dat <- iris[1:20,-5]
hca <- hclust(dist(dat))
hca2 <- hclust(dist(dat), method = "single")
dend <- as.dendrogram(hca)
dend2 <- as.dendrogram(hca2)
par(mfrow = c(1,3))
dend %>% highlight_branches_col %>% plot(main = "Coloring branches")
dend %>% highlight_branches_lwd %>% plot(main = "Emphasizing line-width")
dend %>% highlight_branches %>% plot(main = "Emphasizing color\n and line-width")
## ----fig.width=8, fig.height=4------------------------------------------------
library(viridis)
par(mfrow = c(1,3))
dend %>% highlight_branches_col %>% plot(main = "Coloring branches \n (default is reversed viridis)")
dend %>% highlight_branches_col(viridis(100)) %>% plot(main = "It is better to use \n lighter colors in the leaves")
dend %>% highlight_branches_col(rev(magma(1000))) %>% plot(main = "The magma color pallatte\n is also good")
dl <- dendlist(dend, dend2)
tanglegram(dl, sort = TRUE, common_subtrees_color_lines = FALSE, highlight_distinct_edges = FALSE, highlight_branches_lwd = FALSE)
tanglegram(dl)
tanglegram(dl, fast = TRUE)
dl <- dendlist(highlight_branches(dend), highlight_branches(dend2))
tanglegram(dl, sort = TRUE, common_subtrees_color_lines = FALSE, highlight_distinct_edges = FALSE)
# dend %>% set("highlight_branches_col") %>% plot
dl <- dendlist(dend, dend2) %>% set("highlight_branches_col")
tanglegram(dl, sort = TRUE, common_subtrees_color_lines = FALSE, highlight_distinct_edges = FALSE)
## ----fig.width=10, fig.height=3-----------------------------------------------
par(mfrow = c(1,3))
dend15 %>%
set("labels_colors") %>%
set("branches_k_color") %>%
plot(main = "First tree")
dend15 %>%
set("labels_colors") %>%
set("branches_k_color") %>%
rotate(as.character(5:1)) %>% #rotate to match labels new order
plot(main = "Rotated tree\n based on labels")
dend15 %>%
set("labels_colors") %>%
set("branches_k_color") %>%
rotate(5:1) %>% # the fifth label to go first is "4"
plot(main = "Rotated tree\n based on order")
## ----fig.width=12, fig.height=6-----------------------------------------------
dend110 <- c(1, 3:5, 7,9,10) %>% dist %>% hclust(method = "average") %>%
as.dendrogram %>% color_labels %>% color_branches
par(mfrow = c(1,3))
dend110 %>% plot(main = "Original tree")
dend110 %>% sort %>% plot(main = "labels sort")
dend110 %>% sort(type = "nodes") %>% plot(main = "nodes (ladderize) sort")
## ----fig.width=10, fig.height=3-----------------------------------------------
par(mfrow = c(1,3))
dend15 %>% plot(main = "First tree", ylim = c(0,3))
dend15 %>%
unbranch %>%
plot(main = "Unbranched tree", ylim = c(0,3))
dend15 %>%
unbranch(2) %>%
plot(main = "Unbranched tree (2)", ylim = c(0,3))
## ----fig.width=7, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend15 %>% set("labels_colors") %>%
plot(main = "First tree", ylim = c(0,3))
dend15 %>% set("labels_colors") %>%
prune(c("1","5")) %>%
plot(main = "Prunned tree", ylim = c(0,3))
## ----fig.width=7, fig.height=3------------------------------------------------
par(mfrow = c(1,2))
dend_intersected <- intersect_trees(dend13, dend15)
dend_intersected[[1]] %>% plot
dend_intersected[[2]] %>% plot
## -----------------------------------------------------------------------------
# ladderize is like sort(..., type = "node")
dend <- iris[1:5,-5] %>% dist %>% hclust %>% as.dendrogram
par(mfrow = c(1,3))
dend %>% ladderize %>% plot(horiz = TRUE); abline(v = .2, col = 2, lty = 2)
dend %>% collapse_branch(tol = 0.2) %>% ladderize %>% plot(horiz = TRUE)
dend %>% collapse_branch(tol = 0.2) %>% ladderize %>% hang.dendrogram(hang = 0) %>% plot(horiz = TRUE)
## ----fig.width=6, fig.height=3------------------------------------------------
layout(t(c(1,1,1,2,2)))
dend15 %>% set("branches_k_color") %>% plot
dend15 %>% rect.dendrogram(k=3,
border = 8, lty = 5, lwd = 2)
dend15 %>% set("branches_k_color") %>% plot(horiz = TRUE)
dend15 %>% rect.dendrogram(k=3, horiz = TRUE,
border = 8, lty = 5, lwd = 2)
## ----fig.width=4, fig.height=4------------------------------------------------
is_odd <- ifelse(labels(dend15) %% 2, 2,3)
is_345 <- ifelse(labels(dend15) > 2, 3,4)
is_12 <- ifelse(labels(dend15) <= 2, 3,4)
k_3 <- cutree(dend15,k = 3, order_clusters_as_data = FALSE)
# The FALSE above makes sure we get the clusters in the order of the
# dendrogram, and not in that of the original data. It is like:
# cutree(dend15, k = 3)[order.dendrogram(dend15)]
the_bars <- cbind(is_odd, is_345, is_12, k_3)
the_bars[the_bars==2] <- 8
dend15 %>% plot
colored_bars(colors = the_bars, dend = dend15, sort_by_labels_order = FALSE)
# we use sort_by_labels_order = FALSE since "the_bars" were set based on the
# labels order. The more common use case is when the bars are based on a second variable
# from the same data.frame as dend was created from. Thus, the default
# sort_by_labels_order = TRUE would make more sense.
## -----------------------------------------------------------------------------
dend_mtcars <- mtcars[, c("mpg", "disp")] %>% dist %>% hclust(method = "average") %>% as.dendrogram
par(mar = c(10,2,1,1))
plot(dend_mtcars)
the_bars <- ifelse(mtcars$am, "grey", "gold")
colored_bars(colors = the_bars, dend = dend_mtcars, rowLabels = "am")
## -----------------------------------------------------------------------------
# Create a complex dend:
dend <- iris[1:30,-5] %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k=3) %>% set("branches_lwd", c(1.5,1,1.5)) %>%
set("branches_lty", c(1,1,3,1,1,2)) %>%
set("labels_colors") %>% set("labels_cex", c(.9,1.2)) %>%
set("nodes_pch", 19) %>% set("nodes_col", c("orange", "black", "plum", NA))
# plot the dend in usual "base" plotting engine:
plot(dend)
# Now let's do it in ggplot2 :)
ggd1 <- as.ggdend(dend)
library(ggplot2)
# the nodes are not implemented yet.
ggplot(ggd1) # reproducing the above plot in ggplot2 :)
ggplot(ggd1, horiz = TRUE, theme = NULL) # horiz plot (and let's remove theme) in ggplot2
# Adding some extra spice to it...
# creating a radial plot:
# ggplot(ggd1) + scale_y_reverse(expand = c(0.2, 0)) + coord_polar(theta="x")
# The text doesn't look so great, so let's remove it:
ggplot(ggd1, labels = FALSE) + scale_y_reverse(expand = c(0.2, 0)) + coord_polar(theta="x")
## ----fig.width=7, fig.height=3------------------------------------------------
if(require(DendSer)) {
par(mfrow = c(1,2))
DendSer.dendrogram(dend15)
dend15 %>% color_branches %>% plot
dend15 %>% color_branches %>% rotate_DendSer %>% plot
}
## ----message=FALSE, fig.width=7, fig.height=7---------------------------------
library(gplots)
x <- as.matrix(datasets::mtcars)
heatmap.2(x)
# now let's spice up the dendrograms a bit:
Rowv <- x %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 3) %>% set("branches_lwd", 4) %>%
ladderize
# rotate_DendSer(ser_weight = dist(x))
Colv <- x %>% t %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 2) %>% set("branches_lwd", 4) %>%
ladderize
# rotate_DendSer(ser_weight = dist(t(x)))
heatmap.2(x, Rowv = Rowv, Colv = Colv)
## ----message=FALSE, eval = FALSE----------------------------------------------
#
# # library(NMF)
# #
# # x <- as.matrix(datasets::mtcars)
# #
# # # now let's spice up the dendrograms a bit:
# # Rowv <- x %>% dist %>% hclust %>% as.dendrogram %>%
# # set("branches_k_color", k = 3) %>% set("branches_lwd", 4) %>%
# # ladderize
# # # rotate_DendSer(ser_weight = dist(x))
# # Colv <- x %>% t %>% dist %>% hclust %>% as.dendrogram %>%
# # set("branches_k_color", k = 2) %>% set("branches_lwd", 4) %>%
# # ladderize
# # # rotate_DendSer(ser_weight = dist(t(x)))
# #
# # aheatmap(x, Rowv = Rowv, Colv = Colv)
#
#
#
## -----------------------------------------------------------------------------
x <- as.matrix(datasets::mtcars)
# heatmaply(x)
# now let's spice up the dendrograms a bit:
Rowv <- x %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 3) %>% set("branches_lwd", 4) %>%
ladderize
# rotate_DendSer(ser_weight = dist(x))
Colv <- x %>% t %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k = 2) %>% set("branches_lwd", 4) %>%
ladderize
# rotate_DendSer(ser_weight = dist(t(x)))
## ----message=FALSE, cache = FALSE, eval = FALSE-------------------------------
# library(heatmaply)
# heatmaply(x, Rowv = Rowv, Colv = Colv)
## -----------------------------------------------------------------------------
# let's get the clusters
library(dynamicTreeCut)
data(iris)
x <- iris[,-5] %>% as.matrix
hc <- x %>% dist %>% hclust
dend <- hc %>% as.dendrogram
# Find special clusters:
clusters <- cutreeDynamic(hc, distM = as.matrix(dist(x)), method = "tree")
# we need to sort them to the order of the dendrogram:
clusters <- clusters[order.dendrogram(dend)]
clusters_numbers <- unique(clusters) - (0 %in% clusters)
n_clusters <- length(clusters_numbers)
library(colorspace)
cols <- rainbow_hcl(n_clusters)
true_species_cols <- rainbow_hcl(3)[as.numeric(iris[,][order.dendrogram(dend),5])]
dend2 <- dend %>%
branches_attr_by_clusters(clusters, values = cols) %>%
color_labels(col = true_species_cols)
plot(dend2)
clusters <- factor(clusters)
levels(clusters)[-1] <- cols[-5][c(1,4,2,3)]
# Get the clusters to have proper colors.
# fix the order of the colors to match the branches.
colored_bars(clusters, dend, sort_by_labels_order = FALSE)
# here we used sort_by_labels_order = FALSE since the clusters were already sorted based on the dendrogram's order
## ----message=FALSE, fig.width=9, results='hide'-------------------------------
par(mfrow = c(1,2))
library(pvclust)
data(lung) # 916 genes for 73 subjects
set.seed(13134)
result <- pvclust(lung[1:100, 1:10],
method.dist="cor", method.hclust="average", nboot=10)
# with pvrect
plot(result)
pvrect(result)
# with a dendrogram of pvrect
dend <- as.dendrogram(result)
result %>% as.dendrogram %>%
plot(main = "Cluster dendrogram with AU/BP values (%)\n reproduced plot with dendrogram")
result %>% text
result %>% pvrect
## ----fig.height=8, fig.width=8------------------------------------------------
par(mfrow = c(2,2))
# with a modified dendrogram of pvrect
dend %>% pvclust_show_signif(result) %>%
plot(main = "Cluster dendrogram \n bp values are highlighted by signif")
dend %>% pvclust_show_signif(result, show_type = "lwd") %>%
plot(main = "Cluster dendrogram with AU/BP values (%)\n bp values are highlighted by signif")
result %>% text
result %>% pvrect(alpha=0.95)
dend %>% pvclust_show_signif_gradient(result) %>%
plot(main = "Cluster dendrogram with AU/BP values (%)\n bp values are colored by signif")
dend %>%
pvclust_show_signif_gradient(result) %>%
pvclust_show_signif(result) %>%
plot(main = "Cluster dendrogram with AU/BP values (%)\n bp values are colored+highlighted by signif")
result %>% text
result %>% pvrect(alpha=0.95)
## -----------------------------------------------------------------------------
library(circlize)
dend <- iris[1:40,-5] %>% dist %>% hclust %>% as.dendrogram %>%
set("branches_k_color", k=3) %>% set("branches_lwd", c(5,2,1.5)) %>%
set("branches_lty", c(1,1,3,1,1,2)) %>%
set("labels_colors") %>% set("labels_cex", c(.6,1.5)) %>%
set("nodes_pch", 19) %>% set("nodes_col", c("orange", "black", "plum", NA))
par(mar = rep(0,4))
circlize_dendrogram(dend)
# circlize_dendrogram(dend, labels = FALSE)
# circlize_dendrogram(dend, facing = "inside", labels = FALSE)
## -----------------------------------------------------------------------------
# dend <- iris[1:40,-5] %>% dist %>% hclust %>% as.dendrogram %>%
# set("branches_k_color", k=3) %>% set("branches_lwd", c(5,2,1.5)) %>%
# set("branches_lty", c(1,1,3,1,1,2)) %>%
# set("labels_colors") %>% set("labels_cex", c(.9,1.2)) %>%
# set("nodes_pch", 19) %>% set("nodes_col", c("orange", "black", "plum", NA))
set.seed(2015-07-10)
# In the following we get the dendrogram but can also get extra information on top of it
circos.initialize("foo", xlim = c(0, 40))
circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
circos.rect(1:40-0.8, rep(0, 40), 1:40-0.2, runif(40), col = rand_color(40), border = NA)
}, bg.border = NA)
circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
circos.text(1:40-0.5, rep(0, 40), labels(dend), col = labels_colors(dend),
facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
}, bg.border = NA, track.height = 0.1)
max_height = attr(dend, "height")
circos.track(ylim = c(0, max_height), panel.fun = function(x, y) {
circos.dendrogram(dend, max_height = max_height)
}, track.height = 0.5, bg.border = NA)
circos.clear()
## -----------------------------------------------------------------------------
dend15 <- c(1:5) %>% dist %>% hclust(method = "average") %>% as.dendrogram
dend15 <- dend15 %>% set("labels_to_char")
dend51 <- dend15 %>% set("labels", as.character(5:1)) %>% match_order_by_labels(dend15)
dends_15_51 <- dendlist(dend15, dend51)
dends_15_51
head(dends_15_51)
## -----------------------------------------------------------------------------
# example 1
x <- 1:5 %>% dist %>% hclust %>% as.dendrogram
y <- set(x, "labels", 5:1)
# example 2
dend1 <- 1:10 %>% dist %>% hclust %>% as.dendrogram
dend2 <- dend1 %>% set("labels", c(1,3,2,4, 5:10) )
dend_diff(dend1, dend2)
## ----fig.width=5, fig.height=3------------------------------------------------
tanglegram(dends_15_51)
# Same as using:
# plot(dends_15_51) # since there is a plot method for dendlist
# and also:
# tanglegram(dend15, dend51)
## ----fig.width=5, fig.height=3------------------------------------------------
tanglegram(dends_15_51, common_subtrees_color_branches = TRUE)
## -----------------------------------------------------------------------------
dends_15_51 %>% entanglement # lower is better
# dends_15_51 %>% untangle(method = "DendSer") %>% entanglement # lower is better
dends_15_51 %>% untangle(method = "step1side") %>% entanglement # lower is better
## ----fig.width=5, fig.height=3------------------------------------------------
dends_15_51 %>% untangle(method = "step1side") %>%
tanglegram(common_subtrees_color_branches = TRUE)
## ----fig.width=5, fig.height=3------------------------------------------------
x <- dends_15_51
x %>% plot(main = paste("entanglement =", round(entanglement(x), 2)))
## ----fig.width=5, fig.height=3------------------------------------------------
# x <- dends_15_51 %>% untangle(method = "DendSer")
x <- dends_15_51 %>% untangle(method = "ladderize")
x %>% plot(main = paste("entanglement =", round(entanglement(x), 2)))
## ----fig.width=5, fig.height=3------------------------------------------------
set.seed(3958)
x <- dends_15_51 %>% untangle(method = "random", R = 10)
x %>% plot(main = paste("entanglement =", round(entanglement(x), 2)))
## ----fig.width=5, fig.height=3------------------------------------------------
x <- dends_15_51 %>% untangle(method = "step2side")
x %>% plot(main = paste("entanglement =", round(entanglement(x), 2)))
## -----------------------------------------------------------------------------
set.seed(23235)
ss <- sample(1:150, 10 )
dend1 <- iris[ss,-5] %>% dist %>% hclust("com") %>% as.dendrogram
dend2 <- iris[ss,-5] %>% dist %>% hclust("single") %>% as.dendrogram
dend3 <- iris[ss,-5] %>% dist %>% hclust("ave") %>% as.dendrogram
dend4 <- iris[ss,-5] %>% dist %>% hclust("centroid") %>% as.dendrogram
dend1234 <- dendlist("Complete" = dend1, "Single" = dend2, "Average" = dend3, "Centroid" = dend4)
par(mfrow = c(2,2))
plot(dend1, main = "Complete")
plot(dend2, main = "Single")
plot(dend3, main = "Average")
plot(dend4, main = "Centroid")
## -----------------------------------------------------------------------------
all.equal(dend1, dend1)
all.equal(dend1, dend2)
all.equal(dend1, dend2, use.edge.length = FALSE)
all.equal(dend1, dend2, use.edge.length = FALSE, use.topology = FALSE)
all.equal(dend2, dend4, use.edge.length = TRUE)
all.equal(dend2, dend4, use.edge.length = FALSE)
all.equal(dendlist(dend1, dend1, dend1))
all.equal(dend1234)
all.equal(dend1234, use.edge.length = FALSE)
## -----------------------------------------------------------------------------
x <- 1:5 %>% dist %>% hclust %>% as.dendrogram
y <- set(x, "labels", 5:1)
dist.dendlist(dendlist(x1 = x,x2 = x,y1 = y))
dend_diff(x,y)
dist.dendlist(dend1234)
## -----------------------------------------------------------------------------
cor.dendlist(dend1234)
## -----------------------------------------------------------------------------
library(corrplot)
corrplot(cor.dendlist(dend1234), "pie", "lower")
## ----fig.width=5, fig.height=3------------------------------------------------
# same subtrees, so there is no need to color the branches
dend1234 %>% tanglegram(which = c(2,3))
# Here the branches colors are very helpful:
dend1234 %>% tanglegram(which = c(1,2),
common_subtrees_color_branches = TRUE)
## -----------------------------------------------------------------------------
cor_bakers_gamma(dend15, dend51)
## -----------------------------------------------------------------------------
cor_bakers_gamma(dend15, dend15)
## -----------------------------------------------------------------------------
set.seed(23235)
the_cor <- cor_bakers_gamma(dend15, dend15)
the_cor2 <- cor_bakers_gamma(dend15, dend51)
the_cor
the_cor2
R <- 100
cor_bakers_gamma_results <- numeric(R)
dend_mixed <- dend15
for(i in 1:R) {
dend_mixed <- sample.dendrogram(dend_mixed, replace = FALSE)
cor_bakers_gamma_results[i] <- cor_bakers_gamma(dend15, dend_mixed)
}
plot(density(cor_bakers_gamma_results),
main = "Baker's gamma distribution under H0",
xlim = c(-1,1))
abline(v = 0, lty = 2)
abline(v = the_cor, lty = 2, col = 2)
abline(v = the_cor2, lty = 2, col = 4)
legend("topleft", legend = c("cor", "cor2"), fill = c(2,4))
round(sum(the_cor2 < cor_bakers_gamma_results)/ R, 4)
title(sub = paste("One sided p-value:",
"cor =", round(sum(the_cor < cor_bakers_gamma_results)/ R, 4),
" ; cor2 =", round(sum(the_cor2 < cor_bakers_gamma_results)/ R, 4)
))
## ----warning=FALSE------------------------------------------------------------
dend1 <- dend15
dend2 <- dend51
set.seed(23801)
R <- 100
dend1_labels <- labels(dend1)
dend2_labels <- labels(dend2)
cor_bakers_gamma_results <- numeric(R)
for(i in 1:R) {
sampled_labels <- sample(dend1_labels, replace = TRUE)
# members needs to be fixed since it will be later used in nleaves
dend_mixed1 <- sample.dendrogram(dend1,
dend_labels=dend1_labels,
fix_members=TRUE,fix_order=TRUE,fix_midpoint=FALSE,
replace = TRUE, sampled_labels=sampled_labels
)
dend_mixed2 <- sample.dendrogram(dend2, dend_labels=dend2_labels,
fix_members=TRUE,fix_order=TRUE,fix_midpoint=FALSE,
replace = TRUE, sampled_labels=sampled_labels
)
cor_bakers_gamma_results[i] <- cor_bakers_gamma(dend_mixed1, dend_mixed2, warn = FALSE)
}
# here is the tanglegram
tanglegram(dend1, dend2)
# And here is the tanglegram for one sample of our trees:
dend_mixed1 <- rank_order.dendrogram(dend_mixed1)
dend_mixed2 <- rank_order.dendrogram(dend_mixed2)
dend_mixed1 <- fix_members_attr.dendrogram(dend_mixed1)
dend_mixed2 <- fix_members_attr.dendrogram(dend_mixed2)
tanglegram(dend_mixed1, dend_mixed2)
cor_bakers_gamma(dend_mixed1, dend_mixed2, warn = FALSE)
CI95 <- quantile(cor_bakers_gamma_results, probs=c(.025,.975))
CI95
par(mfrow = c(1,1))
plot(density(cor_bakers_gamma_results),
main = "Baker's gamma bootstrap distribution",
xlim = c(-1,1))
abline(v = CI95, lty = 2, col = 3)
abline(v = cor_bakers_gamma(dend1, dend2), lty = 2, col = 2)
legend("topleft", legend =c("95% CI", "Baker's Gamma Index"), fill = c(3,2))
## -----------------------------------------------------------------------------
cor_cophenetic(dend15, dend51)
## -----------------------------------------------------------------------------
hc1 <- hclust(dist(iris[,-5]), "com")
hc2 <- hclust(dist(iris[,-5]), "single")
# FM index of a cluster with himself is 1:
FM_index(cutree(hc1, k=3), cutree(hc1, k=3))
# FM index of two clusterings:
FM_index(cutree(hc1, k=3), cutree(hc2, k=3))
# we got a value far above the expected under H0
# Using the R code:
FM_index_R(cutree(hc1, k=3), cutree(hc2, k=3))
## -----------------------------------------------------------------------------
FM_index(cutree(hc1, k=3), cutree(hc2, k=3))
## -----------------------------------------------------------------------------
0.4462 + 1.645 * sqrt(6.464092e-05)
## -----------------------------------------------------------------------------
set.seed(23235)
ss <- TRUE # sample(1:150, 30 ) # TRUE #
hc1 <- hclust(dist(iris[ss,-5]), "com")
hc2 <- hclust(dist(iris[ss,-5]), "single")
dend1 <- as.dendrogram(hc1)
dend2 <- as.dendrogram(hc2)
# cutree(tree1)
# It works the same for hclust and dendrograms:
Bk(hc1, hc2, k = 3)
Bk(dend1, dend2, k = 3)
## ----warning=FALSE------------------------------------------------------------
Bk_plot(hc1, hc2, main = "WRONG Bk plot \n(due to the way cutree works with ties in hclust)", warn = FALSE)
Bk_plot(dend1, dend2, main = "CORRECT Bk plot \n(based on dendrograms)")
## ----cache=FALSE--------------------------------------------------------------
sessionInfo()