1983 lines
2.3 MiB
HTML
1983 lines
2.3 MiB
HTML
|
<!DOCTYPE html>
|
|||
|
|
|||
|
<html lang="en">
|
|||
|
|
|||
|
<head>
|
|||
|
|
|||
|
<meta charset="utf-8" />
|
|||
|
<meta name="generator" content="pandoc" />
|
|||
|
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
|
|||
|
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|||
|
|
|||
|
<meta name="author" content="Tal Galili" />
|
|||
|
|
|||
|
<meta name="date" content="2024-11-15" />
|
|||
|
|
|||
|
<title>Introduction to dendextend</title>
|
|||
|
|
|||
|
<script>
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
//
// Once the DOM is ready, look at the first child of every section wrapper
// (a `div.section` whose class contains "level"). When that child is a
// heading (h1-h6), strip all of its attributes so only the wrapping div
// carries them.
document.addEventListener('DOMContentLoaded', function() {
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  for (var i = 0; i < firstChildren.length; i++) {
    var heading = firstChildren[i];
    if (!/^h[1-6]$/i.test(heading.tagName)) continue; // it should be a header h1-h6
    // Always remove the attribute at index 0 and re-check the length: the
    // loop ends once the heading has no attributes left.
    var attrs = heading.attributes;
    while (attrs.length > 0) heading.removeAttribute(attrs[0].name);
  }
});
</script>
|
|||
|
|
|||
|
<style type="text/css">
/* Small helper classes: inline code wrapping, small caps, underline,
   two-column layout, hanging indents and task lists. */
code { white-space: pre-wrap; }
span.smallcaps { font-variant: small-caps; }
span.underline { text-decoration: underline; }
div.column { display: inline-block; vertical-align: top; width: 50%; }
div.hanging-indent { margin-left: 1.5em; text-indent: -1.5em; }
ul.task-list { list-style: none; }
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">
/* Keep code unwrapped and let source blocks overflow visibly. */
code {
  white-space: pre;
}
.sourceCode {
  overflow: visible;
}
</style>
|
|||
|
<style type="text/css" data-origin="pandoc">
/* Layout of highlighted source blocks. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
  div.sourceCode { overflow: auto; }
}
@media print {
  pre > code.sourceCode { white-space: pre-wrap; }
  pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}

/* Optional line numbering, driven by a CSS counter. */
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }

/* Intentionally empty: the inline script that follows this sheet looks for
   rules whose selector is exactly "div.sourceCode". */
div.sourceCode
  { }
@media screen {
  pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}

/* Syntax-highlighting colors, keyed by the two-letter token class on each span. */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
|
|||
|
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
//
// Scans every stylesheet whose owner element carries data-origin="pandoc".
// Any rule whose selector is exactly "div.sourceCode" and which sets a color
// or background-color is replaced, at the same index, by a "pre.sourceCode"
// rule with the same declarations.
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    var owner = sheets[i].ownerNode;
    // Skip sheets without an owner node (or without a dataset) instead of
    // letting a TypeError abort the whole scan.
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules may throw; such sheets are skipped.
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule
      // (j is not advanced here: the next pass sees the new pre.sourceCode
      // rule at index j, fails the selector test above, and moves on)
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">
/* Page layout and base typography. */
body {
  background-color: #fff;
  margin: 1em auto;
  max-width: 700px;
  overflow: visible;
  padding-left: 2em;
  padding-right: 2em;
  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.35;
}

/* Table of contents box. */
#TOC {
  clear: both;
  margin: 0 0 10px 10px;
  padding: 4px;
  width: 400px;
  border: 1px solid #CCCCCC;
  border-radius: 5px;
  background-color: #f6f6f6;
  font-size: 13px;
  line-height: 1.3;
}
#TOC .toctitle {
  font-weight: bold;
  font-size: 15px;
  margin-left: 5px;
}
#TOC ul {
  padding-left: 40px;
  margin-left: -1.5em;
  margin-top: 5px;
  margin-bottom: 5px;
}
#TOC ul ul {
  margin-left: -2em;
}
#TOC li {
  line-height: 16px;
}

/* Tables. */
table {
  margin: 1em auto;
  border-width: 1px;
  border-color: #DDDDDD;
  border-style: outset;
  border-collapse: collapse;
}
table th {
  border-width: 2px;
  padding: 5px;
  border-style: inset;
}
table td {
  border-width: 1px;
  border-style: inset;
  line-height: 18px;
  padding: 5px 5px;
}
table, table th, table td {
  border-left-style: none;
  border-right-style: none;
}
table thead, table tr.even {
  background-color: #f7f7f7;
}

/* Block-level text elements. */
p {
  margin: 0.5em 0;
}
blockquote {
  background-color: #f6f6f6;
  padding: 0.25em 0.75em;
}
/* Only a top border is drawn. The earlier `border-style: solid` declaration
   was removed: `border: none` reset it immediately, so it had no effect. */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
  margin-left: 0;
}
dl dd {
  margin-bottom: 13px;
  margin-left: 13px;
}
dl dt {
  font-weight: bold;
}
ul {
  margin-top: 0;
}
ul li {
  list-style: circle outside;
}
ul ul {
  margin-bottom: 0;
}

/* Code and preformatted text. */
pre, code {
  background-color: #f7f7f7;
  border-radius: 3px;
  color: #333;
  white-space: pre-wrap;
}
pre {
  border-radius: 3px;
  margin: 5px 0px 10px 0px;
  padding: 10px;
}
pre:not([class]) {
  background-color: #f7f7f7;
}
code {
  font-family: Consolas, Monaco, 'Courier New', monospace;
  font-size: 85%;
}
p > code, li > code {
  padding: 2px 0px;
}

/* Figures and images. */
div.figure {
  text-align: center;
}
/* A single border declaration is kept: the earlier #DDDDDD value was
   overridden by the later #CCCCCC one and never applied. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border-radius: 3px;
  border: 1px solid #CCCCCC;
  margin: 0 5px;
}

/* Headings. */
h1 {
  margin-top: 0;
  font-size: 35px;
  line-height: 40px;
}
h2 {
  border-bottom: 4px solid #f7f7f7;
  padding-top: 10px;
  padding-bottom: 2px;
  font-size: 145%;
}
h3 {
  border-bottom: 2px solid #f7f7f7;
  padding-top: 10px;
  font-size: 120%;
}
h4 {
  border-bottom: 1px solid #f7f7f7;
  margin-left: 8px;
  font-size: 105%;
}
h5, h6 {
  border-bottom: 1px solid #ccc;
  font-size: 105%;
}

/* Links: absolute http/https links are underlined, others are not. */
a {
  color: #0033dd;
  text-decoration: none;
}
a:hover {
  color: #6666ff;
}
a:visited {
  color: #800080;
}
a:visited:hover {
  color: #BB00BB;
}
a[href^="http:"] {
  text-decoration: underline;
}
a[href^="https:"] {
  text-decoration: underline;
}

/* Theme colors for highlighted tokens that are direct children of code. */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
</head>
|
|||
|
|
|||
|
<body>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<h1 class="title toc-ignore">Introduction to dendextend</h1>
|
|||
|
<h4 class="author">Tal Galili</h4>
|
|||
|
<h4 class="date">2024-11-15</h4>
|
|||
|
|
|||
|
|
|||
|
<div id="TOC">
|
|||
|
<ul>
|
|||
|
<li><a href="#introduction" id="toc-introduction">Introduction</a></li>
|
|||
|
<li><a href="#prerequisites" id="toc-prerequisites">Prerequisites</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#acknowledgement" id="toc-acknowledgement">Acknowledgement</a></li>
|
|||
|
<li><a href="#chaining" id="toc-chaining">Chaining</a></li>
|
|||
|
<li><a href="#a-dendrogram-is-a-nested-list-of-lists-with-attributes" id="toc-a-dendrogram-is-a-nested-list-of-lists-with-attributes">A
|
|||
|
dendrogram is a nested list of lists with attributes</a></li>
|
|||
|
<li><a href="#installation" id="toc-installation">Installation</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#how-to-explore-a-dendrograms-parameters" id="toc-how-to-explore-a-dendrograms-parameters">How to explore a
|
|||
|
dendrogram’s parameters</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#taking-a-first-look-at-a-dendrogram" id="toc-taking-a-first-look-at-a-dendrogram">Taking a first look at a
|
|||
|
dendrogram</a></li>
|
|||
|
<li><a href="#getting-nodes-attributes-in-a-depth-first-search" id="toc-getting-nodes-attributes-in-a-depth-first-search">Getting nodes
|
|||
|
attributes in a depth-first search</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#how-to-change-a-dendrogram" id="toc-how-to-change-a-dendrogram">How to change a dendrogram</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#the-set-function" id="toc-the-set-function">The “set”
|
|||
|
function</a></li>
|
|||
|
<li><a href="#two-simple-trees-to-play-with" id="toc-two-simple-trees-to-play-with">Two simple trees to play
|
|||
|
with</a></li>
|
|||
|
<li><a href="#setting-a-dendrograms-labels" id="toc-setting-a-dendrograms-labels">Setting a dendrogram’s
|
|||
|
labels</a></li>
|
|||
|
<li><a href="#setting-a-dendrograms-nodesleaves-points" id="toc-setting-a-dendrograms-nodesleaves-points">Setting a dendrogram’s
|
|||
|
nodes/leaves (points)</a></li>
|
|||
|
<li><a href="#setting-a-dendrograms-branches" id="toc-setting-a-dendrograms-branches">Setting a dendrogram’s
|
|||
|
branches</a></li>
|
|||
|
<li><a href="#changing-a-dendrograms-structure" id="toc-changing-a-dendrograms-structure">Changing a dendrogram’s
|
|||
|
structure</a></li>
|
|||
|
<li><a href="#adding-extra-bars-and-rectangles" id="toc-adding-extra-bars-and-rectangles">Adding extra bars and
|
|||
|
rectangles</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#ggplot2-integration" id="toc-ggplot2-integration">ggplot2
|
|||
|
integration</a></li>
|
|||
|
<li><a href="#enhancing-other-packages" id="toc-enhancing-other-packages">Enhancing other packages</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#dendser" id="toc-dendser">DendSer</a></li>
|
|||
|
<li><a href="#gplots" id="toc-gplots">gplots</a></li>
|
|||
|
<li><a href="#nmf" id="toc-nmf">NMF</a></li>
|
|||
|
<li><a href="#heatmaply" id="toc-heatmaply">heatmaply</a></li>
|
|||
|
<li><a href="#dynamictreecut" id="toc-dynamictreecut">dynamicTreeCut</a></li>
|
|||
|
<li><a href="#pvclust" id="toc-pvclust">pvclust</a></li>
|
|||
|
<li><a href="#circlize" id="toc-circlize">circlize</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#comparing-two-dendrograms" id="toc-comparing-two-dendrograms">Comparing two dendrograms</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#dendlist" id="toc-dendlist">dendlist</a></li>
|
|||
|
<li><a href="#dend_diff" id="toc-dend_diff">dend_diff</a></li>
|
|||
|
<li><a href="#tanglegram" id="toc-tanglegram">tanglegram</a></li>
|
|||
|
<li><a href="#correlation-measures" id="toc-correlation-measures">Correlation measures</a></li>
|
|||
|
<li><a href="#the-fowlkes-mallows-index-and-the-bk-plot" id="toc-the-fowlkes-mallows-index-and-the-bk-plot">The Fowlkes-Mallows
|
|||
|
Index and the Bk plot</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#session-info" id="toc-session-info">Session info</a></li>
|
|||
|
</ul>
|
|||
|
</div>
|
|||
|
|
|||
|
<!--
|
|||
|
%\VignetteEngine{knitr::rmarkdown}
|
|||
|
%\VignetteIndexEntry{Introduction to dendextend}
|
|||
|
-->
|
|||
|
<p><strong>Author</strong>: Tal Galili (homepage: r-statistics.com,
|
|||
|
e-mail: <a href="mailto:Tal.Galili@gmail.com" class="email">Tal.Galili@gmail.com</a> )</p>
|
|||
|
<p><strong>tl;dr</strong>: the <a href="https://cran.r-project.org/package=dendextend"><em>dendextend
|
|||
|
package</em></a> lets you create figures like this:</p>
|
|||
|
<p><img role="img" src="
|
|||
|
<div id="introduction" class="section level2">
|
|||
|
<h2>Introduction</h2>
|
|||
|
<p>The <a href="https://cran.r-project.org/package=dendextend"><strong><em>dendextend
|
|||
|
package</em></strong></a> offers a set of functions for extending
|
|||
|
dendrogram objects in R, letting you <strong>visualize</strong> and
|
|||
|
<strong>compare</strong> trees of hierarchical clusterings. You can:</p>
|
|||
|
<ul>
|
|||
|
<li><strong>Adjust a tree’s graphical parameters</strong> - the color,
|
|||
|
size, type, etc. of its branches, nodes and labels.</li>
|
|||
|
<li>Visually and statistically <strong>compare different
|
|||
|
dendrograms</strong> to one another.</li>
|
|||
|
</ul>
|
|||
|
<p>The goal of this document is to introduce you to the basic functions
|
|||
|
that dendextend provides, and show how they may be applied. We will make
|
|||
|
extensive use of “chaining” (explained next).</p>
|
|||
|
</div>
|
|||
|
<div id="prerequisites" class="section level2">
|
|||
|
<h2>Prerequisites</h2>
|
|||
|
<div id="acknowledgement" class="section level3">
|
|||
|
<h3>Acknowledgement</h3>
|
|||
|
<p>This package was made possible by the support of my thesis
|
|||
|
adviser <a href="http://www.math.tau.ac.il/~ybenja/">Yoav Benjamini</a>,
|
|||
|
as well as code contributions from many R users. They are:</p>
|
|||
|
<pre><code>#> [1] "Tal Galili &lt;tal.galili@gmail.com&gt; [aut, cre, cph] (https://www.r-statistics.com)"
|
|||
|
#> [2] "Gavin Simpson [ctb]"
|
|||
|
#> [3] "Gregory Jefferis &lt;jefferis@gmail.com&gt; [ctb] (imported code from his dendroextras package)"
|
|||
|
#> [4] "Marco Gallotta [ctb] (a.k.a: marcog)"
|
|||
|
#> [5] "Johan Renaudie [ctb] (https://github.com/plannapus)"
|
|||
|
#> [6] "R core team [ctb] (Thanks for the Infastructure, and code in the examples)"
|
|||
|
#> [7] "Kurt Hornik [ctb]"
|
|||
|
#> [8] "Uwe Ligges [ctb]"
|
|||
|
#> [9] "Andrej-Nikolai Spiess [ctb]"
|
|||
|
#> [10] "Steve Horvath &lt;SHorvath@mednet.ucla.edu&gt; [ctb]"
|
|||
|
#> [11] "Peter Langfelder &lt;Peter.Langfelder@gmail.com&gt; [ctb]"
|
|||
|
#> [12] "skullkey [ctb]"
|
|||
|
#> [13] "Mark Van Der Loo &lt;mark.vanderloo@gmail.com&gt; [ctb] (https://github.com/markvanderloo d3dendrogram)"
|
|||
|
#> [14] "Yoav Benjamini [ths]"</code></pre>
|
|||
|
<p>The <strong>design</strong> of the dendextend package (and this
|
|||
|
manual!) is heavily inspired by <a href="https://hadley.nz/">Hadley
|
|||
|
Wickham’s</a> work. Especially his text on <a href="https://r-pkgs.org/">writing an R package</a>, the <a href="https://cran.r-project.org/package=devtools">devtools package</a>,
|
|||
|
and the dplyr package (specifically the use of chaining, and the <a href="https://CRAN.R-project.org/package=dplyr/vignettes/dplyr.html">Introduction
|
|||
|
text to dplyr</a>).</p>
|
|||
|
</div>
|
|||
|
<div id="chaining" class="section level3">
|
|||
|
<h3>Chaining</h3>
|
|||
|
<p>Function calls in dendextend often get a dendrogram and return a
|
|||
|
(modified) dendrogram. This doesn’t lead to particularly elegant code if
|
|||
|
you want to do many operations at once. The same is true even in the
|
|||
|
first stage of creating a dendrogram.</p>
|
|||
|
<p>In order to construct a dendrogram, you will (often) need to go
|
|||
|
through several steps. You can either do so while keeping the
|
|||
|
intermediate results:</p>
|
|||
|
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>d1 <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="co"># some data</span></span>
|
|||
|
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a>d2 <span class="ot"><-</span> <span class="fu">dist</span>(d1)</span>
|
|||
|
<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a>d3 <span class="ot"><-</span> <span class="fu">hclust</span>(d2, <span class="at">method =</span> <span class="st">"average"</span>)</span>
|
|||
|
<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(d3)</span></code></pre></div>
|
|||
|
<p>Or, you can also wrap the function calls inside each other:</p>
|
|||
|
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(<span class="fu">hclust</span>(<span class="fu">dist</span>(<span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>)), <span class="at">method =</span> <span class="st">"average"</span>))</span></code></pre></div>
|
|||
|
<p>However, neither solution is ideal: the first solution includes
|
|||
|
redundant intermediate objects, while the second is difficult to read
|
|||
|
(since the order of the operations is from inside to out, while the
|
|||
|
arguments are a long way away from the function).</p>
|
|||
|
<p>To get around this problem, dendextend encourages the use of the
|
|||
|
<code>%>%</code> (“pipe” or “chaining”) operator (imported from the
|
|||
|
magrittr package). This turns <code>x %>% f(y)</code> into
|
|||
|
<code>f(x, y)</code> so you can use it to rewrite (“chain”) multiple
|
|||
|
operations such that they can be read from left-to-right,
|
|||
|
top-to-bottom.</p>
|
|||
|
<p>For example, the following will be written as it would be
|
|||
|
explained:</p>
|
|||
|
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%>%</span> <span class="co"># take the a vector from 1 to 5</span></span>
|
|||
|
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a> dist <span class="sc">%>%</span> <span class="co"># calculate a distance matrix, </span></span>
|
|||
|
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> <span class="co"># on it compute hierarchical clustering using the "average" method, </span></span>
|
|||
|
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a> as.dendrogram <span class="co"># and lastly, turn that object into a dendrogram.</span></span></code></pre></div>
|
|||
|
<p>For more details, you may look at:</p>
|
|||
|
<ul>
|
|||
|
<li><a href="https://cran.r-project.org/package=magrittr">magrittr on
|
|||
|
CRAN</a></li>
|
|||
|
<li><a href="https://CRAN.R-project.org/package=magrittr/vignettes/magrittr.html">Introduction
|
|||
|
to the magrittr package</a></li>
|
|||
|
<li><a href="https://www.r-statistics.com/2014/08/simpler-r-coding-with-pipes-the-present-and-future-of-the-magrittr-package/">Simpler
|
|||
|
R coding with pipes > the present and future of the magrittr
|
|||
|
package</a></li>
|
|||
|
</ul>
|
|||
|
</div>
|
|||
|
<div id="a-dendrogram-is-a-nested-list-of-lists-with-attributes" class="section level3">
|
|||
|
<h3>A dendrogram is a nested list of lists with attributes</h3>
|
|||
|
<p>The first step in working with dendrograms is to understand that
|
|||
|
they are just a <strong>nested list of lists with attributes</strong>.
|
|||
|
Let us explore this for the following (tiny) tree:</p>
|
|||
|
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
|
|||
|
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">2</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a><span class="co"># and plot it:</span></span>
|
|||
|
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a>dend <span class="sc">%>%</span> plot</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>And here is its structure (a nested list of lists with
|
|||
|
attributes):</p>
|
|||
|
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>dend <span class="sc">%>%</span> unclass <span class="sc">%>%</span> str</span></code></pre></div>
|
|||
|
<pre><code>#> List of 2
|
|||
|
#> $ : int 1
|
|||
|
#> ..- attr(*, "label")= int 1
|
|||
|
#> ..- attr(*, "members")= int 1
|
|||
|
#> ..- attr(*, "height")= num 0
|
|||
|
#> ..- attr(*, "leaf")= logi TRUE
|
|||
|
#> $ : int 2
|
|||
|
#> ..- attr(*, "label")= int 2
|
|||
|
#> ..- attr(*, "members")= int 1
|
|||
|
#> ..- attr(*, "height")= num 0
|
|||
|
#> ..- attr(*, "leaf")= logi TRUE
|
|||
|
#> - attr(*, "members")= int 2
|
|||
|
#> - attr(*, "midpoint")= num 0.5
|
|||
|
#> - attr(*, "height")= num 1</code></pre>
|
|||
|
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>dend <span class="sc">%>%</span> class</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "dendrogram"</code></pre>
|
|||
|
</div>
|
|||
|
<div id="installation" class="section level3">
|
|||
|
<h3>Installation</h3>
|
|||
|
<p>To install the stable version on CRAN use:</p>
|
|||
|
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">'dendextend'</span>)</span></code></pre></div>
|
|||
|
<p>To install the GitHub version:</p>
|
|||
|
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>require2 <span class="ot"><-</span> <span class="cf">function</span> (package, ...) {</span>
|
|||
|
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">require</span>(package)) <span class="fu">install.packages</span>(package); <span class="fu">library</span>(package)</span>
|
|||
|
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="do">## require2('installr')</span></span>
|
|||
|
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="do">## install.Rtools() # run this if you are using Windows and don't have Rtools installed</span></span>
|
|||
|
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co"># Load devtools:</span></span>
|
|||
|
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a><span class="fu">require2</span>(<span class="st">"devtools"</span>)</span>
|
|||
|
<span id="cb11-10"><a href="#cb11-10" tabindex="-1"></a>devtools<span class="sc">::</span><span class="fu">install_github</span>(<span class="st">'talgalili/dendextend'</span>)</span>
|
|||
|
<span id="cb11-11"><a href="#cb11-11" tabindex="-1"></a><span class="sc">&lt;!--</span> <span class="fu">require2</span>(<span class="st">"Rcpp"</span>) <span class="sc">-</span><span class="ot">-&gt;</span></span>
|
|||
|
<span id="cb11-12"><a href="#cb11-12" tabindex="-1"></a></span>
|
|||
|
<span id="cb11-13"><a href="#cb11-13" tabindex="-1"></a><span class="co"># Having colorspace is also useful, since it is used</span></span>
|
|||
|
<span id="cb11-14"><a href="#cb11-14" tabindex="-1"></a><span class="co"># In various examples in the vignettes</span></span>
|
|||
|
<span id="cb11-15"><a href="#cb11-15" tabindex="-1"></a><span class="fu">require2</span>(<span class="st">"colorspace"</span>)</span></code></pre></div>
|
|||
|
<p>And then you may load the package using:</p>
|
|||
|
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="fu">library</span>(dendextend)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="how-to-explore-a-dendrograms-parameters" class="section level2">
|
|||
|
<h2>How to explore a dendrogram’s parameters</h2>
|
|||
|
<div id="taking-a-first-look-at-a-dendrogram" class="section level3">
|
|||
|
<h3>Taking a first look at a dendrogram</h3>
|
|||
|
<p>For the following simple tree:</p>
|
|||
|
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
|
|||
|
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a><span class="co"># Plot it:</span></span>
|
|||
|
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a>dend <span class="sc">%>%</span> plot</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Here are some basic parameters we can get:</p>
|
|||
|
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>dend <span class="sc">%>%</span> labels <span class="co"># get the labels of the tree</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1 2 5 3 4</code></pre>
|
|||
|
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>dend <span class="sc">%>%</span> nleaves <span class="co"># get the number of leaves of the tree</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 5</code></pre>
|
|||
|
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>dend <span class="sc">%>%</span> nnodes <span class="co"># get the number of nodes in the tree (including leaves)</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 9</code></pre>
|
|||
|
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a>dend <span class="sc">%>%</span> head <span class="co"># A combination of "str" with "head"</span></span></code></pre></div>
|
|||
|
<pre><code>#> --[dendrogram w/ 2 branches and 5 members at h = 4]
|
|||
|
#> |--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> | |--leaf 1
|
|||
|
#> | `--leaf 2
|
|||
|
#> `--[dendrogram w/ 2 branches and 3 members at h = 2]
|
|||
|
#> |--leaf 5
|
|||
|
#> `--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> |--leaf 3
|
|||
|
#> `--leaf 4
|
|||
|
#> etc...</code></pre>
|
|||
|
<p>Next let us look at more sophisticated outputs.</p>
|
|||
|
</div>
|
|||
|
<div id="getting-nodes-attributes-in-a-depth-first-search" class="section level3">
|
|||
|
<h3>Getting nodes attributes in a depth-first search</h3>
|
|||
|
<p>When extracting (or inserting) attributes from a dendrogram’s nodes,
|
|||
|
it is often in a “depth-first search”. <a href="https://en.wikipedia.org/wiki/Depth-first_search">Depth-first
|
|||
|
search</a> is an algorithm for traversing or searching tree or
|
|||
|
graph data structures. One starts at the root and explores as far as
|
|||
|
possible along each branch before backtracking.</p>
|
|||
|
<p>Here is a plot of a tree, illustrating the order in which you should
|
|||
|
read the “nodes attributes”:</p>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We can get several nodes attributes using <code>get_nodes_attr</code>
|
|||
|
(notice the order corresponds with what is shown in the above
|
|||
|
figure):</p>
|
|||
|
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
|
|||
|
<span id="cb22-2"><a href="#cb22-2" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb22-3"><a href="#cb22-3" tabindex="-1"></a><span class="co"># Get various attributes</span></span>
|
|||
|
<span id="cb22-4"><a href="#cb22-4" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"height"</span>) <span class="co"># node's height</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 4 1 0 0 2 0 1 0 0</code></pre>
|
|||
|
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a>dend <span class="sc">%>%</span> hang.dendrogram <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"height"</span>) <span class="co"># node's height (after raising the leaves)</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 4.0 1.0 0.6 0.6 2.0 1.6 1.0 0.6 0.6</code></pre>
|
|||
|
<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"members"</span>) <span class="co"># number of members (leaves) under that node</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 5 2 1 1 3 1 2 1 1</code></pre>
|
|||
|
<div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"members"</span>, <span class="at">id =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">5</span>)) <span class="co"># number of members for nodes 2 and 5</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 2 3</code></pre>
|
|||
|
<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"midpoint"</span>) <span class="co"># how much "left" is this node from its left-most child's location</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1.625 0.500 NA NA 0.750 NA 0.500 NA NA</code></pre>
|
|||
|
<div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"leaf"</span>) <span class="co"># is this node a leaf</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] NA NA TRUE TRUE NA TRUE NA TRUE TRUE</code></pre>
|
|||
|
<div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"label"</span>) <span class="co"># what is the label on this node</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] NA NA 1 2 NA 5 NA 3 4</code></pre>
|
|||
|
<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"nodePar"</span>) <span class="co"># empty (for now...)</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] NA NA NA NA NA NA NA NA NA</code></pre>
|
|||
|
<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"edgePar"</span>) <span class="co"># empty (for now...)</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] NA NA NA NA NA NA NA NA NA</code></pre>
|
|||
|
<p>A similar function for leaves only is
|
|||
|
<code>get_leaves_attr</code></p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="how-to-change-a-dendrogram" class="section level2">
|
|||
|
<h2>How to change a dendrogram</h2>
|
|||
|
<div id="the-set-function" class="section level3">
|
|||
|
<h3>The “set” function</h3>
|
|||
|
<p>The fastest way to start changing parameters with dendextend is by
|
|||
|
using the <code>set</code> function. It is written as:
|
|||
|
<code>set(object, what, value)</code>, and accepts the following
|
|||
|
parameters:</p>
|
|||
|
<ol style="list-style-type: decimal">
|
|||
|
<li><strong>object</strong>: a dendrogram object,</li>
|
|||
|
<li><strong>what</strong>: a character indicating what is the property
|
|||
|
of the tree that should be set/updated</li>
|
|||
|
<li><strong>value</strong>: a vector with the value to set in the tree
|
|||
|
(the type of the value depends on the “what”). Many times, vectors which
|
|||
|
are too short are recycled.</li>
|
|||
|
</ol>
|
|||
|
<p>The <strong>what</strong> parameter accepts many options, each uses
|
|||
|
some general function in the background. These options deal with labels,
|
|||
|
nodes and branches. They are:</p>
|
|||
|
<ul>
|
|||
|
<li>labels - set the labels (using
|
|||
|
<code>labels<-.dendrogram</code>)</li>
|
|||
|
<li>labels_colors - set the labels’ colors (using
|
|||
|
<code>color_labels</code>)</li>
|
|||
|
<li>labels_cex - set the labels’ size (using
|
|||
|
<code>assign_values_to_leaves_nodePar</code>)</li>
|
|||
|
<li>labels_to_character - set the labels to be characters</li>
|
|||
|
<li>leaves_pch - set the leaves’ point type (using
|
|||
|
<code>assign_values_to_leaves_nodePar</code>)</li>
|
|||
|
<li>leaves_cex - set the leaves’ point size (using
|
|||
|
<code>assign_values_to_leaves_nodePar</code>)</li>
|
|||
|
<li>leaves_col - set the leaves’ point color (using
|
|||
|
<code>assign_values_to_leaves_nodePar</code>)</li>
|
|||
|
<li>leaves_bg - set the leaves’ point background color (available only
|
|||
|
for pch 21-25, using <code>assign_values_to_leaves_nodePar</code>)</li>
|
|||
|
<li>nodes_pch - set the nodes’ point type (using
|
|||
|
<code>assign_values_to_nodes_nodePar</code>)</li>
|
|||
|
<li>nodes_cex - set the nodes’ point size (using
|
|||
|
<code>assign_values_to_nodes_nodePar</code>)</li>
|
|||
|
<li>nodes_col - set the nodes’ point color (using
|
|||
|
<code>assign_values_to_nodes_nodePar</code>)</li>
|
|||
|
<li>nodes_bg - set the nodes’ point background color (available only for
|
|||
|
pch 21-25, using <code>assign_values_to_nodes_nodePar</code>)</li>
|
|||
|
<li>hang_leaves - hang the leaves (using
|
|||
|
<code>hang.dendrogram</code>)</li>
|
|||
|
<li>branches_k_color - color the branches (using
|
|||
|
<code>color_branches</code>)</li>
|
|||
|
<li>branches_col - set the color of branches (using
|
|||
|
<code>assign_values_to_branches_edgePar</code>)</li>
|
|||
|
<li>branches_lwd - set the line width of branches (using
|
|||
|
<code>assign_values_to_branches_edgePar</code>)</li>
|
|||
|
<li>branches_lty - set the line type of branches (using
|
|||
|
<code>assign_values_to_branches_edgePar</code>)</li>
|
|||
|
<li>by_labels_branches_col - set the color of branches with specific
|
|||
|
labels (using <code>branches_attr_by_labels</code>)</li>
|
|||
|
<li>by_labels_branches_lwd - set the line width of branches with
|
|||
|
specific labels (using <code>branches_attr_by_labels</code>)</li>
|
|||
|
<li>by_labels_branches_lty - set the line type of branches with specific
|
|||
|
labels (using <code>branches_attr_by_labels</code>)</li>
|
|||
|
<li>clear_branches - clear branches’ attributes (using
|
|||
|
<code>remove_branches_edgePar</code>)</li>
|
|||
|
<li>clear_leaves - clear leaves’ attributes (using
|
|||
|
<code>remove_leaves_nodePar</code>)</li>
|
|||
|
</ul>
|
|||
|
</div>
|
|||
|
<div id="two-simple-trees-to-play-with" class="section level3">
|
|||
|
<h3>Two simple trees to play with</h3>
|
|||
|
<p>For illustration purposes, we will create two small trees, and
|
|||
|
demonstrate these functions on them.</p>
|
|||
|
<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" tabindex="-1"></a>dend13 <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>) <span class="sc">%>%</span> <span class="co"># take some data</span></span>
|
|||
|
<span id="cb40-2"><a href="#cb40-2" tabindex="-1"></a> dist <span class="sc">%>%</span> <span class="co"># calculate a distance matrix, </span></span>
|
|||
|
<span id="cb40-3"><a href="#cb40-3" tabindex="-1"></a> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> <span class="co"># on it compute hierarchical clustering using the "average" method, </span></span>
|
|||
|
<span id="cb40-4"><a href="#cb40-4" tabindex="-1"></a> as.dendrogram <span class="co"># and lastly, turn that object into a dendrogram.</span></span>
|
|||
|
<span id="cb40-5"><a href="#cb40-5" tabindex="-1"></a><span class="co"># same, but for 5 leaves:</span></span>
|
|||
|
<span id="cb40-6"><a href="#cb40-6" tabindex="-1"></a>dend15 <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb40-7"><a href="#cb40-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb40-8"><a href="#cb40-8" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb40-9"><a href="#cb40-9" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main=</span><span class="st">"dend13"</span>)</span>
|
|||
|
<span id="cb40-10"><a href="#cb40-10" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main=</span><span class="st">"dend15"</span>)</span>
|
|||
|
<span id="cb40-11"><a href="#cb40-11" tabindex="-1"></a><span class="co"># we could have also used plot(dend)</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="setting-a-dendrograms-labels" class="section level3">
|
|||
|
<h3>Setting a dendrogram’s labels</h3>
|
|||
|
<p>We can get a vector with the tree’s labels:</p>
|
|||
|
<div class="sourceCode" id="cb41"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" tabindex="-1"></a><span class="co"># get the labels:</span></span>
|
|||
|
<span id="cb41-2"><a href="#cb41-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> labels</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1 2 5 3 4</code></pre>
|
|||
|
<div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" tabindex="-1"></a><span class="co"># this is just like labels(dend)</span></span></code></pre></div>
|
|||
|
<p>Notice how the tree’s labels are not in order from 1 to 5, since the tree
|
|||
|
happened to place them in a different order. We can change the names of
|
|||
|
the labels:</p>
|
|||
|
<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" tabindex="-1"></a><span class="co"># change the labels, and then print them:</span></span>
|
|||
|
<span id="cb44-2"><a href="#cb44-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels"</span>, <span class="fu">c</span>(<span class="dv">111</span><span class="sc">:</span><span class="dv">115</span>)) <span class="sc">%>%</span> labels</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 111 112 113 114 115</code></pre>
|
|||
|
<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" tabindex="-1"></a><span class="co"># could also be done using:</span></span>
|
|||
|
<span id="cb46-2"><a href="#cb46-2" tabindex="-1"></a><span class="co"># labels(dend) <- c(111:115)</span></span></code></pre></div>
|
|||
|
<p>We can change the type of labels to be characters. Not doing so may
|
|||
|
be a source of various bugs and problems in many functions.</p>
|
|||
|
<div class="sourceCode" id="cb47"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb47-1"><a href="#cb47-1" tabindex="-1"></a>dend15 <span class="sc">%>%</span> labels</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1 2 5 3 4</code></pre>
|
|||
|
<div class="sourceCode" id="cb49"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_to_char"</span>) <span class="sc">%>%</span> labels</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "1" "2" "5" "3" "4"</code></pre>
|
|||
|
<p>We may also change their color and size:</p>
|
|||
|
<div class="sourceCode" id="cb51"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb51-2"><a href="#cb51-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_col"</span>, <span class="st">"blue"</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Change label's color"</span>) <span class="co"># change color </span></span>
|
|||
|
<span id="cb51-3"><a href="#cb51-3" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Change label's size"</span>) <span class="co"># change size </span></span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>The function recycles, from left to right, the vector of values we
|
|||
|
give it. We can use this to create more complex patterns:</p>
|
|||
|
<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" tabindex="-1"></a><span class="co"># Produce a more complex dendrogram:</span></span>
|
|||
|
<span id="cb52-2"><a href="#cb52-2" tabindex="-1"></a>dend15_2 <span class="ot"><-</span> dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb52-3"><a href="#cb52-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels"</span>, <span class="fu">c</span>(<span class="dv">111</span><span class="sc">:</span><span class="dv">115</span>)) <span class="sc">%>%</span> <span class="co"># change labels</span></span>
|
|||
|
<span id="cb52-4"><a href="#cb52-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_col"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>)) <span class="sc">%>%</span> <span class="co"># change color </span></span>
|
|||
|
<span id="cb52-5"><a href="#cb52-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>)) <span class="co"># change size</span></span>
|
|||
|
<span id="cb52-6"><a href="#cb52-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb52-7"><a href="#cb52-7" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb52-8"><a href="#cb52-8" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Before"</span>)</span>
|
|||
|
<span id="cb52-9"><a href="#cb52-9" tabindex="-1"></a>dend15_2 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"After"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Notice how these “labels parameters” are nested within the nodePar
|
|||
|
attribute:</p>
|
|||
|
<div class="sourceCode" id="cb53"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" tabindex="-1"></a><span class="co"># looking at only the left-most node of the "after tree":</span></span>
|
|||
|
<span id="cb53-2"><a href="#cb53-2" tabindex="-1"></a>dend15_2[[<span class="dv">1</span>]][[<span class="dv">1</span>]] <span class="sc">%>%</span> unclass <span class="sc">%>%</span> str </span></code></pre></div>
|
|||
|
<pre><code>#> int 1
|
|||
|
#> - attr(*, "label")= int 111
|
|||
|
#> - attr(*, "members")= int 1
|
|||
|
#> - attr(*, "height")= num 0
|
|||
|
#> - attr(*, "leaf")= logi TRUE
|
|||
|
#> - attr(*, "nodePar")=List of 3
|
|||
|
#> ..$ lab.col: num 1
|
|||
|
#> ..$ pch : logi NA
|
|||
|
#> ..$ lab.cex: num 2</code></pre>
|
|||
|
<div class="sourceCode" id="cb55"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb55-1"><a href="#cb55-1" tabindex="-1"></a><span class="co"># looking at only the nodePar attributes in this sub-tree:</span></span>
|
|||
|
<span id="cb55-2"><a href="#cb55-2" tabindex="-1"></a>dend15_2[[<span class="dv">1</span>]][[<span class="dv">1</span>]] <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"nodePar"</span>) </span></code></pre></div>
|
|||
|
<pre><code>#> [,1]
|
|||
|
#> lab.col 1
|
|||
|
#> pch NA
|
|||
|
#> lab.cex 2</code></pre>
|
|||
|
<p>When it comes to color, we can also set the parameter “k”, which will
|
|||
|
cut the tree into k clusters, and assign a different color to each label
|
|||
|
(based on its cluster):</p>
|
|||
|
<div class="sourceCode" id="cb57"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb57-2"><a href="#cb57-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_col"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb57-3"><a href="#cb57-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Recycles color </span><span class="sc">\n</span><span class="st">from left to right"</span>)</span>
|
|||
|
<span id="cb57-4"><a href="#cb57-4" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_col"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>), <span class="at">k=</span><span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb57-5"><a href="#cb57-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Color labels </span><span class="sc">\n</span><span class="st">per cluster"</span>)</span>
|
|||
|
<span id="cb57-6"><a href="#cb57-6" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">h =</span> <span class="dv">2</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="setting-a-dendrograms-nodesleaves-points" class="section level3">
|
|||
|
<h3>Setting a dendrogram’s nodes/leaves (points)</h3>
|
|||
|
<p>Each node in a tree can be represented and controlled using the
|
|||
|
<code>assign_values_to_nodes_nodePar</code> function, and for the special case of
|
|||
|
the nodes of leaves, the <code>assign_values_to_leaves_nodePar</code>
|
|||
|
function is more appropriate (and faster) to use. We can control the
|
|||
|
following properties: pch (point type), cex (point size), and col (point
|
|||
|
color). For pch values 21-25 we can additionally set bg (“background”, although it’s
|
|||
|
really a fill for the shape). When bg is set, the outline of the point
|
|||
|
is defined by col and the internal fill is determined by bg. For
|
|||
|
example:</p>
|
|||
|
<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb58-2"><a href="#cb58-2" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(1) Show the</span><span class="sc">\n</span><span class="st"> nodes (as a dot)"</span>) <span class="co">#1</span></span>
|
|||
|
<span id="cb58-3"><a href="#cb58-3" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-4"><a href="#cb58-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(2) Show (larger)</span><span class="sc">\n</span><span class="st"> nodes"</span>) <span class="co">#2</span></span>
|
|||
|
<span id="cb58-5"><a href="#cb58-5" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_col"</span>, <span class="dv">3</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-6"><a href="#cb58-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(3) Show (larger+colored)</span><span class="sc">\n</span><span class="st"> nodes"</span>) <span class="co">#3</span></span>
|
|||
|
<span id="cb58-7"><a href="#cb58-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb58-8"><a href="#cb58-8" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">21</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(4) Show the leaves</span><span class="sc">\n</span><span class="st"> (as empty circles)"</span>) <span class="co">#4</span></span>
|
|||
|
<span id="cb58-9"><a href="#cb58-9" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">21</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-10"><a href="#cb58-10" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(5) Show (larger)</span><span class="sc">\n</span><span class="st"> leaf circles"</span>) <span class="co">#5</span></span>
|
|||
|
<span id="cb58-11"><a href="#cb58-11" tabindex="-1"></a>dend13 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-12"><a href="#cb58-12" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">21</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-13"><a href="#cb58-13" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"leaves_bg"</span>, <span class="st">"gold"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-14"><a href="#cb58-14" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-15"><a href="#cb58-15" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"leaves_col"</span>, <span class="st">"darkred"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb58-16"><a href="#cb58-16" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"(6) Show (larger+colored+filled)</span><span class="sc">\n</span><span class="st"> leaves"</span>) <span class="co">#6</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>And with recycling we can produce more complex outputs:</p>
|
|||
|
<div class="sourceCode" id="cb59"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb59-1"><a href="#cb59-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb59-2"><a href="#cb59-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_cex"</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_col"</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb59-3"><a href="#cb59-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Adjust nodes"</span>)</span>
|
|||
|
<span id="cb59-4"><a href="#cb59-4" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_col"</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb59-5"><a href="#cb59-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Adjust nodes</span><span class="sc">\n</span><span class="st">(but only for leaves)"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Notice how recycling works in a depth-first order (which is just left
|
|||
|
to right, when we only adjust the leaves). Here are the nodes’
|
|||
|
parameters after adjustment:</p>
|
|||
|
<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="#cb60-1" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb60-2"><a href="#cb60-2" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"nodes_cex"</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_col"</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> <span class="fu">get_nodes_attr</span>(<span class="st">"nodePar"</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
|
|||
|
#> pch 19 1 4 19 1 4 19 1 4
|
|||
|
#> cex 2 1 2 2 1 2 2 1 2
|
|||
|
#> col 3 4 3 4 3 4 3 4 3</code></pre>
|
|||
|
<p>We can also change the height of the leaves by using the
|
|||
|
<code>hang.dendrogram</code> function:</p>
|
|||
|
<div class="sourceCode" id="cb62"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb62-1"><a href="#cb62-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb62-2"><a href="#cb62-2" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_col"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="co"># adjust the leaves</span></span>
|
|||
|
<span id="cb62-3"><a href="#cb62-3" tabindex="-1"></a> hang.dendrogram <span class="sc">%>%</span> <span class="co"># hang the leaves</span></span>
|
|||
|
<span id="cb62-4"><a href="#cb62-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Hanging a tree"</span>)</span>
|
|||
|
<span id="cb62-5"><a href="#cb62-5" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_col"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="co"># adjust the leaves</span></span>
|
|||
|
<span id="cb62-6"><a href="#cb62-6" tabindex="-1"></a> <span class="fu">hang.dendrogram</span>(<span class="at">hang_height =</span> .<span class="dv">6</span>) <span class="sc">%>%</span> <span class="co"># hang the leaves (at some height)</span></span>
|
|||
|
<span id="cb62-7"><a href="#cb62-7" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Hanging a tree (but lower)"</span>)</span>
|
|||
|
<span id="cb62-8"><a href="#cb62-8" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_cex"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"leaves_col"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="co"># adjust the leaves</span></span>
|
|||
|
<span id="cb62-9"><a href="#cb62-9" tabindex="-1"></a> hang.dendrogram <span class="sc">%>%</span> <span class="co"># hang the leaves</span></span>
|
|||
|
<span id="cb62-10"><a href="#cb62-10" tabindex="-1"></a> <span class="fu">hang.dendrogram</span>(<span class="at">hang =</span> <span class="sc">-</span><span class="dv">1</span>) <span class="sc">%>%</span> <span class="co"># un-hanging the leaves</span></span>
|
|||
|
<span id="cb62-11"><a href="#cb62-11" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Not hanging a tree"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>An example of what this function does to the leaves’ heights:</p>
|
|||
|
<div class="sourceCode" id="cb63"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb63-1"><a href="#cb63-1" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">get_leaves_attr</span>(<span class="st">"height"</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0 0 0</code></pre>
|
|||
|
<div class="sourceCode" id="cb65"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb65-1"><a href="#cb65-1" tabindex="-1"></a>dend13 <span class="sc">%>%</span> hang.dendrogram <span class="sc">%>%</span> <span class="fu">get_leaves_attr</span>(<span class="st">"height"</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1.35 0.85 0.85</code></pre>
|
|||
|
<p>We can also control the general heights of nodes using
|
|||
|
<code>raise.dendrogram</code>:</p>
|
|||
|
<div class="sourceCode" id="cb67"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb67-1"><a href="#cb67-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb67-2"><a href="#cb67-2" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"First tree"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb67-3"><a href="#cb67-3" tabindex="-1"></a>dend13 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb67-4"><a href="#cb67-4" tabindex="-1"></a> <span class="fu">raise.dendrogram</span> (<span class="sc">-</span><span class="dv">1</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb67-5"><a href="#cb67-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"One point lower"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb67-6"><a href="#cb67-6" tabindex="-1"></a>dend13 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb67-7"><a href="#cb67-7" tabindex="-1"></a> <span class="fu">raise.dendrogram</span> (<span class="dv">1</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb67-8"><a href="#cb67-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"One point higher"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>If you wish to make the branches under the root have the same height,
|
|||
|
you can use the <code>flatten.dendrogram</code> function.</p>
|
|||
|
</div>
|
|||
|
<div id="setting-a-dendrograms-branches" class="section level3">
|
|||
|
<h3>Setting a dendrogram’s branches</h3>
|
|||
|
<div id="adjusting-all-branches" class="section level4">
|
|||
|
<h4>Adjusting all branches</h4>
|
|||
|
<p>Similar to adjusting nodes, we can also control line width (lwd),
|
|||
|
line type (lty), and color (col) for branches:</p>
|
|||
|
<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb68-2"><a href="#cb68-2" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="dv">4</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Thick branches"</span>)</span>
|
|||
|
<span id="cb68-3"><a href="#cb68-3" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lty"</span>, <span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Dashed branches"</span>)</span>
|
|||
|
<span id="cb68-4"><a href="#cb68-4" tabindex="-1"></a>dend13 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_col"</span>, <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Red branches"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We may also use recycling to create more complex patterns:</p>
|
|||
|
<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" tabindex="-1"></a><span class="co"># Produce a more complex dendrogram:</span></span>
|
|||
|
<span id="cb69-2"><a href="#cb69-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb69-3"><a href="#cb69-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="fu">c</span>(<span class="dv">4</span>,<span class="dv">1</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb69-4"><a href="#cb69-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_lty"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb69-5"><a href="#cb69-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_col"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb69-6"><a href="#cb69-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Complex branches"</span>, <span class="at">edge.root =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Notice how the first branch (the root) is considered when going
|
|||
|
through and creating the tree, but it is <strong>ignored</strong> in the
|
|||
|
actual plotting (this is actually a “missing feature” in
|
|||
|
<code>plot.dendrogram</code>).</p>
|
|||
|
</div>
|
|||
|
<div id="coloring-branches-based-on-clustering" class="section level4">
|
|||
|
<h4>Coloring branches based on clustering</h4>
|
|||
|
<p>We may also control the colors of the branches based on
|
|||
|
clustering:</p>
|
|||
|
<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="#cb70-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb70-2"><a href="#cb70-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Nice defaults"</span>)</span>
|
|||
|
<span id="cb70-3"><a href="#cb70-3" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">value =</span> <span class="dv">3</span><span class="sc">:</span><span class="dv">1</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb70-4"><a href="#cb70-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Controlling branches' colors</span><span class="sc">\n</span><span class="st">(via clustering)"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="#cb71-1" tabindex="-1"></a><span class="co"># This is like using the `color_branches` function</span></span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="adjusting-branches-based-on-labels" class="section level4">
|
|||
|
<h4>Adjusting branches based on labels</h4>
|
|||
|
<p>The most powerful way to control branches is through the
|
|||
|
<code>branches_attr_by_labels</code> function (with variations through
|
|||
|
the <code>set</code> function). The function allows you to change
|
|||
|
col/lwd/lty of branches if they match some “labels condition”. Follow
|
|||
|
carefully:</p>
|
|||
|
<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="#cb72-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb72-2"><a href="#cb72-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"by_labels_branches_col"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb72-3"><a href="#cb72-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Adjust the branch</span><span class="sc">\n</span><span class="st"> if ALL (default) of its</span><span class="sc">\n</span><span class="st"> labels are in the list"</span>)</span>
|
|||
|
<span id="cb72-4"><a href="#cb72-4" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"by_labels_branches_col"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">type =</span> <span class="st">"any"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb72-5"><a href="#cb72-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Adjust the branch</span><span class="sc">\n</span><span class="st"> if ANY of its</span><span class="sc">\n</span><span class="st"> labels are in the list"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We can use this to change the size/type/color of the branches:</p>
|
|||
|
<div class="sourceCode" id="cb73"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb73-1"><a href="#cb73-1" tabindex="-1"></a><span class="co"># Using "Inf" in "TF_values" means to let the parameters stay as they are.</span></span>
|
|||
|
<span id="cb73-2"><a href="#cb73-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb73-3"><a href="#cb73-3" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"by_labels_branches_col"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="cn">Inf</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb73-4"><a href="#cb73-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Change colors"</span>)</span>
|
|||
|
<span id="cb73-5"><a href="#cb73-5" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"by_labels_branches_lwd"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">8</span>,<span class="dv">1</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb73-6"><a href="#cb73-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Change line width"</span>)</span>
|
|||
|
<span id="cb73-7"><a href="#cb73-7" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"by_labels_branches_lty"</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="cn">Inf</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb73-8"><a href="#cb73-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Change line type"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="highlighting-branches-different-heights-using-line-width-and-color" class="section level4">
|
|||
|
<h4>Highlighting branches’ different heights using line width and
|
|||
|
color</h4>
|
|||
|
<p>The <code>highlight_branches</code> function helps to more easily see
|
|||
|
the topological structure of a tree, by adjusting the branches’ appearance
|
|||
|
(color and line width) based on their height in the tree. For
|
|||
|
example:</p>
|
|||
|
<div class="sourceCode" id="cb74"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb74-1"><a href="#cb74-1" tabindex="-1"></a>dat <span class="ot"><-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">20</span>,<span class="sc">-</span><span class="dv">5</span>]</span>
|
|||
|
<span id="cb74-2"><a href="#cb74-2" tabindex="-1"></a>hca <span class="ot"><-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(dat))</span>
|
|||
|
<span id="cb74-3"><a href="#cb74-3" tabindex="-1"></a>hca2 <span class="ot"><-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(dat), <span class="at">method =</span> <span class="st">"single"</span>)</span>
|
|||
|
<span id="cb74-4"><a href="#cb74-4" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(hca)</span>
|
|||
|
<span id="cb74-5"><a href="#cb74-5" tabindex="-1"></a>dend2 <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(hca2)</span>
|
|||
|
<span id="cb74-6"><a href="#cb74-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb74-7"><a href="#cb74-7" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb74-8"><a href="#cb74-8" tabindex="-1"></a>dend <span class="sc">%>%</span> highlight_branches_col <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Coloring branches"</span>)</span>
|
|||
|
<span id="cb74-9"><a href="#cb74-9" tabindex="-1"></a>dend <span class="sc">%>%</span> highlight_branches_lwd <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Emphasizing line-width"</span>)</span>
|
|||
|
<span id="cb74-10"><a href="#cb74-10" tabindex="-1"></a>dend <span class="sc">%>%</span> highlight_branches <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Emphasizing color</span><span class="sc">\n</span><span class="st"> and line-width"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Tanglegrams are even easier to compare when using <code>highlight_branches</code>, as the examples below show:</p>
|
|||
|
<div class="sourceCode" id="cb75"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb75-1"><a href="#cb75-1" tabindex="-1"></a><span class="fu">library</span>(viridis)</span>
|
|||
|
<span id="cb75-2"><a href="#cb75-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb75-3"><a href="#cb75-3" tabindex="-1"></a>dend <span class="sc">%>%</span> highlight_branches_col <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Coloring branches </span><span class="sc">\n</span><span class="st"> (default is reversed viridis)"</span>)</span>
|
|||
|
<span id="cb75-4"><a href="#cb75-4" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">highlight_branches_col</span>(<span class="fu">viridis</span>(<span class="dv">100</span>)) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"It is better to use </span><span class="sc">\n</span><span class="st"> lighter colors in the leaves"</span>)</span>
|
|||
|
<span id="cb75-5"><a href="#cb75-5" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">highlight_branches_col</span>(<span class="fu">rev</span>(<span class="fu">magma</span>(<span class="dv">1000</span>))) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"The magma color pallatte</span><span class="sc">\n</span><span class="st"> is also good"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="#cb76-1" tabindex="-1"></a>dl <span class="ot"><-</span> <span class="fu">dendlist</span>(dend, dend2)</span>
|
|||
|
<span id="cb76-2"><a href="#cb76-2" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>, <span class="at">highlight_branches_lwd =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb77"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb77-1"><a href="#cb77-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dl)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb78"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb78-1"><a href="#cb78-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">fast =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb79"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb79-1"><a href="#cb79-1" tabindex="-1"></a>dl <span class="ot"><-</span> <span class="fu">dendlist</span>(<span class="fu">highlight_branches</span>(dend), <span class="fu">highlight_branches</span>(dend2))</span>
|
|||
|
<span id="cb79-2"><a href="#cb79-2" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb80"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb80-1"><a href="#cb80-1" tabindex="-1"></a><span class="co"># dend %>% set("highlight_branches_col") %>% plot</span></span>
|
|||
|
<span id="cb80-2"><a href="#cb80-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb80-3"><a href="#cb80-3" tabindex="-1"></a>dl <span class="ot"><-</span> <span class="fu">dendlist</span>(dend, dend2) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"highlight_branches_col"</span>)</span>
|
|||
|
<span id="cb80-4"><a href="#cb80-4" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="changing-a-dendrograms-structure" class="section level3">
|
|||
|
<h3>Changing a dendrogram’s structure</h3>
|
|||
|
<div id="rotation" class="section level4">
|
|||
|
<h4>Rotation</h4>
|
|||
|
<p>A dendrogram is an object which can be rotated on its hinges without
|
|||
|
changing its topology. Rotating a dendrogram in base R can be done using
|
|||
|
the <code>reorder</code> function. The problem with this function is
|
|||
|
that it is not very intuitive. For this reason the <code>rotate</code>
|
|||
|
function was written. It has two main arguments: the “object” (a
|
|||
|
dendrogram), and the “order” we wish to rotate it by. The “order”
|
|||
|
parameter can be a numeric vector, used in the same way we would
|
|||
|
order a simple character vector. Alternatively, the order parameter can be a
|
|||
|
character vector of the labels of the tree, given in the new desired
|
|||
|
order of the tree. It is also worth noting that some orders are
|
|||
|
impossible to achieve for a given tree’s topology. In such cases, the
|
|||
|
function will do its “best” to get as close as possible to the requested
|
|||
|
rotation.</p>
|
|||
|
<div class="sourceCode" id="cb81"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb81-1"><a href="#cb81-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb81-2"><a href="#cb81-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-3"><a href="#cb81-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-4"><a href="#cb81-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-5"><a href="#cb81-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"First tree"</span>)</span>
|
|||
|
<span id="cb81-6"><a href="#cb81-6" tabindex="-1"></a>dend15 <span class="sc">%>%</span></span>
|
|||
|
<span id="cb81-7"><a href="#cb81-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-8"><a href="#cb81-8" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-9"><a href="#cb81-9" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="fu">as.character</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)) <span class="sc">%>%</span> <span class="co">#rotate to match labels new order</span></span>
|
|||
|
<span id="cb81-10"><a href="#cb81-10" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Rotated tree</span><span class="sc">\n</span><span class="st"> based on labels"</span>)</span>
|
|||
|
<span id="cb81-11"><a href="#cb81-11" tabindex="-1"></a>dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-12"><a href="#cb81-12" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-13"><a href="#cb81-13" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb81-14"><a href="#cb81-14" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>) <span class="sc">%>%</span> <span class="co"># the fifth label to go first is "4"</span></span>
|
|||
|
<span id="cb81-15"><a href="#cb81-15" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Rotated tree</span><span class="sc">\n</span><span class="st"> based on order"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>A new convenience S3 function for <code>sort</code>
|
|||
|
(<code>sort.dendrogram</code>) was added:</p>
|
|||
|
<div class="sourceCode" id="cb82"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb82-1"><a href="#cb82-1" tabindex="-1"></a>dend110 <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span><span class="sc">:</span><span class="dv">5</span>, <span class="dv">7</span>,<span class="dv">9</span>,<span class="dv">10</span>) <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb82-2"><a href="#cb82-2" tabindex="-1"></a> as.dendrogram <span class="sc">%>%</span> color_labels <span class="sc">%>%</span> color_branches</span>
|
|||
|
<span id="cb82-3"><a href="#cb82-3" tabindex="-1"></a></span>
|
|||
|
<span id="cb82-4"><a href="#cb82-4" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb82-5"><a href="#cb82-5" tabindex="-1"></a>dend110 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Original tree"</span>)</span>
|
|||
|
<span id="cb82-6"><a href="#cb82-6" tabindex="-1"></a>dend110 <span class="sc">%>%</span> sort <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"labels sort"</span>)</span>
|
|||
|
<span id="cb82-7"><a href="#cb82-7" tabindex="-1"></a>dend110 <span class="sc">%>%</span> <span class="fu">sort</span>(<span class="at">type =</span> <span class="st">"nodes"</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"nodes (ladderize) sort"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="unbranching" class="section level4">
|
|||
|
<h4>Unbranching</h4>
|
|||
|
<p>We can unbranch a tree:</p>
|
|||
|
<div class="sourceCode" id="cb83"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb83-1"><a href="#cb83-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb83-2"><a href="#cb83-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"First tree"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb83-3"><a href="#cb83-3" tabindex="-1"></a>dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb83-4"><a href="#cb83-4" tabindex="-1"></a> unbranch <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb83-5"><a href="#cb83-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Unbranched tree"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb83-6"><a href="#cb83-6" tabindex="-1"></a>dend15 <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb83-7"><a href="#cb83-7" tabindex="-1"></a> <span class="fu">unbranch</span>(<span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb83-8"><a href="#cb83-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Unbranched tree (2)"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="pruning" class="section level4">
|
|||
|
<h4>Pruning</h4>
|
|||
|
<p>We can prune a tree based on the labels:</p>
|
|||
|
<div class="sourceCode" id="cb84"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb84-1"><a href="#cb84-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb84-2"><a href="#cb84-2" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb84-3"><a href="#cb84-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"First tree"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb84-4"><a href="#cb84-4" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb84-5"><a href="#cb84-5" tabindex="-1"></a> <span class="fu">prune</span>(<span class="fu">c</span>(<span class="st">"1"</span>,<span class="st">"5"</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb84-6"><a href="#cb84-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Prunned tree"</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>For pruning two trees to have matching labels, we can use the
|
|||
|
<code>intersect_trees</code> function:</p>
|
|||
|
<div class="sourceCode" id="cb85"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb85-1"><a href="#cb85-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb85-2"><a href="#cb85-2" tabindex="-1"></a>dend_intersected <span class="ot"><-</span> <span class="fu">intersect_trees</span>(dend13, dend15)</span>
|
|||
|
<span id="cb85-3"><a href="#cb85-3" tabindex="-1"></a>dend_intersected[[<span class="dv">1</span>]] <span class="sc">%>%</span> plot</span>
|
|||
|
<span id="cb85-4"><a href="#cb85-4" tabindex="-1"></a>dend_intersected[[<span class="dv">2</span>]] <span class="sc">%>%</span> plot</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="collapse-branches" class="section level4">
|
|||
|
<h4>Collapse branches</h4>
|
|||
|
<p>We can collapse branches under a tolerance level using the
|
|||
|
<code>collapse_branch</code> function:</p>
|
|||
|
<div class="sourceCode" id="cb86"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb86-1"><a href="#cb86-1" tabindex="-1"></a><span class="co"># ladderize is like sort(..., type = "nodes")</span></span>
|
|||
|
<span id="cb86-2"><a href="#cb86-2" tabindex="-1"></a>dend <span class="ot"><-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb86-3"><a href="#cb86-3" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
|
|||
|
<span id="cb86-4"><a href="#cb86-4" tabindex="-1"></a>dend <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>); <span class="fu">abline</span>(<span class="at">v =</span> .<span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb86-5"><a href="#cb86-5" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">collapse_branch</span>(<span class="at">tol =</span> <span class="fl">0.2</span>) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb86-6"><a href="#cb86-6" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">collapse_branch</span>(<span class="at">tol =</span> <span class="fl">0.2</span>) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> <span class="fu">hang.dendrogram</span>(<span class="at">hang =</span> <span class="dv">0</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="adding-extra-bars-and-rectangles" class="section level3">
|
|||
|
<h3>Adding extra bars and rectangles</h3>
|
|||
|
<div id="adding-colored-rectangles" class="section level4">
|
|||
|
<h4>Adding colored rectangles</h4>
|
|||
|
<p>Earlier we have seen how to highlight clusters in a dendrogram by
|
|||
|
coloring branches. We can also draw rectangles around the branches of a
|
|||
|
dendrogram in order to highlight the corresponding clusters. First the
|
|||
|
dendrogram is cut at a certain level, then a rectangle is drawn around
|
|||
|
selected branches. This is done using the <code>rect.dendrogram</code> function,
|
|||
|
which is modeled on the <code>rect.hclust</code> function. One
|
|||
|
advantage of <code>rect.dendrogram</code> over <code>rect.hclust</code>
|
|||
|
is that it also works on horizontally plotted trees:</p>
|
|||
|
<div class="sourceCode" id="cb87"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb87-1"><a href="#cb87-1" tabindex="-1"></a><span class="fu">layout</span>(<span class="fu">t</span>(<span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">2</span>)))</span>
|
|||
|
<span id="cb87-2"><a href="#cb87-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb87-3"><a href="#cb87-3" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>) <span class="sc">%>%</span> plot</span>
|
|||
|
<span id="cb87-4"><a href="#cb87-4" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">rect.dendrogram</span>(<span class="at">k=</span><span class="dv">3</span>, </span>
|
|||
|
<span id="cb87-5"><a href="#cb87-5" tabindex="-1"></a> <span class="at">border =</span> <span class="dv">8</span>, <span class="at">lty =</span> <span class="dv">5</span>, <span class="at">lwd =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb87-6"><a href="#cb87-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb87-7"><a href="#cb87-7" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb87-8"><a href="#cb87-8" tabindex="-1"></a>dend15 <span class="sc">%>%</span> <span class="fu">rect.dendrogram</span>(<span class="at">k=</span><span class="dv">3</span>, <span class="at">horiz =</span> <span class="cn">TRUE</span>,</span>
|
|||
|
<span id="cb87-9"><a href="#cb87-9" tabindex="-1"></a> <span class="at">border =</span> <span class="dv">8</span>, <span class="at">lty =</span> <span class="dv">5</span>, <span class="at">lwd =</span> <span class="dv">2</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="adding-colored-bars" class="section level4">
|
|||
|
<h4>Adding colored bars</h4>
|
|||
|
<p>Adding colored bars to a dendrogram may be useful to show clusters or
|
|||
|
some outside categorization of the items. For example:</p>
|
|||
|
<div class="sourceCode" id="cb88"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb88-1"><a href="#cb88-1" tabindex="-1"></a>is_odd <span class="ot"><-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc">%%</span> <span class="dv">2</span>, <span class="dv">2</span>,<span class="dv">3</span>)</span>
|
|||
|
<span id="cb88-2"><a href="#cb88-2" tabindex="-1"></a>is_345 <span class="ot"><-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc">></span> <span class="dv">2</span>, <span class="dv">3</span>,<span class="dv">4</span>)</span>
|
|||
|
<span id="cb88-3"><a href="#cb88-3" tabindex="-1"></a>is_12 <span class="ot"><-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc"><=</span> <span class="dv">2</span>, <span class="dv">3</span>,<span class="dv">4</span>)</span>
|
|||
|
<span id="cb88-4"><a href="#cb88-4" tabindex="-1"></a>k_3 <span class="ot"><-</span> <span class="fu">cutree</span>(dend15,<span class="at">k =</span> <span class="dv">3</span>, <span class="at">order_clusters_as_data =</span> <span class="cn">FALSE</span>) </span>
|
|||
|
<span id="cb88-5"><a href="#cb88-5" tabindex="-1"></a><span class="co"># The FALSE above makes sure we get the clusters in the order of the</span></span>
|
|||
|
<span id="cb88-6"><a href="#cb88-6" tabindex="-1"></a><span class="co"># dendrogram, and not in that of the original data. It is like:</span></span>
|
|||
|
<span id="cb88-7"><a href="#cb88-7" tabindex="-1"></a><span class="co"># cutree(dend15, k = 3)[order.dendrogram(dend15)]</span></span>
|
|||
|
<span id="cb88-8"><a href="#cb88-8" tabindex="-1"></a>the_bars <span class="ot"><-</span> <span class="fu">cbind</span>(is_odd, is_345, is_12, k_3)</span>
|
|||
|
<span id="cb88-9"><a href="#cb88-9" tabindex="-1"></a>the_bars[the_bars<span class="sc">==</span><span class="dv">2</span>] <span class="ot"><-</span> <span class="dv">8</span></span>
|
|||
|
<span id="cb88-10"><a href="#cb88-10" tabindex="-1"></a></span>
|
|||
|
<span id="cb88-11"><a href="#cb88-11" tabindex="-1"></a>dend15 <span class="sc">%>%</span> plot</span>
|
|||
|
<span id="cb88-12"><a href="#cb88-12" tabindex="-1"></a><span class="fu">colored_bars</span>(<span class="at">colors =</span> the_bars, <span class="at">dend =</span> dend15, <span class="at">sort_by_labels_order =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb89"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb89-1"><a href="#cb89-1" tabindex="-1"></a><span class="co"># we use sort_by_labels_order = FALSE since "the_bars" were set based on the</span></span>
|
|||
|
<span id="cb89-2"><a href="#cb89-2" tabindex="-1"></a><span class="co"># labels order. The more common use case is when the bars are based on a second variable</span></span>
|
|||
|
<span id="cb89-3"><a href="#cb89-3" tabindex="-1"></a><span class="co"># from the same data.frame as dend was created from. Thus, the default </span></span>
|
|||
|
<span id="cb89-4"><a href="#cb89-4" tabindex="-1"></a><span class="co"># sort_by_labels_order = TRUE would make more sense.</span></span></code></pre></div>
|
|||
|
<p>Another example, based on mtcars (in which the default of
|
|||
|
<code>sort_by_labels_order = TRUE</code> makes sense):</p>
|
|||
|
<div class="sourceCode" id="cb90"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb90-1"><a href="#cb90-1" tabindex="-1"></a>dend_mtcars <span class="ot"><-</span> mtcars[, <span class="fu">c</span>(<span class="st">"mpg"</span>, <span class="st">"disp"</span>)] <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb90-2"><a href="#cb90-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb90-3"><a href="#cb90-3" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">c</span>(<span class="dv">10</span>,<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">1</span>))</span>
|
|||
|
<span id="cb90-4"><a href="#cb90-4" tabindex="-1"></a><span class="fu">plot</span>(dend_mtcars)</span>
|
|||
|
<span id="cb90-5"><a href="#cb90-5" tabindex="-1"></a>the_bars <span class="ot"><-</span> <span class="fu">ifelse</span>(mtcars<span class="sc">$</span>am, <span class="st">"grey"</span>, <span class="st">"gold"</span>)</span>
|
|||
|
<span id="cb90-6"><a href="#cb90-6" tabindex="-1"></a><span class="fu">colored_bars</span>(<span class="at">colors =</span> the_bars, <span class="at">dend =</span> dend_mtcars, <span class="at">rowLabels =</span> <span class="st">"am"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="ggplot2-integration" class="section level2">
|
|||
|
<h2>ggplot2 integration</h2>
|
|||
|
<p>The core process is to transform a dendrogram into a
|
|||
|
<code>ggdend</code> object using <code>as.ggdend</code>, and then plot
|
|||
|
it using <code>ggplot</code> (a new S3 <code>ggplot.ggdend</code>
|
|||
|
function is available). These two steps can be done in one command with
|
|||
|
either the function <code>ggplot</code> or <code>ggdend</code>.</p>
|
|||
|
<p>The reason we want to have <code>as.ggdend</code> (and not only
|
|||
|
<code>ggplot.dendrogram</code>), is (1) so that you could create your
|
|||
|
own mapping of <code>ggdend</code> and, (2) since <code>as.ggdend</code>
|
|||
|
might be slow for large trees, it is probably better to be able to run
|
|||
|
it only once for such cases.</p>
|
|||
|
<p>A <code>ggdend</code> class object is a list with 3 components:
|
|||
|
segments, labels, nodes. Each one contains the graphical parameters from
|
|||
|
the original dendrogram, but in a tabular form that can be used by
|
|||
|
<code>ggplot2+geom_segment+geom_text</code> to create a dendrogram
|
|||
|
plot.</p>
|
|||
|
<p>The function <code>prepare.ggdend</code> is used by
|
|||
|
<code>plot.ggdend</code> to take the ggdend object and prepare it for
|
|||
|
plotting. This is because the defaults of various parameters in
|
|||
|
dendrograms are not always stored in the object itself, but are
|
|||
|
built into the <code>plot.dendrogram</code> function. For example,
|
|||
|
the color of the labels is not (by default) specified in the dendrogram
|
|||
|
(only if we change it from black to something else). Hence, when taking
|
|||
|
the object into a different plotting engine (say ggplot2), we want to
|
|||
|
prepare the object by filling-in various defaults. This function is
|
|||
|
automatically invoked within the <code>plot.ggdend</code> function. You
|
|||
|
would probably use it only if you wish to build your own ggplot2
|
|||
|
mapping.</p>
|
|||
|
<div class="sourceCode" id="cb91"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb91-1"><a href="#cb91-1" tabindex="-1"></a><span class="co"># Create a complex dend:</span></span>
|
|||
|
<span id="cb91-2"><a href="#cb91-2" tabindex="-1"></a>dend <span class="ot"><-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">30</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb91-3"><a href="#cb91-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="fu">c</span>(<span class="fl">1.5</span>,<span class="dv">1</span>,<span class="fl">1.5</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb91-4"><a href="#cb91-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_lty"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb91-5"><a href="#cb91-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="fu">c</span>(.<span class="dv">9</span>,<span class="fl">1.2</span>)) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb91-6"><a href="#cb91-6" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_col"</span>, <span class="fu">c</span>(<span class="st">"orange"</span>, <span class="st">"black"</span>, <span class="st">"plum"</span>, <span class="cn">NA</span>))</span>
|
|||
|
<span id="cb91-7"><a href="#cb91-7" tabindex="-1"></a><span class="co"># plot the dend in usual "base" plotting engine:</span></span>
|
|||
|
<span id="cb91-8"><a href="#cb91-8" tabindex="-1"></a><span class="fu">plot</span>(dend)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb92"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb92-1"><a href="#cb92-1" tabindex="-1"></a><span class="co"># Now let's do it in ggplot2 :)</span></span>
|
|||
|
<span id="cb92-2"><a href="#cb92-2" tabindex="-1"></a>ggd1 <span class="ot"><-</span> <span class="fu">as.ggdend</span>(dend)</span>
|
|||
|
<span id="cb92-3"><a href="#cb92-3" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
|
|||
|
<span id="cb92-4"><a href="#cb92-4" tabindex="-1"></a><span class="co"># the nodes are not implemented yet.</span></span>
|
|||
|
<span id="cb92-5"><a href="#cb92-5" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1) <span class="co"># reproducing the above plot in ggplot2 :)</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb93"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb93-1"><a href="#cb93-1" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1, <span class="at">horiz =</span> <span class="cn">TRUE</span>, <span class="at">theme =</span> <span class="cn">NULL</span>) <span class="co"># horiz plot (and let's remove theme) in ggplot2</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb94"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb94-1"><a href="#cb94-1" tabindex="-1"></a><span class="co"># Adding some extra spice to it...</span></span>
|
|||
|
<span id="cb94-2"><a href="#cb94-2" tabindex="-1"></a><span class="co"># creating a radial plot:</span></span>
|
|||
|
<span id="cb94-3"><a href="#cb94-3" tabindex="-1"></a><span class="co"># ggplot(ggd1) + scale_y_reverse(expand = c(0.2, 0)) + coord_polar(theta="x")</span></span>
|
|||
|
<span id="cb94-4"><a href="#cb94-4" tabindex="-1"></a><span class="co"># The text doesn't look so great, so let's remove it:</span></span>
|
|||
|
<span id="cb94-5"><a href="#cb94-5" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1, <span class="at">labels =</span> <span class="cn">FALSE</span>) <span class="sc">+</span> <span class="fu">scale_y_reverse</span>(<span class="at">expand =</span> <span class="fu">c</span>(<span class="fl">0.2</span>, <span class="dv">0</span>)) <span class="sc">+</span> <span class="fu">coord_polar</span>(<span class="at">theta=</span><span class="st">"x"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p><strong>Credit:</strong> These functions are <em>extended</em>
|
|||
|
versions of the functions <code>ggdendrogram</code>,
|
|||
|
<code>dendro_data</code> (and the hidden <code>dendrogram_data</code>)
|
|||
|
from Andrie de Vries’s <a href="https://cran.r-project.org/package=ggdendro">ggdendro</a> package.
|
|||
|
The motivation for this fork is the need to add more graphical
|
|||
|
parameters to the plotted tree. This required a strong mixture of
|
|||
|
functions from ggdendro and dendextend (to the point that it seemed
|
|||
|
better to just fork the code into its current form).</p>
|
|||
|
</div>
|
|||
|
<div id="enhancing-other-packages" class="section level2">
|
|||
|
<h2>Enhancing other packages</h2>
|
|||
|
<p>The dendextend package aims to extend and enhance features from the R
|
|||
|
ecosystem. Let us take a look at several examples.</p>
|
|||
|
<div id="dendser" class="section level3">
|
|||
|
<h3>DendSer</h3>
|
|||
|
<p>The DendSer package helps in re-arranging a dendrogram to optimize
|
|||
|
visualization-based cost functions. Until now it was only used for
|
|||
|
<code>hclust</code> objects, but it can easily be connected to
|
|||
|
<code>dendrogram</code> objects by trying to turn the dendrogram into
|
|||
|
hclust, on which it runs DendSer. This can be used to rotate the
|
|||
|
dendrogram easily by using the <code>rotate_DendSer</code> function:</p>
|
|||
|
<div class="sourceCode" id="cb95"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb95-1"><a href="#cb95-1" tabindex="-1"></a><span class="cf">if</span>(<span class="fu">require</span>(DendSer)) {</span>
|
|||
|
<span id="cb95-2"><a href="#cb95-2" tabindex="-1"></a> <span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb95-3"><a href="#cb95-3" tabindex="-1"></a> <span class="fu">DendSer.dendrogram</span>(dend15)</span>
|
|||
|
<span id="cb95-4"><a href="#cb95-4" tabindex="-1"></a> </span>
|
|||
|
<span id="cb95-5"><a href="#cb95-5" tabindex="-1"></a> dend15 <span class="sc">%>%</span> color_branches <span class="sc">%>%</span> plot</span>
|
|||
|
<span id="cb95-6"><a href="#cb95-6" tabindex="-1"></a> dend15 <span class="sc">%>%</span> color_branches <span class="sc">%>%</span> rotate_DendSer <span class="sc">%>%</span> plot</span>
|
|||
|
<span id="cb95-7"><a href="#cb95-7" tabindex="-1"></a>}</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="gplots" class="section level3">
|
|||
|
<h3>gplots</h3>
|
|||
|
<p>The gplots package brings us the <code>heatmap.2</code> function. In
|
|||
|
it, we can use our modified dendrograms to get more informative
|
|||
|
heat-maps:</p>
|
|||
|
<div class="sourceCode" id="cb96"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb96-1"><a href="#cb96-1" tabindex="-1"></a><span class="fu">library</span>(gplots)</span>
|
|||
|
<span id="cb96-2"><a href="#cb96-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb96-3"><a href="#cb96-3" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">as.matrix</span>(datasets<span class="sc">::</span>mtcars)</span>
|
|||
|
<span id="cb96-4"><a href="#cb96-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb96-5"><a href="#cb96-5" tabindex="-1"></a><span class="fu">heatmap.2</span>(x)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb97"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb97-1"><a href="#cb97-1" tabindex="-1"></a><span class="co"># now let's spice up the dendrograms a bit:</span></span>
|
|||
|
<span id="cb97-2"><a href="#cb97-2" tabindex="-1"></a>Rowv <span class="ot"><-</span> x <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb97-3"><a href="#cb97-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="dv">4</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb97-4"><a href="#cb97-4" tabindex="-1"></a> ladderize</span>
|
|||
|
<span id="cb97-5"><a href="#cb97-5" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(x))</span></span>
|
|||
|
<span id="cb97-6"><a href="#cb97-6" tabindex="-1"></a>Colv <span class="ot"><-</span> x <span class="sc">%>%</span> t <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb97-7"><a href="#cb97-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="dv">4</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb97-8"><a href="#cb97-8" tabindex="-1"></a> ladderize</span>
|
|||
|
<span id="cb97-9"><a href="#cb97-9" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(t(x)))</span></span>
|
|||
|
<span id="cb97-10"><a href="#cb97-10" tabindex="-1"></a></span>
|
|||
|
<span id="cb97-11"><a href="#cb97-11" tabindex="-1"></a><span class="fu">heatmap.2</span>(x, <span class="at">Rowv =</span> Rowv, <span class="at">Colv =</span> Colv)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="nmf" class="section level3">
|
|||
|
<h3>NMF</h3>
|
|||
|
<p>Like gplots, NMF offers a heatmap function called
|
|||
|
<code>aheatmap</code>. We can update it just as we would
|
|||
|
<code>heatmap.2</code>.</p>
|
|||
|
<p>Since NMF was removed from CRAN (it could still be installed from
|
|||
|
source), the example code is still available but not run in this
|
|||
|
vignette.</p>
|
|||
|
<div class="sourceCode" id="cb98"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb98-1"><a href="#cb98-1" tabindex="-1"></a><span class="co"># library(NMF)</span></span>
|
|||
|
<span id="cb98-2"><a href="#cb98-2" tabindex="-1"></a><span class="co"># </span></span>
|
|||
|
<span id="cb98-3"><a href="#cb98-3" tabindex="-1"></a><span class="co"># x <- as.matrix(datasets::mtcars)</span></span>
|
|||
|
<span id="cb98-4"><a href="#cb98-4" tabindex="-1"></a><span class="co"># </span></span>
|
|||
|
<span id="cb98-5"><a href="#cb98-5" tabindex="-1"></a><span class="co"># # now let's spice up the dendrograms a bit:</span></span>
|
|||
|
<span id="cb98-6"><a href="#cb98-6" tabindex="-1"></a><span class="co"># Rowv <- x %>% dist %>% hclust %>% as.dendrogram %>%</span></span>
|
|||
|
<span id="cb98-7"><a href="#cb98-7" tabindex="-1"></a><span class="co"># set("branches_k_color", k = 3) %>% set("branches_lwd", 4) %>%</span></span>
|
|||
|
<span id="cb98-8"><a href="#cb98-8" tabindex="-1"></a><span class="co"># ladderize</span></span>
|
|||
|
<span id="cb98-9"><a href="#cb98-9" tabindex="-1"></a><span class="co"># # rotate_DendSer(ser_weight = dist(x))</span></span>
|
|||
|
<span id="cb98-10"><a href="#cb98-10" tabindex="-1"></a><span class="co"># Colv <- x %>% t %>% dist %>% hclust %>% as.dendrogram %>%</span></span>
|
|||
|
<span id="cb98-11"><a href="#cb98-11" tabindex="-1"></a><span class="co"># set("branches_k_color", k = 2) %>% set("branches_lwd", 4) %>%</span></span>
|
|||
|
<span id="cb98-12"><a href="#cb98-12" tabindex="-1"></a><span class="co"># ladderize</span></span>
|
|||
|
<span id="cb98-13"><a href="#cb98-13" tabindex="-1"></a><span class="co"># # rotate_DendSer(ser_weight = dist(t(x)))</span></span>
|
|||
|
<span id="cb98-14"><a href="#cb98-14" tabindex="-1"></a><span class="co"># </span></span>
|
|||
|
<span id="cb98-15"><a href="#cb98-15" tabindex="-1"></a><span class="co"># aheatmap(x, Rowv = Rowv, Colv = Colv)</span></span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="heatmaply" class="section level3">
|
|||
|
<h3>heatmaply</h3>
|
|||
|
<p>The heatmaply package creates interactive heat-maps that are usable
|
|||
|
from the R console, in the ‘RStudio’ viewer pane, in ‘R Markdown’
|
|||
|
documents, and in ‘Shiny’ apps. Hover the mouse pointer over a
|
|||
|
cell or a dendrogram to show details, drag a rectangle to zoom.</p>
|
|||
|
<p>The use is very similar to what we’ve seen before, we just use
|
|||
|
<code>heatmaply</code> instead of <code>heatmap.2</code>:</p>
|
|||
|
<div class="sourceCode" id="cb99"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb99-1"><a href="#cb99-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">as.matrix</span>(datasets<span class="sc">::</span>mtcars)</span>
|
|||
|
<span id="cb99-2"><a href="#cb99-2" tabindex="-1"></a><span class="co"># heatmaply(x)</span></span>
|
|||
|
<span id="cb99-3"><a href="#cb99-3" tabindex="-1"></a><span class="co"># now let's spice up the dendrograms a bit:</span></span>
|
|||
|
<span id="cb99-4"><a href="#cb99-4" tabindex="-1"></a>Rowv <span class="ot"><-</span> x <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb99-5"><a href="#cb99-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="dv">4</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb99-6"><a href="#cb99-6" tabindex="-1"></a> ladderize</span>
|
|||
|
<span id="cb99-7"><a href="#cb99-7" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(x))</span></span>
|
|||
|
<span id="cb99-8"><a href="#cb99-8" tabindex="-1"></a>Colv <span class="ot"><-</span> x <span class="sc">%>%</span> t <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb99-9"><a href="#cb99-9" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="dv">4</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb99-10"><a href="#cb99-10" tabindex="-1"></a> ladderize</span>
|
|||
|
<span id="cb99-11"><a href="#cb99-11" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(t(x)))</span></span></code></pre></div>
|
|||
|
<p>Here we need to use <code>cache=FALSE</code> in the markdown:</p>
|
|||
|
<div class="sourceCode" id="cb100"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb100-1"><a href="#cb100-1" tabindex="-1"></a><span class="fu">library</span>(heatmaply)</span>
|
|||
|
<span id="cb100-2"><a href="#cb100-2" tabindex="-1"></a><span class="fu">heatmaply</span>(x, <span class="at">Rowv =</span> Rowv, <span class="at">Colv =</span> Colv)</span></code></pre></div>
|
|||
|
<p>I avoided running the code from above due to space issues on CRAN.
|
|||
|
For live examples, please go to:</p>
|
|||
|
<ul>
|
|||
|
<li><a href="https://talgalili.github.io/heatmaply/articles/heatmaply.html" class="uri">https://talgalili.github.io/heatmaply/articles/heatmaply.html</a></li>
|
|||
|
</ul>
|
|||
|
</div>
|
|||
|
<div id="dynamictreecut" class="section level3">
|
|||
|
<h3>dynamicTreeCut</h3>
|
|||
|
<p>The <code>cutreeDynamic</code> function offers a wrapper for two
|
|||
|
methods of adaptive branch pruning of hierarchical clustering
|
|||
|
dendrograms. The results can now be visualized by both updating
|
|||
|
the branches, as well as using the <code>colored_bars</code> function
|
|||
|
(which was adjusted for use with plots of dendrograms):</p>
|
|||
|
<div class="sourceCode" id="cb101"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb101-1"><a href="#cb101-1" tabindex="-1"></a><span class="co"># let's get the clusters</span></span>
|
|||
|
<span id="cb101-2"><a href="#cb101-2" tabindex="-1"></a><span class="fu">library</span>(dynamicTreeCut)</span>
|
|||
|
<span id="cb101-3"><a href="#cb101-3" tabindex="-1"></a><span class="fu">data</span>(iris)</span>
|
|||
|
<span id="cb101-4"><a href="#cb101-4" tabindex="-1"></a>x <span class="ot"><-</span> iris[,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> as.matrix</span>
|
|||
|
<span id="cb101-5"><a href="#cb101-5" tabindex="-1"></a>hc <span class="ot"><-</span> x <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust</span>
|
|||
|
<span id="cb101-6"><a href="#cb101-6" tabindex="-1"></a>dend <span class="ot"><-</span> hc <span class="sc">%>%</span> as.dendrogram </span>
|
|||
|
<span id="cb101-7"><a href="#cb101-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb101-8"><a href="#cb101-8" tabindex="-1"></a><span class="co"># Find special clusters:</span></span>
|
|||
|
<span id="cb101-9"><a href="#cb101-9" tabindex="-1"></a>clusters <span class="ot"><-</span> <span class="fu">cutreeDynamic</span>(hc, <span class="at">distM =</span> <span class="fu">as.matrix</span>(<span class="fu">dist</span>(x)), <span class="at">method =</span> <span class="st">"tree"</span>)</span>
|
|||
|
<span id="cb101-10"><a href="#cb101-10" tabindex="-1"></a><span class="co"># we need to sort them to the order of the dendrogram:</span></span>
|
|||
|
<span id="cb101-11"><a href="#cb101-11" tabindex="-1"></a>clusters <span class="ot"><-</span> clusters[<span class="fu">order.dendrogram</span>(dend)]</span>
|
|||
|
<span id="cb101-12"><a href="#cb101-12" tabindex="-1"></a>clusters_numbers <span class="ot"><-</span> <span class="fu">unique</span>(clusters) <span class="sc">-</span> (<span class="dv">0</span> <span class="sc">%in%</span> clusters)</span>
|
|||
|
<span id="cb101-13"><a href="#cb101-13" tabindex="-1"></a>n_clusters <span class="ot"><-</span> <span class="fu">length</span>(clusters_numbers)</span>
|
|||
|
<span id="cb101-14"><a href="#cb101-14" tabindex="-1"></a></span>
|
|||
|
<span id="cb101-15"><a href="#cb101-15" tabindex="-1"></a><span class="fu">library</span>(colorspace)</span>
|
|||
|
<span id="cb101-16"><a href="#cb101-16" tabindex="-1"></a>cols <span class="ot"><-</span> <span class="fu">rainbow_hcl</span>(n_clusters)</span>
|
|||
|
<span id="cb101-17"><a href="#cb101-17" tabindex="-1"></a>true_species_cols <span class="ot"><-</span> <span class="fu">rainbow_hcl</span>(<span class="dv">3</span>)[<span class="fu">as.numeric</span>(iris[,][<span class="fu">order.dendrogram</span>(dend),<span class="dv">5</span>])]</span>
|
|||
|
<span id="cb101-18"><a href="#cb101-18" tabindex="-1"></a>dend2 <span class="ot"><-</span> dend <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb101-19"><a href="#cb101-19" tabindex="-1"></a> <span class="fu">branches_attr_by_clusters</span>(clusters, <span class="at">values =</span> cols) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb101-20"><a href="#cb101-20" tabindex="-1"></a> <span class="fu">color_labels</span>(<span class="at">col =</span> true_species_cols)</span>
|
|||
|
<span id="cb101-21"><a href="#cb101-21" tabindex="-1"></a><span class="fu">plot</span>(dend2)</span>
|
|||
|
<span id="cb101-22"><a href="#cb101-22" tabindex="-1"></a>clusters <span class="ot"><-</span> <span class="fu">factor</span>(clusters)</span>
|
|||
|
<span id="cb101-23"><a href="#cb101-23" tabindex="-1"></a><span class="fu">levels</span>(clusters)[<span class="sc">-</span><span class="dv">1</span>] <span class="ot"><-</span> cols[<span class="sc">-</span><span class="dv">5</span>][<span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>,<span class="dv">2</span>,<span class="dv">3</span>)] </span>
|
|||
|
<span id="cb101-24"><a href="#cb101-24" tabindex="-1"></a> <span class="co"># Get the clusters to have proper colors.</span></span>
|
|||
|
<span id="cb101-25"><a href="#cb101-25" tabindex="-1"></a> <span class="co"># fix the order of the colors to match the branches.</span></span>
|
|||
|
<span id="cb101-26"><a href="#cb101-26" tabindex="-1"></a><span class="fu">colored_bars</span>(clusters, dend, <span class="at">sort_by_labels_order =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb102"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb102-1"><a href="#cb102-1" tabindex="-1"></a><span class="co"># here we used sort_by_labels_order = FALSE since the clusters were already sorted based on the dendrogram's order</span></span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="pvclust" class="section level3">
|
|||
|
<h3>pvclust</h3>
|
|||
|
<p>The pvclust library calculates “p-values” for hierarchical
|
|||
|
clustering via multiscale bootstrap re-sampling. Hierarchical clustering
|
|||
|
is done for given data and p-values are computed for each of the
|
|||
|
clusters. The dendextend package lets us reproduce the plot from
|
|||
|
pvclust, but with a dendrogram (instead of an hclust object), which also
|
|||
|
lets us extend the visualization.</p>
|
|||
|
<div class="sourceCode" id="cb103"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb103-1"><a href="#cb103-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb103-2"><a href="#cb103-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb103-3"><a href="#cb103-3" tabindex="-1"></a><span class="fu">library</span>(pvclust)</span>
|
|||
|
<span id="cb103-4"><a href="#cb103-4" tabindex="-1"></a><span class="fu">data</span>(lung) <span class="co"># 916 genes for 73 subjects</span></span>
|
|||
|
<span id="cb103-5"><a href="#cb103-5" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">13134</span>)</span>
|
|||
|
<span id="cb103-6"><a href="#cb103-6" tabindex="-1"></a>result <span class="ot"><-</span> <span class="fu">pvclust</span>(lung[<span class="dv">1</span><span class="sc">:</span><span class="dv">100</span>, <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>], </span>
|
|||
|
<span id="cb103-7"><a href="#cb103-7" tabindex="-1"></a> <span class="at">method.dist=</span><span class="st">"cor"</span>, <span class="at">method.hclust=</span><span class="st">"average"</span>, <span class="at">nboot=</span><span class="dv">10</span>)</span>
|
|||
|
<span id="cb103-8"><a href="#cb103-8" tabindex="-1"></a></span>
|
|||
|
<span id="cb103-9"><a href="#cb103-9" tabindex="-1"></a><span class="co"># with pvrect</span></span>
|
|||
|
<span id="cb103-10"><a href="#cb103-10" tabindex="-1"></a><span class="fu">plot</span>(result)</span>
|
|||
|
<span id="cb103-11"><a href="#cb103-11" tabindex="-1"></a><span class="fu">pvrect</span>(result)</span>
|
|||
|
<span id="cb103-12"><a href="#cb103-12" tabindex="-1"></a></span>
|
|||
|
<span id="cb103-13"><a href="#cb103-13" tabindex="-1"></a><span class="co"># with a dendrogram of pvrect</span></span>
|
|||
|
<span id="cb103-14"><a href="#cb103-14" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(result)</span>
|
|||
|
<span id="cb103-15"><a href="#cb103-15" tabindex="-1"></a>result <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb103-16"><a href="#cb103-16" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> reproduced plot with dendrogram"</span>)</span>
|
|||
|
<span id="cb103-17"><a href="#cb103-17" tabindex="-1"></a>result <span class="sc">%>%</span> text</span>
|
|||
|
<span id="cb103-18"><a href="#cb103-18" tabindex="-1"></a>result <span class="sc">%>%</span> pvrect</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Let’s color and thicken the branches based on the p-values:</p>
|
|||
|
<div class="sourceCode" id="cb104"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb104-1"><a href="#cb104-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb104-2"><a href="#cb104-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb104-3"><a href="#cb104-3" tabindex="-1"></a><span class="co"># with a modified dendrogram of pvrect</span></span>
|
|||
|
<span id="cb104-4"><a href="#cb104-4" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">pvclust_show_signif</span>(result) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb104-5"><a href="#cb104-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Cluster dendrogram </span><span class="sc">\n</span><span class="st"> bp values are highlighted by signif"</span>)</span>
|
|||
|
<span id="cb104-6"><a href="#cb104-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb104-7"><a href="#cb104-7" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">pvclust_show_signif</span>(result, <span class="at">show_type =</span> <span class="st">"lwd"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb104-8"><a href="#cb104-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are highlighted by signif"</span>)</span>
|
|||
|
<span id="cb104-9"><a href="#cb104-9" tabindex="-1"></a>result <span class="sc">%>%</span> text</span>
|
|||
|
<span id="cb104-10"><a href="#cb104-10" tabindex="-1"></a>result <span class="sc">%>%</span> <span class="fu">pvrect</span>(<span class="at">alpha=</span><span class="fl">0.95</span>)</span>
|
|||
|
<span id="cb104-11"><a href="#cb104-11" tabindex="-1"></a></span>
|
|||
|
<span id="cb104-12"><a href="#cb104-12" tabindex="-1"></a></span>
|
|||
|
<span id="cb104-13"><a href="#cb104-13" tabindex="-1"></a>dend <span class="sc">%>%</span> <span class="fu">pvclust_show_signif_gradient</span>(result) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb104-14"><a href="#cb104-14" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are colored by signif"</span>)</span>
|
|||
|
<span id="cb104-15"><a href="#cb104-15" tabindex="-1"></a></span>
|
|||
|
<span id="cb104-16"><a href="#cb104-16" tabindex="-1"></a>dend <span class="sc">%>%</span></span>
|
|||
|
<span id="cb104-17"><a href="#cb104-17" tabindex="-1"></a> <span class="fu">pvclust_show_signif_gradient</span>(result) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb104-18"><a href="#cb104-18" tabindex="-1"></a> <span class="fu">pvclust_show_signif</span>(result) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb104-19"><a href="#cb104-19" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">"Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are colored+highlighted by signif"</span>)</span>
|
|||
|
<span id="cb104-20"><a href="#cb104-20" tabindex="-1"></a>result <span class="sc">%>%</span> text</span>
|
|||
|
<span id="cb104-21"><a href="#cb104-21" tabindex="-1"></a>result <span class="sc">%>%</span> <span class="fu">pvrect</span>(<span class="at">alpha=</span><span class="fl">0.95</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="circlize" class="section level3">
|
|||
|
<h3>circlize</h3>
|
|||
|
<p>Circular layout is an efficient way for the visualization of huge
|
|||
|
amounts of information. The circlize package provides an implementation
|
|||
|
of circular layout generation in R, including a solution for dendrogram
|
|||
|
objects produced using dendextend:</p>
|
|||
|
<div class="sourceCode" id="cb105"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb105-1"><a href="#cb105-1" tabindex="-1"></a><span class="fu">library</span>(circlize)</span>
|
|||
|
<span id="cb105-2"><a href="#cb105-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb105-3"><a href="#cb105-3" tabindex="-1"></a>dend <span class="ot"><-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb105-4"><a href="#cb105-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">3</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_lwd"</span>, <span class="fu">c</span>(<span class="dv">5</span>,<span class="dv">2</span>,<span class="fl">1.5</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb105-5"><a href="#cb105-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_lty"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb105-6"><a href="#cb105-6" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"labels_colors"</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_cex"</span>, <span class="fu">c</span>(.<span class="dv">6</span>,<span class="fl">1.5</span>)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb105-7"><a href="#cb105-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"nodes_pch"</span>, <span class="dv">19</span>) <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"nodes_col"</span>, <span class="fu">c</span>(<span class="st">"orange"</span>, <span class="st">"black"</span>, <span class="st">"plum"</span>, <span class="cn">NA</span>))</span>
|
|||
|
<span id="cb105-8"><a href="#cb105-8" tabindex="-1"></a></span>
|
|||
|
<span id="cb105-9"><a href="#cb105-9" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">rep</span>(<span class="dv">0</span>,<span class="dv">4</span>))</span>
|
|||
|
<span id="cb105-10"><a href="#cb105-10" tabindex="-1"></a><span class="fu">circlize_dendrogram</span>(dend)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb106"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb106-1"><a href="#cb106-1" tabindex="-1"></a><span class="co"># circlize_dendrogram(dend, labels = FALSE)</span></span>
|
|||
|
<span id="cb106-2"><a href="#cb106-2" tabindex="-1"></a><span class="co"># circlize_dendrogram(dend, facing = "inside", labels = FALSE)</span></span></code></pre></div>
|
|||
|
<p>The above is a wrapper for functions in circlize. An advantage of
|
|||
|
using the circlize package directly is for plotting a circular
|
|||
|
dendrogram so that you can add more graphics for the elements in the
|
|||
|
tree just by adding more tracks using <code>circos.track</code>. For example:</p>
|
|||
|
<div class="sourceCode" id="cb107"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb107-1"><a href="#cb107-1" tabindex="-1"></a><span class="co"># dend <- iris[1:40,-5] %>% dist %>% hclust %>% as.dendrogram %>%</span></span>
|
|||
|
<span id="cb107-2"><a href="#cb107-2" tabindex="-1"></a><span class="co"># set("branches_k_color", k=3) %>% set("branches_lwd", c(5,2,1.5)) %>%</span></span>
|
|||
|
<span id="cb107-3"><a href="#cb107-3" tabindex="-1"></a><span class="co"># set("branches_lty", c(1,1,3,1,1,2)) %>%</span></span>
|
|||
|
<span id="cb107-4"><a href="#cb107-4" tabindex="-1"></a><span class="co"># set("labels_colors") %>% set("labels_cex", c(.9,1.2)) %>%</span></span>
|
|||
|
<span id="cb107-5"><a href="#cb107-5" tabindex="-1"></a><span class="co"># set("nodes_pch", 19) %>% set("nodes_col", c("orange", "black", "plum", NA))</span></span>
|
|||
|
<span id="cb107-6"><a href="#cb107-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb107-7"><a href="#cb107-7" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2015-07-10</span>) </span>
|
|||
|
<span id="cb107-8"><a href="#cb107-8" tabindex="-1"></a><span class="co"># In the following we get the dendrogram but can also get extra information on top of it</span></span>
|
|||
|
<span id="cb107-9"><a href="#cb107-9" tabindex="-1"></a><span class="fu">circos.initialize</span>(<span class="st">"foo"</span>, <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">40</span>))</span>
|
|||
|
<span id="cb107-10"><a href="#cb107-10" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
|
|||
|
<span id="cb107-11"><a href="#cb107-11" tabindex="-1"></a> <span class="fu">circos.rect</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.8</span>, <span class="fu">rep</span>(<span class="dv">0</span>, <span class="dv">40</span>), <span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.2</span>, <span class="fu">runif</span>(<span class="dv">40</span>), <span class="at">col =</span> <span class="fu">rand_color</span>(<span class="dv">40</span>), <span class="at">border =</span> <span class="cn">NA</span>)</span>
|
|||
|
<span id="cb107-12"><a href="#cb107-12" tabindex="-1"></a>}, <span class="at">bg.border =</span> <span class="cn">NA</span>)</span>
|
|||
|
<span id="cb107-13"><a href="#cb107-13" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
|
|||
|
<span id="cb107-14"><a href="#cb107-14" tabindex="-1"></a> <span class="fu">circos.text</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.5</span>, <span class="fu">rep</span>(<span class="dv">0</span>, <span class="dv">40</span>), <span class="fu">labels</span>(dend), <span class="at">col =</span> <span class="fu">labels_colors</span>(dend),</span>
|
|||
|
<span id="cb107-15"><a href="#cb107-15" tabindex="-1"></a> <span class="at">facing =</span> <span class="st">"clockwise"</span>, <span class="at">niceFacing =</span> <span class="cn">TRUE</span>, <span class="at">adj =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.5</span>))</span>
|
|||
|
<span id="cb107-16"><a href="#cb107-16" tabindex="-1"></a>}, <span class="at">bg.border =</span> <span class="cn">NA</span>, <span class="at">track.height =</span> <span class="fl">0.1</span>)</span>
|
|||
|
<span id="cb107-17"><a href="#cb107-17" tabindex="-1"></a>max_height <span class="ot">=</span> <span class="fu">attr</span>(dend, <span class="st">"height"</span>)</span>
|
|||
|
<span id="cb107-18"><a href="#cb107-18" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, max_height), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
|
|||
|
<span id="cb107-19"><a href="#cb107-19" tabindex="-1"></a> <span class="fu">circos.dendrogram</span>(dend, <span class="at">max_height =</span> max_height)</span>
|
|||
|
<span id="cb107-20"><a href="#cb107-20" tabindex="-1"></a>}, <span class="at">track.height =</span> <span class="fl">0.5</span>, <span class="at">bg.border =</span> <span class="cn">NA</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb108"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb108-1"><a href="#cb108-1" tabindex="-1"></a><span class="fu">circos.clear</span>()</span></code></pre></div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="comparing-two-dendrograms" class="section level2">
|
|||
|
<h2>Comparing two dendrograms</h2>
|
|||
|
<div id="dendlist" class="section level3">
|
|||
|
<h3>dendlist</h3>
|
|||
|
<p>A <code>dendlist</code> is a function which produces the dendlist
|
|||
|
class. It accepts several dendrograms and/or dendlist objects and chains
|
|||
|
them all together. This function aims to help with the usability of
|
|||
|
comparing two or more dendrograms.</p>
|
|||
|
<div class="sourceCode" id="cb109"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb109-1"><a href="#cb109-1" tabindex="-1"></a>dend15 <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"average"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb109-2"><a href="#cb109-2" tabindex="-1"></a>dend15 <span class="ot"><-</span> dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels_to_char"</span>)</span>
|
|||
|
<span id="cb109-3"><a href="#cb109-3" tabindex="-1"></a>dend51 <span class="ot"><-</span> dend15 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels"</span>, <span class="fu">as.character</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)) <span class="sc">%>%</span> <span class="fu">match_order_by_labels</span>(dend15)</span>
|
|||
|
<span id="cb109-4"><a href="#cb109-4" tabindex="-1"></a>dends_15_51 <span class="ot"><-</span> <span class="fu">dendlist</span>(dend15, dend51)</span>
|
|||
|
<span id="cb109-5"><a href="#cb109-5" tabindex="-1"></a>dends_15_51</span></code></pre></div>
|
|||
|
<pre><code>#> [[1]]
|
|||
|
#> 'dendrogram' with 2 branches and 5 members total, at height 2.5
|
|||
|
#>
|
|||
|
#> [[2]]
|
|||
|
#> 'dendrogram' with 2 branches and 5 members total, at height 2.5
|
|||
|
#>
|
|||
|
#> attr(,"class")
|
|||
|
#> [1] "dendlist"</code></pre>
|
|||
|
<div class="sourceCode" id="cb111"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb111-1"><a href="#cb111-1" tabindex="-1"></a><span class="fu">head</span>(dends_15_51)</span></code></pre></div>
|
|||
|
<pre><code>#> ============
|
|||
|
#> dend 1
|
|||
|
#> ---------
|
|||
|
#> --[dendrogram w/ 2 branches and 5 members at h = 2.5]
|
|||
|
#> |--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> | |--leaf "1"
|
|||
|
#> | `--leaf "2"
|
|||
|
#> `--[dendrogram w/ 2 branches and 3 members at h = 1.5]
|
|||
|
#> |--leaf "5"
|
|||
|
#> `--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> |--leaf "3"
|
|||
|
#> `--leaf "4"
|
|||
|
#> etc...
|
|||
|
#> ============
|
|||
|
#> dend 2
|
|||
|
#> ---------
|
|||
|
#> --[dendrogram w/ 2 branches and 5 members at h = 2.5]
|
|||
|
#> |--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> | |--leaf "5"
|
|||
|
#> | `--leaf "4"
|
|||
|
#> `--[dendrogram w/ 2 branches and 3 members at h = 1.5]
|
|||
|
#> |--leaf "3"
|
|||
|
#> `--[dendrogram w/ 2 branches and 2 members at h = 1]
|
|||
|
#> |--leaf "2"
|
|||
|
#> `--leaf "1"
|
|||
|
#> etc...</code></pre>
|
|||
|
<p>The function <code>match_order_by_labels</code> makes sure that the
|
|||
|
order in the leaves corresponds to the same labels in both trees.</p>
|
|||
|
</div>
|
|||
|
<div id="dend_diff" class="section level3">
|
|||
|
<h3>dend_diff</h3>
|
|||
|
<p>The <code>dend_diff</code> function plots two trees side by side,
|
|||
|
highlighting edges unique to each tree in red. It relies on the
|
|||
|
<code>distinct_edges</code> function.</p>
|
|||
|
<p>For example:</p>
|
|||
|
<div class="sourceCode" id="cb113"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb113-1"><a href="#cb113-1" tabindex="-1"></a><span class="co"># example 1</span></span>
|
|||
|
<span id="cb113-2"><a href="#cb113-2" tabindex="-1"></a>x <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb113-3"><a href="#cb113-3" tabindex="-1"></a>y <span class="ot"><-</span> <span class="fu">set</span>(x, <span class="st">"labels"</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)</span>
|
|||
|
<span id="cb113-4"><a href="#cb113-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb113-5"><a href="#cb113-5" tabindex="-1"></a><span class="co"># example 2</span></span>
|
|||
|
<span id="cb113-6"><a href="#cb113-6" tabindex="-1"></a>dend1 <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb113-7"><a href="#cb113-7" tabindex="-1"></a>dend2 <span class="ot"><-</span> dend1 <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"labels"</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">2</span>,<span class="dv">4</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">10</span>) )</span>
|
|||
|
<span id="cb113-8"><a href="#cb113-8" tabindex="-1"></a><span class="fu">dend_diff</span>(dend1, dend2)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>See the <code>highlight_distinct_edges</code> function for more
|
|||
|
control over how to create the distinction (color, line width, line
|
|||
|
type).</p>
|
|||
|
</div>
|
|||
|
<div id="tanglegram" class="section level3">
|
|||
|
<h3>tanglegram</h3>
|
|||
|
<p>A tanglegram plot gives two dendrograms (with the same set of labels),
|
|||
|
one facing the other, and having their labels connected by lines.
|
|||
|
Tanglegrams can be used for visually comparing two methods of
|
|||
|
Hierarchical clustering, and are sometimes used in biology when
|
|||
|
comparing two phylogenetic trees.</p>
|
|||
|
<p>Here is an example of creating a tanglegram using dendextend:</p>
|
|||
|
<div class="sourceCode" id="cb114"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb114-1"><a href="#cb114-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dends_15_51)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb115"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb115-1"><a href="#cb115-1" tabindex="-1"></a><span class="co"># Same as using:</span></span>
|
|||
|
<span id="cb115-2"><a href="#cb115-2" tabindex="-1"></a><span class="co"># plot(dends_15_51) # since there is a plot method for dendlist</span></span>
|
|||
|
<span id="cb115-3"><a href="#cb115-3" tabindex="-1"></a><span class="co"># and also: </span></span>
|
|||
|
<span id="cb115-4"><a href="#cb115-4" tabindex="-1"></a><span class="co"># tanglegram(dend15, dend51)</span></span></code></pre></div>
|
|||
|
<p>Notice how “unique” nodes are highlighted with dashed lines (i.e.:
|
|||
|
nodes which contain a combination of labels/items, which are not
|
|||
|
present in the other tree). This can be turned off using
|
|||
|
<code>highlight_distinct_edges = FALSE</code>. Also notice how the
|
|||
|
connecting lines are colored to highlight two sub-trees which are
|
|||
|
present in both dendrograms. This can be turned off by setting
|
|||
|
<code>common_subtrees_color_lines = FALSE</code>. We can also color the
|
|||
|
branches of the trees to show the two common sub-trees using
|
|||
|
<code>common_subtrees_color_branches = TRUE</code>:</p>
|
|||
|
<div class="sourceCode" id="cb116"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb116-1"><a href="#cb116-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dends_15_51, <span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We may wish to improve the layout of the trees. For this we have the
|
|||
|
<code>entanglement</code> function, to measure the quality of the alignment of
|
|||
|
the two trees in the tanglegram layout, and the <code>untangle</code>
|
|||
|
function, for improving it.</p>
|
|||
|
<div class="sourceCode" id="cb117"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb117-1"><a href="#cb117-1" tabindex="-1"></a>dends_15_51 <span class="sc">%>%</span> entanglement <span class="co"># lower is better</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.9167078</code></pre>
|
|||
|
<div class="sourceCode" id="cb119"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb119-1"><a href="#cb119-1" tabindex="-1"></a><span class="co"># dends_15_51 %>% untangle(method = "DendSer") %>% entanglement # lower is better</span></span>
|
|||
|
<span id="cb119-2"><a href="#cb119-2" tabindex="-1"></a>dends_15_51 <span class="sc">%>%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"step1side"</span>) <span class="sc">%>%</span> entanglement <span class="co"># lower is better</span></span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0</code></pre>
|
|||
|
<p>Notice that just because we can get two trees to have horizontal
|
|||
|
connecting lines, it doesn’t mean these trees are identical (or even
|
|||
|
very similar topologically):</p>
|
|||
|
<div class="sourceCode" id="cb121"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb121-1"><a href="#cb121-1" tabindex="-1"></a>dends_15_51 <span class="sc">%>%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"step1side"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb121-2"><a href="#cb121-2" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Entanglement is measured by giving the left tree’s labels the values
|
|||
|
of 1 till tree size, and then matching these numbers with the right tree.
|
|||
|
Now, entanglement is the L norm distance between these two vectors. That
|
|||
|
is, we take the sum of the absolute difference (each one in the power of
|
|||
|
L). e.g: <code>sum(abs(x-y)**L)</code>. And this is divided by the
|
|||
|
“worst case” entanglement level (e.g: when the right tree is the
|
|||
|
complete reverse of the left tree).</p>
|
|||
|
<p>L tells us which penalty level we are at (L0, L1, L2, partial L’s
|
|||
|
etc). L>1 means that we give a big penalty for sharp angles. While
|
|||
|
L->0 means that any time something is not a straight horizontal line,
|
|||
|
it gets a large penalty. If L=0.1 it means that we much prefer straight
|
|||
|
lines over non-straight lines.</p>
|
|||
|
<p>Finding an optimal rotation for the tanglegram of two dendrograms is a
|
|||
|
hard problem. This problem is also harder for larger trees.</p>
|
|||
|
<p>Let’s see how well some untangle methods can do.</p>
|
|||
|
<p>Without doing anything:</p>
|
|||
|
<div class="sourceCode" id="cb122"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb122-1"><a href="#cb122-1" tabindex="-1"></a>x <span class="ot"><-</span> dends_15_51 </span>
|
|||
|
<span id="cb122-2"><a href="#cb122-2" tabindex="-1"></a>x <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">"entanglement ="</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Using ladderize (the DendSer call is shown commented out):</p>
|
|||
|
<div class="sourceCode" id="cb123"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb123-1"><a href="#cb123-1" tabindex="-1"></a><span class="co"># x <- dends_15_51 %>% untangle(method = "DendSer") </span></span>
|
|||
|
<span id="cb123-2"><a href="#cb123-2" tabindex="-1"></a>x <span class="ot"><-</span> dends_15_51 <span class="sc">%>%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"ladderize"</span>) </span>
|
|||
|
<span id="cb123-3"><a href="#cb123-3" tabindex="-1"></a>x <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">"entanglement ="</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>One solution for improving the tanglegram would be to randomly search
|
|||
|
the rotated tree space for a better solution. Here is how to use a
|
|||
|
random search:</p>
|
|||
|
<div class="sourceCode" id="cb124"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb124-1"><a href="#cb124-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">3958</span>)</span>
|
|||
|
<span id="cb124-2"><a href="#cb124-2" tabindex="-1"></a>x <span class="ot"><-</span> dends_15_51 <span class="sc">%>%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"random"</span>, <span class="at">R =</span> <span class="dv">10</span>) </span>
|
|||
|
<span id="cb124-3"><a href="#cb124-3" tabindex="-1"></a>x <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">"entanglement ="</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We can see we already got something better. An advantage of the
|
|||
|
random search is the ability to create many, many trees and compare them
|
|||
|
to find the best pair.</p>
|
|||
|
<p>Let’s use a greedy forward stepwise rotation of the two trees (first
|
|||
|
the left, then the right, and so on), to see if we can find a better
|
|||
|
solution for comparing the two trees. Notice that this may take some
|
|||
|
time to run (the larger the tree, the longer it would take), but we can
|
|||
|
limit the search for smaller k’s, and see what improvement that can
|
|||
|
bring us using step2side (slowest):</p>
|
|||
|
<div class="sourceCode" id="cb125"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb125-1"><a href="#cb125-1" tabindex="-1"></a>x <span class="ot"><-</span> dends_15_51 <span class="sc">%>%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"step2side"</span>) </span>
|
|||
|
<span id="cb125-2"><a href="#cb125-2" tabindex="-1"></a>x <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">"entanglement ="</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We got perfect entanglement (0).</p>
|
|||
|
</div>
|
|||
|
<div id="correlation-measures" class="section level3">
|
|||
|
<h3>Correlation measures</h3>
|
|||
|
<p>We shall use the following for the upcoming examples:</p>
|
|||
|
<div class="sourceCode" id="cb126"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb126-1"><a href="#cb126-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
|
|||
|
<span id="cb126-2"><a href="#cb126-2" tabindex="-1"></a>ss <span class="ot"><-</span> <span class="fu">sample</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">150</span>, <span class="dv">10</span> )</span>
|
|||
|
<span id="cb126-3"><a href="#cb126-3" tabindex="-1"></a>dend1 <span class="ot"><-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="st">"com"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb126-4"><a href="#cb126-4" tabindex="-1"></a>dend2 <span class="ot"><-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="st">"single"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb126-5"><a href="#cb126-5" tabindex="-1"></a>dend3 <span class="ot"><-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="st">"ave"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb126-6"><a href="#cb126-6" tabindex="-1"></a>dend4 <span class="ot"><-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="st">"centroid"</span>) <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb126-7"><a href="#cb126-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb126-8"><a href="#cb126-8" tabindex="-1"></a>dend1234 <span class="ot"><-</span> <span class="fu">dendlist</span>(<span class="st">"Complete"</span> <span class="ot">=</span> dend1, <span class="st">"Single"</span> <span class="ot">=</span> dend2, <span class="st">"Average"</span> <span class="ot">=</span> dend3, <span class="st">"Centroid"</span> <span class="ot">=</span> dend4)</span>
|
|||
|
<span id="cb126-9"><a href="#cb126-9" tabindex="-1"></a></span>
|
|||
|
<span id="cb126-10"><a href="#cb126-10" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb126-11"><a href="#cb126-11" tabindex="-1"></a><span class="fu">plot</span>(dend1, <span class="at">main =</span> <span class="st">"Complete"</span>)</span>
|
|||
|
<span id="cb126-12"><a href="#cb126-12" tabindex="-1"></a><span class="fu">plot</span>(dend2, <span class="at">main =</span> <span class="st">"Single"</span>)</span>
|
|||
|
<span id="cb126-13"><a href="#cb126-13" tabindex="-1"></a><span class="fu">plot</span>(dend3, <span class="at">main =</span> <span class="st">"Average"</span>)</span>
|
|||
|
<span id="cb126-14"><a href="#cb126-14" tabindex="-1"></a><span class="fu">plot</span>(dend4, <span class="at">main =</span> <span class="st">"Centroid"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div id="global-comparison-of-two-or-more-dendrograms" class="section level4">
|
|||
|
<h4>Global Comparison of two (or more) dendrograms</h4>
|
|||
|
<p>The <code>all.equal.dendrogram</code> function makes a global
|
|||
|
comparison of two or more dendrogram trees.</p>
|
|||
|
<div class="sourceCode" id="cb127"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb127-1"><a href="#cb127-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend1)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] TRUE</code></pre>
|
|||
|
<div class="sourceCode" id="cb129"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb129-1"><a href="#cb129-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "Difference in branch heights - Mean relative difference: 0.4932164"</code></pre>
|
|||
|
<div class="sourceCode" id="cb131"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb131-1"><a href="#cb131-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7, 13 | Unique edges in current: 7, 9, 11"</code></pre>
|
|||
|
<div class="sourceCode" id="cb133"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb133-1"><a href="#cb133-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>, <span class="at">use.topology =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] TRUE</code></pre>
|
|||
|
<div class="sourceCode" id="cb135"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb135-1"><a href="#cb135-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend2, dend4, <span class="at">use.edge.length =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "Difference in branch heights - Mean relative difference: 0.1969642"</code></pre>
|
|||
|
<div class="sourceCode" id="cb137"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb137-1"><a href="#cb137-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend2, dend4, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 11 | Unique edges in current: 13"</code></pre>
|
|||
|
<div class="sourceCode" id="cb139"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb139-1"><a href="#cb139-1" tabindex="-1"></a><span class="fu">all.equal</span>(<span class="fu">dendlist</span>(dend1, dend1, dend1))</span></code></pre></div>
|
|||
|
<pre><code>#> [1] TRUE</code></pre>
|
|||
|
<div class="sourceCode" id="cb141"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb141-1"><a href="#cb141-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1234)</span></code></pre></div>
|
|||
|
<pre><code>#> 1==2
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.4932164"
|
|||
|
#> 1==3
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.2767035"
|
|||
|
#> 1==4
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.4081231"
|
|||
|
#> 2==3
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.4545673"
|
|||
|
#> 2==4
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.1969642"
|
|||
|
#> 3==4
|
|||
|
#> "Difference in branch heights - Mean relative difference: 0.1970749"</code></pre>
|
|||
|
<div class="sourceCode" id="cb143"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb143-1"><a href="#cb143-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1234, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> 1==2
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7, 13 | Unique edges in current: 7, 9, 11"
|
|||
|
#> 1==3
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 7 | Unique edges in current: 7"
|
|||
|
#> 1==4
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7 | Unique edges in current: 7, 9"
|
|||
|
#> 2==3
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 9, 11 | Unique edges in current: 8, 15"
|
|||
|
#> 2==4
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 11 | Unique edges in current: 13"
|
|||
|
#> 3==4
|
|||
|
#> "Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 15 | Unique edges in current: 9"</code></pre>
|
|||
|
</div>
|
|||
|
<div id="distance-matrix-using-dist.dendlist" class="section level4">
|
|||
|
<h4>Distance matrix using dist.dendlist</h4>
|
|||
|
<p>The <code>dist.dendlist</code> function computes the Robinson-Foulds
|
|||
|
distance (also known as symmetric difference) between two dendrograms.
|
|||
|
This is the sum of edges in both trees with labels that exist in only
|
|||
|
one of the two trees (i.e.: the length of
|
|||
|
<code>distinct_edges</code>).</p>
|
|||
|
<div class="sourceCode" id="cb145"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb145-1"><a href="#cb145-1" tabindex="-1"></a>x <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb145-2"><a href="#cb145-2" tabindex="-1"></a>y <span class="ot"><-</span> <span class="fu">set</span>(x, <span class="st">"labels"</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)</span>
|
|||
|
<span id="cb145-3"><a href="#cb145-3" tabindex="-1"></a></span>
|
|||
|
<span id="cb145-4"><a href="#cb145-4" tabindex="-1"></a><span class="fu">dist.dendlist</span>(<span class="fu">dendlist</span>(<span class="at">x1 =</span> x,<span class="at">x2 =</span> x,<span class="at">y1 =</span> y))</span></code></pre></div>
|
|||
|
<pre><code>#> x1 x2
|
|||
|
#> x2 0
|
|||
|
#> y1 4 4</code></pre>
|
|||
|
<div class="sourceCode" id="cb147"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb147-1"><a href="#cb147-1" tabindex="-1"></a><span class="fu">dend_diff</span>(x,y)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb148"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb148-1"><a href="#cb148-1" tabindex="-1"></a><span class="fu">dist.dendlist</span>(dend1234)</span></code></pre></div>
|
|||
|
<pre><code>#> Complete Single Average
|
|||
|
#> Single 6
|
|||
|
#> Average 2 4
|
|||
|
#> Centroid 4 2 2</code></pre>
|
|||
|
<p>This function might implement other topological distances in the
|
|||
|
future.</p>
|
|||
|
</div>
|
|||
|
<div id="correlation-matrix-using-cor.dendlist" class="section level4">
|
|||
|
<h4>Correlation matrix using cor.dendlist</h4>
|
|||
|
<p>Both Baker’s Gamma and cophenetic correlation (which will be
|
|||
|
introduced shortly) can be calculated to create a correlation matrix
|
|||
|
using the <code>cor.dendlist</code> function (the default method is
|
|||
|
cophenetic correlation):</p>
|
|||
|
<div class="sourceCode" id="cb150"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb150-1"><a href="#cb150-1" tabindex="-1"></a><span class="fu">cor.dendlist</span>(dend1234)</span></code></pre></div>
|
|||
|
<pre><code>#> Complete Single Average Centroid
|
|||
|
#> Complete 1.0000000 0.4272001 0.5635291 0.4466374
|
|||
|
#> Single 0.4272001 1.0000000 0.9508998 0.9910913
|
|||
|
#> Average 0.5635291 0.9508998 1.0000000 0.9556376
|
|||
|
#> Centroid 0.4466374 0.9910913 0.9556376 1.0000000</code></pre>
|
|||
|
<p>The corrplot library offers a nice visualization:</p>
|
|||
|
<div class="sourceCode" id="cb152"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb152-1"><a href="#cb152-1" tabindex="-1"></a><span class="fu">library</span>(corrplot)</span>
|
|||
|
<span id="cb152-2"><a href="#cb152-2" tabindex="-1"></a><span class="fu">corrplot</span>(<span class="fu">cor.dendlist</span>(dend1234), <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>Which easily tells us that single, average and centroid give similar
|
|||
|
results, while complete is somewhat different.</p>
|
|||
|
<div class="sourceCode" id="cb153"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb153-1"><a href="#cb153-1" tabindex="-1"></a><span class="co"># same subtrees, so there is no need to color the branches</span></span>
|
|||
|
<span id="cb153-2"><a href="#cb153-2" tabindex="-1"></a>dend1234 <span class="sc">%>%</span> <span class="fu">tanglegram</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">3</span>)) </span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb154"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb154-1"><a href="#cb154-1" tabindex="-1"></a><span class="co"># Here the branches colors are very helpful:</span></span>
|
|||
|
<span id="cb154-2"><a href="#cb154-2" tabindex="-1"></a>dend1234 <span class="sc">%>%</span> <span class="fu">tanglegram</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>), </span>
|
|||
|
<span id="cb154-3"><a href="#cb154-3" tabindex="-1"></a> <span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
</div>
|
|||
|
<div id="bakers-gamma-index" class="section level4">
|
|||
|
<h4>Baker’s Gamma Index</h4>
|
|||
|
<p>Baker’s Gamma Index (see Baker’s paper from 1974) is a measure of
|
|||
|
association (similarity) between two trees of hierarchical clustering
|
|||
|
(dendrograms). It is defined as the rank correlation between the stages
|
|||
|
at which pairs of objects combine in each of the two trees.</p>
|
|||
|
<p>In more detail: it is calculated by taking two items and seeing what
|
|||
|
is the highest possible level of k (number of cluster groups created
|
|||
|
when cutting the tree) for which the two items still belong to the same
|
|||
|
tree. That k is returned, and the same is done for these two items for
|
|||
|
the second tree. There are “n choose 2” combinations of such pairs of items
|
|||
|
from the items in the tree, and all of these numbers are calculated for
|
|||
|
each of the two trees. Then, these two sets of numbers (a set for the
|
|||
|
items in each tree) are paired according to the pairs of items compared,
|
|||
|
and a Spearman correlation is calculated.</p>
|
|||
|
<p>The value can range between -1 and 1, with values near 0 meaning that
|
|||
|
the two trees are not statistically similar. For exact p-value one
|
|||
|
should use a permutation test. One such option will be to permute over
|
|||
|
the labels of one tree many times, calculating the distribution under
|
|||
|
the null hypothesis (keeping the trees topologies constant).</p>
|
|||
|
<p>Notice that this measure is not affected by the height of a branch
|
|||
|
but only by its relative position compared with other branches.</p>
|
|||
|
<div class="sourceCode" id="cb155"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb155-1"><a href="#cb155-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend15, dend51)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.2751938</code></pre>
|
|||
|
<p>Even though we can reach perfect entanglement, Baker’s gamma shows us
|
|||
|
that the trees’ topologies are not identical, as opposed to the
|
|||
|
correlation of a tree with itself:</p>
|
|||
|
<div class="sourceCode" id="cb157"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb157-1"><a href="#cb157-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend15, dend15)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1</code></pre>
|
|||
|
<p>Since the observations creating the Baker’s Gamma Index of such a
|
|||
|
measure are correlated, we need to perform a permutation test for the
|
|||
|
calculation of the statistical significance of the index. Let’s look at
|
|||
|
the distribution of Baker’s Gamma Index under the null hypothesis
|
|||
|
(assuming fixed tree topologies). This will be different for different
|
|||
|
tree structures and sizes. Here are the results when the compared tree
|
|||
|
is itself (after shuffling its own labels), and when comparing tree 1 to
|
|||
|
the shuffled tree 2:</p>
|
|||
|
<div class="sourceCode" id="cb159"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb159-1"><a href="#cb159-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
|
|||
|
<span id="cb159-2"><a href="#cb159-2" tabindex="-1"></a>the_cor <span class="ot"><-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend15)</span>
|
|||
|
<span id="cb159-3"><a href="#cb159-3" tabindex="-1"></a>the_cor2 <span class="ot"><-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend51)</span>
|
|||
|
<span id="cb159-4"><a href="#cb159-4" tabindex="-1"></a>the_cor</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1</code></pre>
|
|||
|
<div class="sourceCode" id="cb161"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb161-1"><a href="#cb161-1" tabindex="-1"></a>the_cor2</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.2751938</code></pre>
|
|||
|
<div class="sourceCode" id="cb163"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb163-1"><a href="#cb163-1" tabindex="-1"></a>R <span class="ot"><-</span> <span class="dv">100</span></span>
|
|||
|
<span id="cb163-2"><a href="#cb163-2" tabindex="-1"></a>cor_bakers_gamma_results <span class="ot"><-</span> <span class="fu">numeric</span>(R)</span>
|
|||
|
<span id="cb163-3"><a href="#cb163-3" tabindex="-1"></a>dend_mixed <span class="ot"><-</span> dend15</span>
|
|||
|
<span id="cb163-4"><a href="#cb163-4" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>R) {</span>
|
|||
|
<span id="cb163-5"><a href="#cb163-5" tabindex="-1"></a> dend_mixed <span class="ot"><-</span> <span class="fu">sample.dendrogram</span>(dend_mixed, <span class="at">replace =</span> <span class="cn">FALSE</span>)</span>
|
|||
|
<span id="cb163-6"><a href="#cb163-6" tabindex="-1"></a> cor_bakers_gamma_results[i] <span class="ot"><-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend_mixed)</span>
|
|||
|
<span id="cb163-7"><a href="#cb163-7" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb163-8"><a href="#cb163-8" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(cor_bakers_gamma_results),</span>
|
|||
|
<span id="cb163-9"><a href="#cb163-9" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Baker's gamma distribution under H0"</span>,</span>
|
|||
|
<span id="cb163-10"><a href="#cb163-10" tabindex="-1"></a> <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>,<span class="dv">1</span>))</span>
|
|||
|
<span id="cb163-11"><a href="#cb163-11" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="dv">0</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb163-12"><a href="#cb163-12" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> the_cor, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb163-13"><a href="#cb163-13" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> the_cor2, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">4</span>)</span>
|
|||
|
<span id="cb163-14"><a href="#cb163-14" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topleft"</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">"cor"</span>, <span class="st">"cor2"</span>), <span class="at">fill =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">4</span>))</span>
|
|||
|
<span id="cb163-15"><a href="#cb163-15" tabindex="-1"></a><span class="fu">round</span>(<span class="fu">sum</span>(the_cor2 <span class="sc"><</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.17</code></pre>
|
|||
|
<div class="sourceCode" id="cb165"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb165-1"><a href="#cb165-1" tabindex="-1"></a><span class="fu">title</span>(<span class="at">sub =</span> <span class="fu">paste</span>(<span class="st">"One sided p-value:"</span>,</span>
|
|||
|
<span id="cb165-2"><a href="#cb165-2" tabindex="-1"></a> <span class="st">"cor ="</span>, <span class="fu">round</span>(<span class="fu">sum</span>(the_cor <span class="sc"><</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>),</span>
|
|||
|
<span id="cb165-3"><a href="#cb165-3" tabindex="-1"></a> <span class="st">" ; cor2 ="</span>, <span class="fu">round</span>(<span class="fu">sum</span>(the_cor2 <span class="sc"><</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>)</span>
|
|||
|
<span id="cb165-4"><a href="#cb165-4" tabindex="-1"></a> ))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>We can see that we do not have enough evidence that dend15 and dend51
|
|||
|
are significantly “similar” (i.e.: with a correlation larger than
|
|||
|
0).</p>
|
|||
|
<p>We can also build a bootstrap confidence interval, using
|
|||
|
<code>sample.dendrogram</code>, for the correlation. This function can
|
|||
|
be very slow for larger trees, so make sure you use it carefully:</p>
|
|||
|
<div class="sourceCode" id="cb166"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb166-1"><a href="#cb166-1" tabindex="-1"></a>dend1 <span class="ot"><-</span> dend15</span>
|
|||
|
<span id="cb166-2"><a href="#cb166-2" tabindex="-1"></a>dend2 <span class="ot"><-</span> dend51</span>
|
|||
|
<span id="cb166-3"><a href="#cb166-3" tabindex="-1"></a></span>
|
|||
|
<span id="cb166-4"><a href="#cb166-4" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23801</span>)</span>
|
|||
|
<span id="cb166-5"><a href="#cb166-5" tabindex="-1"></a></span>
|
|||
|
<span id="cb166-6"><a href="#cb166-6" tabindex="-1"></a>R <span class="ot"><-</span> <span class="dv">100</span></span>
|
|||
|
<span id="cb166-7"><a href="#cb166-7" tabindex="-1"></a>dend1_labels <span class="ot"><-</span> <span class="fu">labels</span>(dend1)</span>
|
|||
|
<span id="cb166-8"><a href="#cb166-8" tabindex="-1"></a>dend2_labels <span class="ot"><-</span> <span class="fu">labels</span>(dend2)</span>
|
|||
|
<span id="cb166-9"><a href="#cb166-9" tabindex="-1"></a>cor_bakers_gamma_results <span class="ot"><-</span> <span class="fu">numeric</span>(R)</span>
|
|||
|
<span id="cb166-10"><a href="#cb166-10" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>R) {</span>
|
|||
|
<span id="cb166-11"><a href="#cb166-11" tabindex="-1"></a> sampled_labels <span class="ot"><-</span> <span class="fu">sample</span>(dend1_labels, <span class="at">replace =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb166-12"><a href="#cb166-12" tabindex="-1"></a> <span class="co"># members needs to be fixed since it will be later used in nleaves</span></span>
|
|||
|
<span id="cb166-13"><a href="#cb166-13" tabindex="-1"></a> dend_mixed1 <span class="ot"><-</span> <span class="fu">sample.dendrogram</span>(dend1, </span>
|
|||
|
<span id="cb166-14"><a href="#cb166-14" tabindex="-1"></a> <span class="at">dend_labels=</span>dend1_labels,</span>
|
|||
|
<span id="cb166-15"><a href="#cb166-15" tabindex="-1"></a> <span class="at">fix_members=</span><span class="cn">TRUE</span>,<span class="at">fix_order=</span><span class="cn">TRUE</span>,<span class="at">fix_midpoint=</span><span class="cn">FALSE</span>,</span>
|
|||
|
<span id="cb166-16"><a href="#cb166-16" tabindex="-1"></a> <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">sampled_labels=</span>sampled_labels</span>
|
|||
|
<span id="cb166-17"><a href="#cb166-17" tabindex="-1"></a> )</span>
|
|||
|
<span id="cb166-18"><a href="#cb166-18" tabindex="-1"></a> dend_mixed2 <span class="ot"><-</span> <span class="fu">sample.dendrogram</span>(dend2, <span class="at">dend_labels=</span>dend2_labels,</span>
|
|||
|
<span id="cb166-19"><a href="#cb166-19" tabindex="-1"></a> <span class="at">fix_members=</span><span class="cn">TRUE</span>,<span class="at">fix_order=</span><span class="cn">TRUE</span>,<span class="at">fix_midpoint=</span><span class="cn">FALSE</span>,</span>
|
|||
|
<span id="cb166-20"><a href="#cb166-20" tabindex="-1"></a> <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">sampled_labels=</span>sampled_labels</span>
|
|||
|
<span id="cb166-21"><a href="#cb166-21" tabindex="-1"></a> ) </span>
|
|||
|
<span id="cb166-22"><a href="#cb166-22" tabindex="-1"></a> cor_bakers_gamma_results[i] <span class="ot"><-</span> <span class="fu">cor_bakers_gamma</span>(dend_mixed1, dend_mixed2, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span>
|
|||
|
<span id="cb166-23"><a href="#cb166-23" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb166-24"><a href="#cb166-24" tabindex="-1"></a></span>
|
|||
|
<span id="cb166-25"><a href="#cb166-25" tabindex="-1"></a></span>
|
|||
|
<span id="cb166-26"><a href="#cb166-26" tabindex="-1"></a><span class="co"># here is the tanglegram</span></span>
|
|||
|
<span id="cb166-27"><a href="#cb166-27" tabindex="-1"></a><span class="fu">tanglegram</span>(dend1, dend2)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb167"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb167-1"><a href="#cb167-1" tabindex="-1"></a><span class="co"># And here is the tanglegram for one sample of our trees:</span></span>
|
|||
|
<span id="cb167-2"><a href="#cb167-2" tabindex="-1"></a>dend_mixed1 <span class="ot"><-</span> <span class="fu">rank_order.dendrogram</span>(dend_mixed1)</span>
|
|||
|
<span id="cb167-3"><a href="#cb167-3" tabindex="-1"></a>dend_mixed2 <span class="ot"><-</span> <span class="fu">rank_order.dendrogram</span>(dend_mixed2)</span>
|
|||
|
<span id="cb167-4"><a href="#cb167-4" tabindex="-1"></a>dend_mixed1 <span class="ot"><-</span> <span class="fu">fix_members_attr.dendrogram</span>(dend_mixed1)</span>
|
|||
|
<span id="cb167-5"><a href="#cb167-5" tabindex="-1"></a>dend_mixed2 <span class="ot"><-</span> <span class="fu">fix_members_attr.dendrogram</span>(dend_mixed2)</span>
|
|||
|
<span id="cb167-6"><a href="#cb167-6" tabindex="-1"></a><span class="fu">tanglegram</span>(dend_mixed1, dend_mixed2)</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<div class="sourceCode" id="cb168"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb168-1"><a href="#cb168-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend_mixed1, dend_mixed2, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1</code></pre>
|
|||
|
<div class="sourceCode" id="cb170"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb170-1"><a href="#cb170-1" tabindex="-1"></a>CI95 <span class="ot"><-</span> <span class="fu">quantile</span>(cor_bakers_gamma_results, <span class="at">probs=</span><span class="fu">c</span>(.<span class="dv">025</span>,.<span class="dv">975</span>))</span>
|
|||
|
<span id="cb170-2"><a href="#cb170-2" tabindex="-1"></a>CI95</span></code></pre></div>
|
|||
|
<pre><code>#> 2.5% 97.5%
|
|||
|
#> 0.2751938 1.0000000</code></pre>
|
|||
|
<div class="sourceCode" id="cb172"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb172-1"><a href="#cb172-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>))</span>
|
|||
|
<span id="cb172-2"><a href="#cb172-2" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(cor_bakers_gamma_results),</span>
|
|||
|
<span id="cb172-3"><a href="#cb172-3" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Baker's gamma bootstrap distribution"</span>,</span>
|
|||
|
<span id="cb172-4"><a href="#cb172-4" tabindex="-1"></a> <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>,<span class="dv">1</span>))</span>
|
|||
|
<span id="cb172-5"><a href="#cb172-5" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> CI95, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">3</span>)</span>
|
|||
|
<span id="cb172-6"><a href="#cb172-6" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">cor_bakers_gamma</span>(dend1, dend2), <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb172-7"><a href="#cb172-7" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topleft"</span>, <span class="at">legend =</span><span class="fu">c</span>(<span class="st">"95% CI"</span>, <span class="st">"Baker's Gamma Index"</span>), <span class="at">fill =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">2</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="
|
|||
|
<p>The bootstrap sampling can do weird things with small trees. In this
|
|||
|
case there were many bootstrap samples in which the two trees got perfect correlation. The
|
|||
|
usage and interpretation should be done carefully!</p>
|
|||
|
</div>
|
|||
|
<div id="cophenetic-correlation" class="section level4">
|
|||
|
<h4>Cophenetic correlation</h4>
|
|||
|
<p>The cophenetic distance between two observations that have been
|
|||
|
clustered is defined to be the inter-group dissimilarity at which the
|
|||
|
two observations are first combined into a single cluster. This distance
|
|||
|
has many ties and restrictions. The cophenetic correlation (see Sokal
|
|||
|
1962) is the correlation between two cophenetic distance matrices of two
|
|||
|
trees.</p>
|
|||
|
<p>The value can range between -1 and 1, with values near 0 meaning that
|
|||
|
the two trees are not statistically similar. For exact p-value one
|
|||
|
should resort to a permutation test. One such option will be to permute
|
|||
|
over the labels of one tree many times, and calculating the distribution
|
|||
|
under the null hypothesis (keeping the trees topologies constant).</p>
|
|||
|
<div class="sourceCode" id="cb173"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb173-1"><a href="#cb173-1" tabindex="-1"></a><span class="fu">cor_cophenetic</span>(dend15, dend51)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.3125</code></pre>
|
|||
|
<p>The function <code>cor_cophenetic</code> is faster than
|
|||
|
<code>cor_bakers_gamma</code>, and might be preferred for that
|
|||
|
reason.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="the-fowlkes-mallows-index-and-the-bk-plot" class="section level3">
|
|||
|
<h3>The Fowlkes-Mallows Index and the Bk plot</h3>
|
|||
|
<div id="the-fowlkes-mallows-index" class="section level4">
|
|||
|
<h4>The Fowlkes-Mallows Index</h4>
|
|||
|
<p>The Fowlkes-Mallows Index (see Fowlkes 1983) (FM Index, or Bk) is a
|
|||
|
measure of similarity between two clusterings. The FM index ranges from
|
|||
|
0 to 1; a higher value indicates a greater similarity between the two
|
|||
|
clusterings.</p>
|
|||
|
<p>The dendextend package allows the calculation of FM-Index, its
|
|||
|
expectancy and variance under the null hypothesis, and a creation of
|
|||
|
permutations of the FM-Index under H0. Thanks to the profdpm package, we
|
|||
|
have another example of calculating the FM (though it does not offer the
|
|||
|
expectancy and variance under H0):</p>
|
|||
|
<div class="sourceCode" id="cb175"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb175-1"><a href="#cb175-1" tabindex="-1"></a>hc1 <span class="ot"><-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">"com"</span>)</span>
|
|||
|
<span id="cb175-2"><a href="#cb175-2" tabindex="-1"></a>hc2 <span class="ot"><-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">"single"</span>)</span>
|
|||
|
<span id="cb175-3"><a href="#cb175-3" tabindex="-1"></a></span>
|
|||
|
<span id="cb175-4"><a href="#cb175-4" tabindex="-1"></a><span class="co"># FM index of a clustering with itself is 1:</span></span>
|
|||
|
<span id="cb175-5"><a href="#cb175-5" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 1
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.37217
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 5.985372e-05</code></pre>
|
|||
|
<div class="sourceCode" id="cb177"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb177-1"><a href="#cb177-1" tabindex="-1"></a><span class="co"># FM index of two clusterings:</span></span>
|
|||
|
<span id="cb177-2"><a href="#cb177-2" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>)) </span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.8059522
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.4462325
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 6.464092e-05</code></pre>
|
|||
|
<div class="sourceCode" id="cb179"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb179-1"><a href="#cb179-1" tabindex="-1"></a><span class="co"># we got a value far above the expected under H0</span></span>
|
|||
|
<span id="cb179-2"><a href="#cb179-2" tabindex="-1"></a> </span>
|
|||
|
<span id="cb179-3"><a href="#cb179-3" tabindex="-1"></a><span class="co"># Using the R code:</span></span>
|
|||
|
<span id="cb179-4"><a href="#cb179-4" tabindex="-1"></a><span class="fu">FM_index_R</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.8059522
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.4462325
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 6.464092e-05</code></pre>
|
|||
|
<p>The E_FM and V_FM are the values expected under the null hypothesis
|
|||
|
that the two trees have the same topology but one is a random shuffle of
|
|||
|
the labels of the other (i.e.: “no connection” between the trees).</p>
|
|||
|
<p>So for the values:</p>
|
|||
|
<div class="sourceCode" id="cb181"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb181-1"><a href="#cb181-1" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>)) </span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.8059522
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.4462325
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 6.464092e-05</code></pre>
|
|||
|
<p>We can take (under a normal asymptotic distribution)</p>
|
|||
|
<div class="sourceCode" id="cb183"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb183-1"><a href="#cb183-1" tabindex="-1"></a><span class="fl">0.4462</span> <span class="sc">+</span> <span class="fl">1.645</span> <span class="sc">*</span> <span class="fu">sqrt</span>(<span class="fl">6.464092e-05</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> [1] 0.4594257</code></pre>
|
|||
|
<p>And since 0.8059 (our value) > 0.4594 (the critical value under
|
|||
|
H0, with alpha=5% for a one sided test) - then we can say that we
|
|||
|
significantly reject the hypothesis that the two trees are
|
|||
|
“not-similar”.</p>
|
|||
|
</div>
|
|||
|
<div id="the-bk-plot" class="section level4">
|
|||
|
<h4>The Bk plot</h4>
|
|||
|
<p>In the Bk method we calculate the FM Index (Bk) for each k
|
|||
|
(k=2,3,…,n-1) number of clusters, giving the association between the two
|
|||
|
trees when each is cut to have k groups. The similarity between two
|
|||
|
hierarchical clustering dendrograms can be investigated using the
|
|||
|
(k,Bk) plot: For every level of splitting of the two dendrograms which
|
|||
|
produces k clusters in each tree, the plot shows the number Bk, and
|
|||
|
therefore enables the investigation of potential nuances in the
|
|||
|
structure of similarity. The Bk measures the number of pairs of items
|
|||
|
which are in the same cluster in both dendrograms, one of the clusters
|
|||
|
in one of the trees and one of the clusters in the other tree, divided
|
|||
|
by the geometric mean of the number of pairs of items which are in the
|
|||
|
same cluster in each tree. Namely, <span class="math inline">\({a_{uv}}
|
|||
|
= 1 \left( \text{or } b_{uv} = 1 \right)\)</span> if the items
|
|||
|
u and v are in the same cluster in the first tree (second tree), when it
|
|||
|
is cut so as to give k clusters, and 0 otherwise:</p>
|
|||
|
<p><span class="math display">\[{FM_k} = {B_k} =
\frac{\sum\limits_{u \lt v} a_{uv} b_{uv}}{\sqrt{\sum\limits_{u \lt v}
a_{uv} \sum\limits_{u \lt v} b_{uv}}}\]</span></p>
|
|||
|
<p>The Bk measure can be plotted for every value of k (except k=n) in
|
|||
|
order to create the “(k,Bk) plot”. The plot compares the similarity of
|
|||
|
the two trees for different cuts. The mean and variance of Bk, under the
|
|||
|
null hypothesis (that the two trees are not “similar”), and under the
|
|||
|
assumption that the margins of the matching matrix are fixed, are given
|
|||
|
in Fowlkes and Mallows (1983). They allow making inference
|
|||
|
on whether the results obtained are different from what would have been
|
|||
|
expected under the null hypothesis (of no particular order of the
|
|||
|
trees’ labels).</p>
|
|||
|
<p>The <code>Bk</code> and the <code>Bk_plot</code> functions allow the
|
|||
|
calculation of the FM-Index for a range of k values on two trees. Here
|
|||
|
are examples:</p>
|
|||
|
<div class="sourceCode" id="cb185"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb185-1"><a href="#cb185-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
|
|||
|
<span id="cb185-2"><a href="#cb185-2" tabindex="-1"></a>ss <span class="ot">&lt;-</span> <span class="cn">TRUE</span> <span class="co"># sample(1:150, 30 ) # TRUE #</span></span>
|
|||
|
<span id="cb185-3"><a href="#cb185-3" tabindex="-1"></a>hc1 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[ss,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">"com"</span>)</span>
|
|||
|
<span id="cb185-4"><a href="#cb185-4" tabindex="-1"></a>hc2 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[ss,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">"single"</span>)</span>
|
|||
|
<span id="cb185-5"><a href="#cb185-5" tabindex="-1"></a>dend1 <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hc1)</span>
|
|||
|
<span id="cb185-6"><a href="#cb185-6" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hc2)</span>
|
|||
|
<span id="cb185-7"><a href="#cb185-7" tabindex="-1"></a><span class="co"># cutree(tree1) </span></span>
|
|||
|
<span id="cb185-8"><a href="#cb185-8" tabindex="-1"></a></span>
|
|||
|
<span id="cb185-9"><a href="#cb185-9" tabindex="-1"></a><span class="co"># It works the same for hclust and dendrograms:</span></span>
|
|||
|
<span id="cb185-10"><a href="#cb185-10" tabindex="-1"></a><span class="fu">Bk</span>(hc1, hc2, <span class="at">k =</span> <span class="dv">3</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> $`3`
|
|||
|
#> [1] 0.8059522
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.4462325
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 6.464092e-05</code></pre>
|
|||
|
<div class="sourceCode" id="cb187"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb187-1"><a href="#cb187-1" tabindex="-1"></a><span class="fu">Bk</span>(dend1, dend2, <span class="at">k =</span> <span class="dv">3</span>)</span></code></pre></div>
|
|||
|
<pre><code>#> $`3`
|
|||
|
#> [1] 0.8059522
|
|||
|
#> attr(,"E_FM")
|
|||
|
#> [1] 0.4462325
|
|||
|
#> attr(,"V_FM")
|
|||
|
#> [1] 6.464092e-05</code></pre>
|
|||
|
<p>The Bk plot:</p>
|
|||
|
<div class="sourceCode" id="cb189"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb189-1"><a href="#cb189-1" tabindex="-1"></a><span class="fu">Bk_plot</span>(hc1, hc2, <span class="at">main =</span> <span class="st">"WRONG Bk plot </span><span class="sc">\n</span><span class="st">(due to the way cutree works with ties in hclust)"</span>, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
|
|||
|
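<p><img role="img" src="" alt="Bk plot computed from the hclust objects, titled “WRONG Bk plot (due to the way cutree works with ties in hclust)”" /></p><!-- NOTE(review): the embedded image data is missing from this copy; restore the original data URI in src -->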
|
|||
|
<div class="sourceCode" id="cb190"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb190-1"><a href="#cb190-1" tabindex="-1"></a><span class="fu">Bk_plot</span>(dend1, dend2, <span class="at">main =</span> <span class="st">"CORRECT Bk plot </span><span class="sc">\n</span><span class="st">(based on dendrograms)"</span>)</span></code></pre></div>
|
|||
|
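<p><img role="img" src="" alt="Bk plot computed from the dendrogram objects, titled “CORRECT Bk plot (based on dendrograms)”" /></p><!-- NOTE(review): the embedded image data is missing from this copy; restore the original data URI in src -->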
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="session-info" class="section level1">
|
|||
|
<h1>Session info</h1>
|
|||
|
<div class="sourceCode" id="cb191"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb191-1"><a href="#cb191-1" tabindex="-1"></a><span class="fu">sessionInfo</span>()</span></code></pre></div>
|
|||
|
<pre><code>#> R version 4.4.1 (2024-06-14)
|
|||
|
#> Platform: x86_64-apple-darwin20
|
|||
|
#> Running under: macOS Big Sur 11.7.10
|
|||
|
#>
|
|||
|
#> Matrix products: default
|
|||
|
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
|
|||
|
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
|
|||
|
#>
|
|||
|
#> locale:
|
|||
|
#> [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
|
|||
|
#>
|
|||
|
#> time zone: America/New_York
|
|||
|
#> tzcode source: internal
|
|||
|
#>
|
|||
|
#> attached base packages:
|
|||
|
#> [1] stats graphics grDevices utils datasets methods base
|
|||
|
#>
|
|||
|
#> other attached packages:
|
|||
|
#> [1] corrplot_0.94 pvclust_2.2-0 dynamicTreeCut_1.63-1
|
|||
|
#> [4] gplots_3.1.3.1 DendSer_1.0.2 seriation_1.5.6
|
|||
|
#> [7] gclus_1.3.2 cluster_2.1.6 viridis_0.6.5
|
|||
|
#> [10] viridisLite_0.4.2 ggplot2_3.5.1 circlize_0.4.16
|
|||
|
#> [13] colorspace_2.1-1 knitr_1.48 dendextend_1.19.0
|
|||
|
#>
|
|||
|
#> loaded via a namespace (and not attached):
|
|||
|
#> [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3
|
|||
|
#> [4] bitops_1.0-8 KernSmooth_2.23-24 shape_1.4.6.1
|
|||
|
#> [7] gtools_3.9.5 digest_0.6.37 magrittr_2.0.3
|
|||
|
#> [10] caTools_1.18.2 evaluate_0.24.0 grid_4.4.1
|
|||
|
#> [13] iterators_1.0.14 fastmap_1.2.0 foreach_1.5.2
|
|||
|
#> [16] jsonlite_1.8.8 GlobalOptions_0.1.2 gridExtra_2.3
|
|||
|
#> [19] fansi_1.0.6 scales_1.3.0 codetools_0.2-20
|
|||
|
#> [22] jquerylib_0.1.4 registry_0.5-1 cli_3.6.3
|
|||
|
#> [25] rlang_1.1.4 munsell_0.5.1 withr_3.0.1
|
|||
|
#> [28] cachem_1.1.0 yaml_2.3.10 tools_4.4.1
|
|||
|
#> [31] dplyr_1.1.4 ca_0.71.1 TSP_1.2-4
|
|||
|
#> [34] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4
|
|||
|
#> [37] MASS_7.3-60.2 pkgconfig_2.0.3 pillar_1.9.0
|
|||
|
#> [40] bslib_0.8.0 gtable_0.3.5 glue_1.7.0
|
|||
|
#> [43] xfun_0.47 tibble_3.2.1 tidyselect_1.2.1
|
|||
|
#> [46] highr_0.11 rstudioapi_0.16.0 farver_2.1.2
|
|||
|
#> [49] htmltools_0.5.8.1 rmarkdown_2.28 labeling_0.4.3
|
|||
|
#> [52] compiler_4.4.1</code></pre>
|
|||
|
</div>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<!-- code folding -->
|
|||
|
|
|||
|
|
|||
|
<!-- dynamically load mathjax for compatibility with self-contained -->
|
|||
|
<script>
// Build a <script> element at runtime that points at the hosted MathJax
// bundle and attach it to the document's <head>, so the math markup in the
// page is typeset once the library has been fetched.
(function loadMathJax() {
  var mathjaxLoader = document.createElement("script");
  var headElement = document.getElementsByTagName("head")[0];

  mathjaxLoader.type = "text/javascript";
  mathjaxLoader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

  // Appending the element to <head> is what adds the loader to the page.
  headElement.appendChild(mathjaxLoader);
})();
</script>
|
|||
|
|
|||
|
</body>
|
|||
|
</html>
|