<!-- Listing metadata: 1983 lines, 2.3 MiB, HTML; timestamp 2025-01-12 00:52:51 +08:00 -->
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Tal Galili" />
<meta name="date" content="2024-11-15" />
<title>Introduction to dendextend</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
var i, h, a;
for (i = 0; i < hs.length; i++) {
h = hs[i];
if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
a = h.attributes;
while (a.length > 0) h.removeAttribute(a[0].name);
}
});
</script>
<style type="text/css">
/* Inline code: keep whitespace but allow long lines to wrap. */
code {
  white-space: pre-wrap;
}

/* Helper classes for small-caps and underlined spans. */
span.smallcaps {
  font-variant: small-caps;
}
span.underline {
  text-decoration: underline;
}

/* Two side-by-side columns, each half the container width. */
div.column {
  display: inline-block;
  vertical-align: top;
  width: 50%;
}

/* Hanging indent: first line starts 1.5em to the left of the rest. */
div.hanging-indent {
  margin-left: 1.5em;
  text-indent: -1.5em;
}

/* Task lists are rendered without bullet markers. */
ul.task-list {
  list-style: none;
}
</style>
<style type="text/css">
/* Render <code> with whitespace preserved and no line wrapping.
   Note: a later `pre, code` rule in this page sets white-space: pre-wrap
   again, so the order of these style blocks matters. */
code {
white-space: pre;
}
/* Do not clip content that overflows a .sourceCode box. */
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
/* Styles for highlighted code listings (pre.sourceCode > code.sourceCode,
   wrapped in div.sourceCode). The data-origin="pandoc" attribute is the
   marker the inline script following this stylesheet uses to find it. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
/* give empty line spans a height */
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* on screen, a listing wider than its container scrolls */
@media screen {
div.sourceCode { overflow: auto; }
}
/* in print, long lines wrap with a 5em hanging indent */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
}
/* Numbered listings (pre.numberSource): a CSS counter is reset per listing,
   incremented per line span, and printed before each line's first anchor. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
/* line numbers are excluded from text selection */
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Intentionally empty in this document. The inline script after this
   stylesheet searches for a div.sourceCode rule that sets color or
   background-color and re-creates it as pre.sourceCode; this rule sets
   neither, so it is left as is. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Token colours. Each class is a two-letter code placed on <span> elements
   inside the listings; the notes below describe only the codes that are
   visibly used by the R listings in this page. The remaining codes are
   presumably the rest of pandoc's highlighting token set; confirm against
   the pandoc documentation. */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; } /* named arguments, e.g. `method =` */
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; } /* `function`, `if` */
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; } /* `#` comments */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; } /* `##` comments */
code span.dt { color: #902000; }
code span.dv { color: #40a070; } /* integer literals */
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; } /* function names in calls */
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; } /* assignment arrows `<-` / `->` */
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; } /* operators such as `:`, `%>%`, `!`, `::` */
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; } /* string literals */
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
var sheets = document.styleSheets;
for (var i = 0; i < sheets.length; i++) {
if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
try { var rules = sheets[i].cssRules; } catch (e) { continue; }
var j = 0;
while (j < rules.length) {
var rule = rules[j];
// check if there is a div.sourceCode rule
if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
j++;
continue;
}
var style = rule.style.cssText;
// check if color or background-color is set
if (rule.style.color === '' && rule.style.backgroundColor === '') {
j++;
continue;
}
// replace div.sourceCode by a pre.sourceCode rule
sheets[i].deleteRule(j);
sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
}
}
})();
</script>
<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: all borders cleared, then a single 1px top line. */
hr {
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white mat with a thin rounded grey frame. */
img {
background-color: #FFFFFF;
padding: 2px;
border-radius: 3px;
border: 1px solid #CCCCCC;
margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Introduction to dendextend</h1>
<h4 class="author">Tal Galili</h4>
<h4 class="date">2024-11-15</h4>
<div id="TOC">
<ul>
<li><a href="#introduction" id="toc-introduction">Introduction</a></li>
<li><a href="#prerequisites" id="toc-prerequisites">Prerequisites</a>
<ul>
<li><a href="#acknowledgement" id="toc-acknowledgement">Acknowledgement</a></li>
<li><a href="#chaining" id="toc-chaining">Chaining</a></li>
<li><a href="#a-dendrogram-is-a-nested-list-of-lists-with-attributes" id="toc-a-dendrogram-is-a-nested-list-of-lists-with-attributes">A
dendrogram is a nested list of lists with attributes</a></li>
<li><a href="#installation" id="toc-installation">Installation</a></li>
</ul></li>
<li><a href="#how-to-explore-a-dendrograms-parameters" id="toc-how-to-explore-a-dendrograms-parameters">How to explore a
dendrogram’s parameters</a>
<ul>
<li><a href="#taking-a-first-look-at-a-dendrogram" id="toc-taking-a-first-look-at-a-dendrogram">Taking a first look at a
dendrogram</a></li>
<li><a href="#getting-nodes-attributes-in-a-depth-first-search" id="toc-getting-nodes-attributes-in-a-depth-first-search">Getting nodes
attributes in a depth-first search</a></li>
</ul></li>
<li><a href="#how-to-change-a-dendrogram" id="toc-how-to-change-a-dendrogram">How to change a dendrogram</a>
<ul>
<li><a href="#the-set-function" id="toc-the-set-function">The “set”
function</a></li>
<li><a href="#two-simple-trees-to-play-with" id="toc-two-simple-trees-to-play-with">Two simple trees to play
with</a></li>
<li><a href="#setting-a-dendrograms-labels" id="toc-setting-a-dendrograms-labels">Setting a dendrogram’s
labels</a></li>
<li><a href="#setting-a-dendrograms-nodesleaves-points" id="toc-setting-a-dendrograms-nodesleaves-points">Setting a dendrogram’s
nodes/leaves (points)</a></li>
<li><a href="#setting-a-dendrograms-branches" id="toc-setting-a-dendrograms-branches">Setting a dendrogram’s
branches</a></li>
<li><a href="#changing-a-dendrograms-structure" id="toc-changing-a-dendrograms-structure">Changing a dendrogram’s
structure</a></li>
<li><a href="#adding-extra-bars-and-rectangles" id="toc-adding-extra-bars-and-rectangles">Adding extra bars and
rectangles</a></li>
</ul></li>
<li><a href="#ggplot2-integration" id="toc-ggplot2-integration">ggplot2
integration</a></li>
<li><a href="#enhancing-other-packages" id="toc-enhancing-other-packages">Enhancing other packages</a>
<ul>
<li><a href="#dendser" id="toc-dendser">DendSer</a></li>
<li><a href="#gplots" id="toc-gplots">gplots</a></li>
<li><a href="#nmf" id="toc-nmf">NMF</a></li>
<li><a href="#heatmaply" id="toc-heatmaply">heatmaply</a></li>
<li><a href="#dynamictreecut" id="toc-dynamictreecut">dynamicTreeCut</a></li>
<li><a href="#pvclust" id="toc-pvclust">pvclust</a></li>
<li><a href="#circlize" id="toc-circlize">circlize</a></li>
</ul></li>
<li><a href="#comparing-two-dendrograms" id="toc-comparing-two-dendrograms">Comparing two dendrograms</a>
<ul>
<li><a href="#dendlist" id="toc-dendlist">dendlist</a></li>
<li><a href="#dend_diff" id="toc-dend_diff">dend_diff</a></li>
<li><a href="#tanglegram" id="toc-tanglegram">tanglegram</a></li>
<li><a href="#correlation-measures" id="toc-correlation-measures">Correlation measures</a></li>
<li><a href="#the-fowlkes-mallows-index-and-the-bk-plot" id="toc-the-fowlkes-mallows-index-and-the-bk-plot">The Fowlkes-Mallows
Index and the Bk plot</a></li>
</ul></li>
<li><a href="#session-info" id="toc-session-info">Session info</a></li>
</ul>
</div>
<!--
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{Introduction to dendextend}
-->
<p><strong>Author</strong>: Tal Galili (homepage: r-statistics.com,
e-mail: <a href="mailto:Tal.Galili@gmail.com" class="email">Tal.Galili@gmail.com</a> )</p>
<p><strong>tl;dr</strong>: the <a href="https://cran.r-project.org/package=dendextend"><em>dendextend
package</em></a> lets you create figures like this:</p>
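<p><img role="img" src="data:," alt="Example figure created with the dendextend package (embedded image data missing from this copy)" /></p>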
<div id="introduction" class="section level2">
<h2>Introduction</h2>
<p>The <a href="https://cran.r-project.org/package=dendextend"><strong><em>dendextend
package</em></strong></a> offers a set of functions for extending
dendrogram objects in R, letting you <strong>visualize</strong> and
<strong>compare</strong> trees of hierarchical clusterings. You can:</p>
<ul>
<li><strong>Adjust a tree’s graphical parameters</strong> - the color,
size, type, etc. of its branches, nodes and labels.</li>
<li>Visually and statistically <strong>compare different
dendrograms</strong> to one another.</li>
</ul>
<p>The goal of this document is to introduce you to the basic functions
that dendextend provides, and show how they may be applied. We will make
extensive use of “chaining” (explained next).</p>
</div>
<div id="prerequisites" class="section level2">
<h2>Prerequisites</h2>
<div id="acknowledgement" class="section level3">
<h3>Acknowledgement</h3>
<p>This package was made possible by the support of my thesis
adviser <a href="http://www.math.tau.ac.il/~ybenja/">Yoav Benjamini</a>,
as well as code contributions from many R users. They are:</p>
<pre><code>#&gt; [1] &quot;Tal Galili &lt;tal.galili@gmail.com&gt; [aut, cre, cph] (https://www.r-statistics.com)&quot;
#&gt; [2] &quot;Gavin Simpson [ctb]&quot;
#&gt; [3] &quot;Gregory Jefferis &lt;jefferis@gmail.com&gt; [ctb] (imported code from his dendroextras package)&quot;
#&gt; [4] &quot;Marco Gallotta [ctb] (a.k.a: marcog)&quot;
#&gt; [5] &quot;Johan Renaudie [ctb] (https://github.com/plannapus)&quot;
#&gt; [6] &quot;R core team [ctb] (Thanks for the Infastructure, and code in the examples)&quot;
#&gt; [7] &quot;Kurt Hornik [ctb]&quot;
#&gt; [8] &quot;Uwe Ligges [ctb]&quot;
#&gt; [9] &quot;Andrej-Nikolai Spiess [ctb]&quot;
#&gt; [10] &quot;Steve Horvath &lt;SHorvath@mednet.ucla.edu&gt; [ctb]&quot;
#&gt; [11] &quot;Peter Langfelder &lt;Peter.Langfelder@gmail.com&gt; [ctb]&quot;
#&gt; [12] &quot;skullkey [ctb]&quot;
#&gt; [13] &quot;Mark Van Der Loo &lt;mark.vanderloo@gmail.com&gt; [ctb] (https://github.com/markvanderloo d3dendrogram)&quot;
#&gt; [14] &quot;Yoav Benjamini [ths]&quot;</code></pre>
<p>The <strong>design</strong> of the dendextend package (and this
manual!) is heavily inspired by <a href="https://hadley.nz/">Hadley
Wickham’s</a> work, especially his text on <a href="https://r-pkgs.org/">writing an R package</a>, the <a href="https://cran.r-project.org/package=devtools">devtools package</a>,
and the dplyr package (specifically the use of chaining, and the <a href="https://CRAN.R-project.org/package=dplyr/vignettes/dplyr.html">Introduction
text to dplyr</a>).</p>
</div>
<div id="chaining" class="section level3">
<h3>Chaining</h3>
<p>Function calls in dendextend often get a dendrogram and return a
(modified) dendrogram. This doesn’t lead to particularly elegant code if
you want to do many operations at once. The same is true even in the
first stage of creating a dendrogram.</p>
<p>In order to construct a dendrogram, you will (often) need to go
through several steps. You can either do so while keeping the
intermediate results:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>d1 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="co"># some data</span></span>
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a>d2 <span class="ot">&lt;-</span> <span class="fu">dist</span>(d1)</span>
<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a>d3 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(d2, <span class="at">method =</span> <span class="st">&quot;average&quot;</span>)</span>
<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(d3)</span></code></pre></div>
<p>Or, you can also wrap the function calls inside each other:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(<span class="fu">hclust</span>(<span class="fu">dist</span>(<span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>)), <span class="at">method =</span> <span class="st">&quot;average&quot;</span>))</span></code></pre></div>
<p>However, neither solution is ideal: the first solution includes
redundant intermediate objects, while the second is difficult to read
(since the order of the operations is from inside to out, while the
arguments are a long way away from the function).</p>
<p>To get around this problem, dendextend encourages the use of the
<code>%&gt;%</code> (“pipe” or “chaining”) operator (imported from the
magrittr package). This turns <code>x %&gt;% f(y)</code> into
<code>f(x, y)</code> so you can use it to rewrite (“chain”) multiple
operations such that they can be read from left-to-right,
top-to-bottom.</p>
<p>For example, the following will be written as it would be
explained:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%&gt;%</span> <span class="co"># take the a vector from 1 to 5</span></span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a> dist <span class="sc">%&gt;%</span> <span class="co"># calculate a distance matrix, </span></span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> <span class="co"># on it compute hierarchical clustering using the &quot;average&quot; method, </span></span>
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a> as.dendrogram <span class="co"># and lastly, turn that object into a dendrogram.</span></span></code></pre></div>
<p>For more details, you may look at:</p>
<ul>
<li><a href="https://cran.r-project.org/package=magrittr">magrittr on
CRAN</a></li>
<li><a href="https://CRAN.R-project.org/package=magrittr/vignettes/magrittr.html">Introduction
to the magrittr package</a></li>
<li><a href="https://www.r-statistics.com/2014/08/simpler-r-coding-with-pipes-the-present-and-future-of-the-magrittr-package/">Simpler
R coding with pipes &gt; the present and future of the magrittr
package</a></li>
</ul>
</div>
<div id="a-dendrogram-is-a-nested-list-of-lists-with-attributes" class="section level3">
<h3>A dendrogram is a nested list of lists with attributes</h3>
<p>The first step in working with dendrograms is to understand that
they are just a <strong>nested list of lists with attributes</strong>.
Let us explore this for the following (tiny) tree:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">2</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a><span class="co"># and plot it:</span></span>
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> plot</span></code></pre></div>
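<p><img role="img" src="data:," alt="Plot of the two-leaf dendrogram created above (embedded image data missing from this copy)" /></p>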
<p>And here is its structure (a nested list of lists with
attributes):</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> unclass <span class="sc">%&gt;%</span> str</span></code></pre></div>
<pre><code>#&gt; List of 2
#&gt; $ : int 1
#&gt; ..- attr(*, &quot;label&quot;)= int 1
#&gt; ..- attr(*, &quot;members&quot;)= int 1
#&gt; ..- attr(*, &quot;height&quot;)= num 0
#&gt; ..- attr(*, &quot;leaf&quot;)= logi TRUE
#&gt; $ : int 2
#&gt; ..- attr(*, &quot;label&quot;)= int 2
#&gt; ..- attr(*, &quot;members&quot;)= int 1
#&gt; ..- attr(*, &quot;height&quot;)= num 0
#&gt; ..- attr(*, &quot;leaf&quot;)= logi TRUE
#&gt; - attr(*, &quot;members&quot;)= int 2
#&gt; - attr(*, &quot;midpoint&quot;)= num 0.5
#&gt; - attr(*, &quot;height&quot;)= num 1</code></pre>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> class</span></code></pre></div>
<pre><code>#&gt; [1] &quot;dendrogram&quot;</code></pre>
</div>
<div id="installation" class="section level3">
<h3>Installation</h3>
<p>To install the stable version on CRAN use:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a><span class="fu">install.packages</span>(<span class="st">&#39;dendextend&#39;</span>)</span></code></pre></div>
<p>To install the GitHub version:</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>require2 <span class="ot">&lt;-</span> <span class="cf">function</span> (package, ...) {</span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">require</span>(package)) <span class="fu">install.packages</span>(package); <span class="fu">library</span>(package)</span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>}</span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="do">## require2(&#39;installr&#39;)</span></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="do">## install.Rtools() # run this if you are using Windows and don&#39;t have Rtools installed</span></span>
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a></span>
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co"># Load devtools:</span></span>
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a><span class="fu">require2</span>(<span class="st">&quot;devtools&quot;</span>)</span>
<span id="cb11-10"><a href="#cb11-10" tabindex="-1"></a>devtools<span class="sc">::</span><span class="fu">install_github</span>(<span class="st">&#39;talgalili/dendextend&#39;</span>)</span>
<span id="cb11-11"><a href="#cb11-11" tabindex="-1"></a><span class="sc">&lt;!--</span> <span class="fu">require2</span>(<span class="st">&quot;Rcpp&quot;</span>) <span class="sc">-</span><span class="ot">-&gt;</span></span>
<span id="cb11-12"><a href="#cb11-12" tabindex="-1"></a></span>
<span id="cb11-13"><a href="#cb11-13" tabindex="-1"></a><span class="co"># Having colorspace is also useful, since it is used</span></span>
<span id="cb11-14"><a href="#cb11-14" tabindex="-1"></a><span class="co"># In various examples in the vignettes</span></span>
<span id="cb11-15"><a href="#cb11-15" tabindex="-1"></a><span class="fu">require2</span>(<span class="st">&quot;colorspace&quot;</span>)</span></code></pre></div>
<p>And then you may load the package using:</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="fu">library</span>(dendextend)</span></code></pre></div>
</div>
</div>
<div id="how-to-explore-a-dendrograms-parameters" class="section level2">
<h2>How to explore a dendrogram’s parameters</h2>
<div id="taking-a-first-look-at-a-dendrogram" class="section level3">
<h3>Taking a first look at a dendrogram</h3>
<p>For the following simple tree:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a><span class="co"># Plot it:</span></span>
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> plot</span></code></pre></div>
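<p><img role="img" src="data:," alt="Plot of the five-leaf dendrogram created above (embedded image data missing from this copy)" /></p>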
<p>Here are some basic parameters we can get:</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> labels <span class="co"># get the labels of the tree</span></span></code></pre></div>
<pre><code>#&gt; [1] 1 2 5 3 4</code></pre>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> nleaves <span class="co"># get the number of leaves of the tree</span></span></code></pre></div>
<pre><code>#&gt; [1] 5</code></pre>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> nnodes <span class="co"># get the number of nodes in the tree (including leaves)</span></span></code></pre></div>
<pre><code>#&gt; [1] 9</code></pre>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> head <span class="co"># A combination of &quot;str&quot; with &quot;head&quot;</span></span></code></pre></div>
<pre><code>#&gt; --[dendrogram w/ 2 branches and 5 members at h = 4]
#&gt; |--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; | |--leaf 1
#&gt; | `--leaf 2
#&gt; `--[dendrogram w/ 2 branches and 3 members at h = 2]
#&gt; |--leaf 5
#&gt; `--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; |--leaf 3
#&gt; `--leaf 4
#&gt; etc...</code></pre>
<p>Next let us look at more sophisticated outputs.</p>
</div>
<div id="getting-nodes-attributes-in-a-depth-first-search" class="section level3">
<h3>Getting nodes attributes in a depth-first search</h3>
<p>When extracting (or inserting) attributes from a dendrogram’s nodes,
this is often done in “depth-first search” order. <a href="https://en.wikipedia.org/wiki/Depth-first_search">Depth-first
search</a> is an algorithm for traversing or searching tree or
graph data structures. One starts at the root and explores as far as
possible along each branch before backtracking.</p>
<p>Here is a plot of a tree, illustrating the order in which you should
read the “nodes attributes”:</p>
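<p><img role="img" src="data:," alt="Plot of a tree illustrating the order in which the nodes attributes are read (embedded image data missing from this copy)" /></p>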
<p>We can get several nodes attributes using <code>get_nodes_attr</code>
(notice the order corresponds with what is shown in the above
figure):</p>
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a><span class="co"># Create a dend:</span></span>
<span id="cb22-2"><a href="#cb22-2" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb22-3"><a href="#cb22-3" tabindex="-1"></a><span class="co"># Get various attributes</span></span>
<span id="cb22-4"><a href="#cb22-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;height&quot;</span>) <span class="co"># node&#39;s height</span></span></code></pre></div>
<pre><code>#&gt; [1] 4 1 0 0 2 0 1 0 0</code></pre>
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> hang.dendrogram <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;height&quot;</span>) <span class="co"># node&#39;s height (after raising the leaves)</span></span></code></pre></div>
<pre><code>#&gt; [1] 4.0 1.0 0.6 0.6 2.0 1.6 1.0 0.6 0.6</code></pre>
<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;members&quot;</span>) <span class="co"># number of members (leaves) under that node</span></span></code></pre></div>
<pre><code>#&gt; [1] 5 2 1 1 3 1 2 1 1</code></pre>
<div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;members&quot;</span>, <span class="at">id =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">5</span>)) <span class="co"># number of members for nodes 2 and 5</span></span></code></pre></div>
<pre><code>#&gt; [1] 2 3</code></pre>
<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;midpoint&quot;</span>) <span class="co"># how much &quot;left&quot; is this node from its left-most child&#39;s location</span></span></code></pre></div>
<pre><code>#&gt; [1] 1.625 0.500 NA NA 0.750 NA 0.500 NA NA</code></pre>
<div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;leaf&quot;</span>) <span class="co"># is this node a leaf</span></span></code></pre></div>
<pre><code>#&gt; [1] NA NA TRUE TRUE NA TRUE NA TRUE TRUE</code></pre>
<div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;label&quot;</span>) <span class="co"># what is the label on this node</span></span></code></pre></div>
<pre><code>#&gt; [1] NA NA 1 2 NA 5 NA 3 4</code></pre>
<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;nodePar&quot;</span>) <span class="co"># empty (for now...)</span></span></code></pre></div>
<pre><code>#&gt; [1] NA NA NA NA NA NA NA NA NA</code></pre>
<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;edgePar&quot;</span>) <span class="co"># empty (for now...)</span></span></code></pre></div>
<pre><code>#&gt; [1] NA NA NA NA NA NA NA NA NA</code></pre>
<p>A similar function for leaves only is
<code>get_leaves_attr</code>.</p>
</div>
</div>
<div id="how-to-change-a-dendrogram" class="section level2">
<h2>How to change a dendrogram</h2>
<div id="the-set-function" class="section level3">
<h3>The “set” function</h3>
<p>The fastest way to start changing parameters with dendextend is by
using the <code>set</code> function. It is written as:
<code>set(object, what, value)</code>, and accepts the following
parameters:</p>
<ol style="list-style-type: decimal">
<li><strong>object</strong>: a dendrogram object,</li>
<li><strong>what</strong>: a character indicating what is the property
of the tree that should be set/updated</li>
<li><strong>value</strong>: a vector with the value to set in the tree
(the type of the value depends on the “what”). Many times, vectors which
are too short are recycled.</li>
</ol>
<p>The <strong>what</strong> parameter accepts many options, each of
which uses some general function in the background. These options deal
with labels, nodes and branches. They are:</p>
<ul>
<li>labels - set the labels (using
<code>labels&lt;-.dendrogram</code>)</li>
<li>labels_colors - set the labels colors (using
<code>color_labels</code>)</li>
<li>labels_cex - set the labels size (using
<code>assign_values_to_leaves_nodePar</code>)</li>
<li>labels_to_character - set the labels to be characters</li>
<li>leaves_pch - set the leaves point type (using
<code>assign_values_to_leaves_nodePar</code>)</li>
<li>leaves_cex - set the leaves point size (using
<code>assign_values_to_leaves_nodePar</code>)</li>
<li>leaves_col - set the leaves point color (using
<code>assign_values_to_leaves_nodePar</code>)</li>
<li>leaves_bg - set the leaves point background color (available only
for pch 21-25, using <code>assign_values_to_leaves_nodePar</code>)</li>
<li>nodes_pch - set the nodes point type (using
<code>assign_values_to_nodes_nodePar</code>)</li>
<li>nodes_cex - set the nodes point size (using
<code>assign_values_to_nodes_nodePar</code>)</li>
<li>nodes_col - set the nodes point color (using
<code>assign_values_to_nodes_nodePar</code>)</li>
<li>nodes_bg - set the nodes point background color (available only for
pch 21-25, using <code>assign_values_to_nodes_nodePar</code>)</li>
<li>hang_leaves - hang the leaves (using
<code>hang.dendrogram</code>)</li>
<li>branches_k_color - color the branches (using
<code>color_branches</code>)</li>
<li>branches_col - set the color of branches (using
<code>assign_values_to_branches_edgePar</code>)</li>
<li>branches_lwd - set the line width of branches (using
<code>assign_values_to_branches_edgePar</code>)</li>
<li>branches_lty - set the line type of branches (using
<code>assign_values_to_branches_edgePar</code>)</li>
<li>by_labels_branches_col - set the color of branches with specific
labels (using <code>branches_attr_by_labels</code>)</li>
<li>by_labels_branches_lwd - set the line width of branches with
specific labels (using <code>branches_attr_by_labels</code>)</li>
<li>by_labels_branches_lty - set the line type of branches with specific
labels (using <code>branches_attr_by_labels</code>)</li>
<li>clear_branches - clear branches attributes (using
<code>remove_branches_edgePar</code>)</li>
<li>clear_leaves - clear leaves attributes (using
<code>remove_leaves_nodePar</code>)</li>
</ul>
</div>
<div id="two-simple-trees-to-play-with" class="section level3">
<h3>Two simple trees to play with</h3>
<p>For illustration purposes, we will create two small trees, and
demonstrate these functions on them.</p>
<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" tabindex="-1"></a>dend13 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="co"># take some data</span></span>
<span id="cb40-2"><a href="#cb40-2" tabindex="-1"></a> dist <span class="sc">%&gt;%</span> <span class="co"># calculate a distance matrix, </span></span>
<span id="cb40-3"><a href="#cb40-3" tabindex="-1"></a> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> <span class="co"># on it compute hierarchical clustering using the &quot;average&quot; method, </span></span>
<span id="cb40-4"><a href="#cb40-4" tabindex="-1"></a> as.dendrogram <span class="co"># and lastly, turn that object into a dendrogram.</span></span>
<span id="cb40-5"><a href="#cb40-5" tabindex="-1"></a><span class="co"># same, but for 5 leaves:</span></span>
<span id="cb40-6"><a href="#cb40-6" tabindex="-1"></a>dend15 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb40-7"><a href="#cb40-7" tabindex="-1"></a></span>
<span id="cb40-8"><a href="#cb40-8" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb40-9"><a href="#cb40-9" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main=</span><span class="st">&quot;dend13&quot;</span>)</span>
<span id="cb40-10"><a href="#cb40-10" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main=</span><span class="st">&quot;dend15&quot;</span>)</span>
<span id="cb40-11"><a href="#cb40-11" tabindex="-1"></a><span class="co"># we could have also used plot(dend)</span></span></code></pre></div>
<p><img role="img" src="
</div>
<div id="setting-a-dendrograms-labels" class="section level3">
<h3>Setting a dendrogram’s labels</h3>
<p>We can get a vector with the tree’s labels:</p>
<div class="sourceCode" id="cb41"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" tabindex="-1"></a><span class="co"># get the labels:</span></span>
<span id="cb41-2"><a href="#cb41-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> labels</span></code></pre></div>
<pre><code>#&gt; [1] 1 2 5 3 4</code></pre>
<div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" tabindex="-1"></a><span class="co"># this is just like labels(dend)</span></span></code></pre></div>
<p>Notice how the tree’s labels are not in order from 1 to 5, since the
tree happened to place them in a different order. We can change the names
of the labels:</p>
<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" tabindex="-1"></a><span class="co"># change the labels, and then print them:</span></span>
<span id="cb44-2"><a href="#cb44-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels&quot;</span>, <span class="fu">c</span>(<span class="dv">111</span><span class="sc">:</span><span class="dv">115</span>)) <span class="sc">%&gt;%</span> labels</span></code></pre></div>
<pre><code>#&gt; [1] 111 112 113 114 115</code></pre>
<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" tabindex="-1"></a><span class="co"># could also be done using:</span></span>
<span id="cb46-2"><a href="#cb46-2" tabindex="-1"></a><span class="co"># labels(dend) &lt;- c(111:115)</span></span></code></pre></div>
<p>We can change the type of labels to be characters. Not doing so may
be a source of various bugs and problems in many functions.</p>
<div class="sourceCode" id="cb47"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb47-1"><a href="#cb47-1" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> labels</span></code></pre></div>
<pre><code>#&gt; [1] 1 2 5 3 4</code></pre>
<div class="sourceCode" id="cb49"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_to_char&quot;</span>) <span class="sc">%&gt;%</span> labels</span></code></pre></div>
<pre><code>#&gt; [1] &quot;1&quot; &quot;2&quot; &quot;5&quot; &quot;3&quot; &quot;4&quot;</code></pre>
<p>We may also change their color and size:</p>
<div class="sourceCode" id="cb51"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb51-2"><a href="#cb51-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_col&quot;</span>, <span class="st">&quot;blue&quot;</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Change label&#39;s color&quot;</span>) <span class="co"># change color </span></span>
<span id="cb51-3"><a href="#cb51-3" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Change label&#39;s size&quot;</span>) <span class="co"># change size </span></span></code></pre></div>
<p><img role="img" src="
<p>The function recycles, from left to right, the vector of values we
give it. We can use this to create more complex patterns:</p>
<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" tabindex="-1"></a><span class="co"># Produce a more complex dendrogram:</span></span>
<span id="cb52-2"><a href="#cb52-2" tabindex="-1"></a>dend15_2 <span class="ot">&lt;-</span> dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb52-3"><a href="#cb52-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels&quot;</span>, <span class="fu">c</span>(<span class="dv">111</span><span class="sc">:</span><span class="dv">115</span>)) <span class="sc">%&gt;%</span> <span class="co"># change labels</span></span>
<span id="cb52-4"><a href="#cb52-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_col&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>)) <span class="sc">%&gt;%</span> <span class="co"># change color </span></span>
<span id="cb52-5"><a href="#cb52-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>)) <span class="co"># change size</span></span>
<span id="cb52-6"><a href="#cb52-6" tabindex="-1"></a></span>
<span id="cb52-7"><a href="#cb52-7" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb52-8"><a href="#cb52-8" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Before&quot;</span>)</span>
<span id="cb52-9"><a href="#cb52-9" tabindex="-1"></a>dend15_2 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;After&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Notice how these “labels parameters” are nested within the nodePar
attribute:</p>
<div class="sourceCode" id="cb53"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" tabindex="-1"></a><span class="co"># looking at only the left-most node of the &quot;after tree&quot;:</span></span>
<span id="cb53-2"><a href="#cb53-2" tabindex="-1"></a>dend15_2[[<span class="dv">1</span>]][[<span class="dv">1</span>]] <span class="sc">%&gt;%</span> unclass <span class="sc">%&gt;%</span> str </span></code></pre></div>
<pre><code>#&gt; int 1
#&gt; - attr(*, &quot;label&quot;)= int 111
#&gt; - attr(*, &quot;members&quot;)= int 1
#&gt; - attr(*, &quot;height&quot;)= num 0
#&gt; - attr(*, &quot;leaf&quot;)= logi TRUE
#&gt; - attr(*, &quot;nodePar&quot;)=List of 3
#&gt; ..$ lab.col: num 1
#&gt; ..$ pch : logi NA
#&gt; ..$ lab.cex: num 2</code></pre>
<div class="sourceCode" id="cb55"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb55-1"><a href="#cb55-1" tabindex="-1"></a><span class="co"># looking at only the nodePar attributes in this sub-tree:</span></span>
<span id="cb55-2"><a href="#cb55-2" tabindex="-1"></a>dend15_2[[<span class="dv">1</span>]][[<span class="dv">1</span>]] <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;nodePar&quot;</span>) </span></code></pre></div>
<pre><code>#&gt; [,1]
#&gt; lab.col 1
#&gt; pch NA
#&gt; lab.cex 2</code></pre>
<p>When it comes to color, we can also set the parameter “k”, which will
cut the tree into k clusters, and assign a different color to each label
(based on its cluster):</p>
<div class="sourceCode" id="cb57"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb57-2"><a href="#cb57-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_col&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb57-3"><a href="#cb57-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Recycles color </span><span class="sc">\n</span><span class="st">from left to right&quot;</span>)</span>
<span id="cb57-4"><a href="#cb57-4" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_col&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>), <span class="at">k=</span><span class="dv">2</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb57-5"><a href="#cb57-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Color labels </span><span class="sc">\n</span><span class="st">per cluster&quot;</span>)</span>
<span id="cb57-6"><a href="#cb57-6" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">h =</span> <span class="dv">2</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="setting-a-dendrograms-nodesleaves-points" class="section level3">
<h3>Setting a dendrogram’s nodes/leaves (points)</h3>
<p>Each node in a tree can be represented and controlled using the
<code>assign_values_to_nodes_nodePar</code>, and for the special case of
the nodes of leaves, the <code>assign_values_to_leaves_nodePar</code>
function is more appropriate (and faster) to use. We can control the
following properties: pch (point type), cex (point size), and col (point
color). For pch we can additionally set bg (“background”, although it’s
really a fill for the shape). When bg is set, the outline of the point
is defined by col and the internal fill is determined by bg. For
example:</p>
<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">3</span>))</span>
<span id="cb58-2"><a href="#cb58-2" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(1) Show the</span><span class="sc">\n</span><span class="st"> nodes (as a dot)&quot;</span>) <span class="co">#1</span></span>
<span id="cb58-3"><a href="#cb58-3" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-4"><a href="#cb58-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(2) Show (larger)</span><span class="sc">\n</span><span class="st"> nodes&quot;</span>) <span class="co">#2</span></span>
<span id="cb58-5"><a href="#cb58-5" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_col&quot;</span>, <span class="dv">3</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-6"><a href="#cb58-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(3) Show (larger+colored)</span><span class="sc">\n</span><span class="st"> nodes&quot;</span>) <span class="co">#3</span></span>
<span id="cb58-7"><a href="#cb58-7" tabindex="-1"></a></span>
<span id="cb58-8"><a href="#cb58-8" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">21</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(4) Show the leaves</span><span class="sc">\n</span><span class="st"> (as empty circles)&quot;</span>) <span class="co">#4</span></span>
<span id="cb58-9"><a href="#cb58-9" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">21</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-10"><a href="#cb58-10" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(5) Show (larger)</span><span class="sc">\n</span><span class="st"> leaf circles&quot;</span>) <span class="co">#5</span></span>
<span id="cb58-11"><a href="#cb58-11" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> </span>
<span id="cb58-12"><a href="#cb58-12" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">21</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-13"><a href="#cb58-13" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;leaves_bg&quot;</span>, <span class="st">&quot;gold&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-14"><a href="#cb58-14" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-15"><a href="#cb58-15" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;leaves_col&quot;</span>, <span class="st">&quot;darkred&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb58-16"><a href="#cb58-16" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;(6) Show (larger+colored+filled)</span><span class="sc">\n</span><span class="st"> leaves&quot;</span>) <span class="co">#6</span></span></code></pre></div>
<p><img role="img" src="
<p>And with recycling we can produce more complex outputs:</p>
<div class="sourceCode" id="cb59"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb59-1"><a href="#cb59-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb59-2"><a href="#cb59-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_cex&quot;</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_col&quot;</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb59-3"><a href="#cb59-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Adjust nodes&quot;</span>)</span>
<span id="cb59-4"><a href="#cb59-4" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_col&quot;</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb59-5"><a href="#cb59-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Adjust nodes</span><span class="sc">\n</span><span class="st">(but only for leaves)&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Notice how recycling works in a depth-first order (which is just left
to right, when we only adjust the leaves). Here are the nodes’
parameters after adjustment:</p>
<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="#cb60-1" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="fu">c</span>(<span class="dv">19</span>,<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb60-2"><a href="#cb60-2" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;nodes_cex&quot;</span>, <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_col&quot;</span>, <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> <span class="fu">get_nodes_attr</span>(<span class="st">&quot;nodePar&quot;</span>)</span></code></pre></div>
<pre><code>#&gt; [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
#&gt; pch 19 1 4 19 1 4 19 1 4
#&gt; cex 2 1 2 2 1 2 2 1 2
#&gt; col 3 4 3 4 3 4 3 4 3</code></pre>
<p>We can also change the height of the leaves by using the
<code>hang.dendrogram</code> function:</p>
<div class="sourceCode" id="cb62"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb62-1"><a href="#cb62-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb62-2"><a href="#cb62-2" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_col&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="co"># adjust the leaves</span></span>
<span id="cb62-3"><a href="#cb62-3" tabindex="-1"></a> hang.dendrogram <span class="sc">%&gt;%</span> <span class="co"># hang the leaves</span></span>
<span id="cb62-4"><a href="#cb62-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Hanging a tree&quot;</span>)</span>
<span id="cb62-5"><a href="#cb62-5" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_col&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="co"># adjust the leaves</span></span>
<span id="cb62-6"><a href="#cb62-6" tabindex="-1"></a> <span class="fu">hang.dendrogram</span>(<span class="at">hang_height =</span> .<span class="dv">6</span>) <span class="sc">%&gt;%</span> <span class="co"># hang the leaves (at some height)</span></span>
<span id="cb62-7"><a href="#cb62-7" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Hanging a tree (but lower)&quot;</span>)</span>
<span id="cb62-8"><a href="#cb62-8" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_cex&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;leaves_col&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="co"># adjust the leaves</span></span>
<span id="cb62-9"><a href="#cb62-9" tabindex="-1"></a> hang.dendrogram <span class="sc">%&gt;%</span> <span class="co"># hang the leaves</span></span>
<span id="cb62-10"><a href="#cb62-10" tabindex="-1"></a> <span class="fu">hang.dendrogram</span>(<span class="at">hang =</span> <span class="sc">-</span><span class="dv">1</span>) <span class="sc">%&gt;%</span> <span class="co"># un-hanging the leaves</span></span>
<span id="cb62-11"><a href="#cb62-11" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Not hanging a tree&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>An example of what this function does to the leaves’ heights:</p>
<div class="sourceCode" id="cb63"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb63-1"><a href="#cb63-1" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">get_leaves_attr</span>(<span class="st">&quot;height&quot;</span>)</span></code></pre></div>
<pre><code>#&gt; [1] 0 0 0</code></pre>
<div class="sourceCode" id="cb65"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb65-1"><a href="#cb65-1" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> hang.dendrogram <span class="sc">%&gt;%</span> <span class="fu">get_leaves_attr</span>(<span class="st">&quot;height&quot;</span>)</span></code></pre></div>
<pre><code>#&gt; [1] 1.35 0.85 0.85</code></pre>
<p>We can also control the general heights of nodes using
<code>raise.dendrogram</code>:</p>
<div class="sourceCode" id="cb67"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb67-1"><a href="#cb67-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb67-2"><a href="#cb67-2" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;First tree&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
<span id="cb67-3"><a href="#cb67-3" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> </span>
<span id="cb67-4"><a href="#cb67-4" tabindex="-1"></a> <span class="fu">raise.dendrogram</span> (<span class="sc">-</span><span class="dv">1</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb67-5"><a href="#cb67-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;One point lower&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
<span id="cb67-6"><a href="#cb67-6" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> </span>
<span id="cb67-7"><a href="#cb67-7" tabindex="-1"></a> <span class="fu">raise.dendrogram</span> (<span class="dv">1</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb67-8"><a href="#cb67-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;One point higher&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
<p><img role="img" src="
<p>If you wish to make the branches under the root have the same height,
you can use the <code>flatten.dendrogram</code> function.</p>
</div>
<div id="setting-a-dendrograms-branches" class="section level3">
<h3>Setting a dendrogram’s branches</h3>
<div id="adjusting-all-branches" class="section level4">
<h4>Adjusting all branches</h4>
<p>Similar to adjusting nodes, we can also control line width (lwd),
line type (lty), and color (col) for branches:</p>
<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb68-2"><a href="#cb68-2" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="dv">4</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Thick branches&quot;</span>)</span>
<span id="cb68-3"><a href="#cb68-3" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lty&quot;</span>, <span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Dashed branches&quot;</span>)</span>
<span id="cb68-4"><a href="#cb68-4" tabindex="-1"></a>dend13 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_col&quot;</span>, <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Red branches&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>We may also use recycling to create more complex patterns:</p>
<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" tabindex="-1"></a><span class="co"># Produce a more complex dendrogram:</span></span>
<span id="cb69-2"><a href="#cb69-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb69-3"><a href="#cb69-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="fu">c</span>(<span class="dv">4</span>,<span class="dv">1</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb69-4"><a href="#cb69-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_lty&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb69-5"><a href="#cb69-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_col&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">3</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb69-6"><a href="#cb69-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Complex branches&quot;</span>, <span class="at">edge.root =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Notice how the first branch (the root) is considered when going
through and creating the tree, but it is <strong>ignored</strong> in the
actual plotting (this is actually a “missing feature” in
<code>plot.dendrogram</code>).</p>
</div>
<div id="coloring-branches-based-on-clustering" class="section level4">
<h4>Coloring branches based on clustering</h4>
<p>We may also control the colors of the branches based on
clustering:</p>
<div class="sourceCode" id="cb70"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb70-1"><a href="#cb70-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb70-2"><a href="#cb70-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Nice defaults&quot;</span>)</span>
<span id="cb70-3"><a href="#cb70-3" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">value =</span> <span class="dv">3</span><span class="sc">:</span><span class="dv">1</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb70-4"><a href="#cb70-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Controlling branches&#39; colors</span><span class="sc">\n</span><span class="st">(via clustering)&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="#cb71-1" tabindex="-1"></a><span class="co"># This is like using the `color_branches` function</span></span></code></pre></div>
</div>
<div id="adjusting-branches-based-on-labels" class="section level4">
<h4>Adjusting branches based on labels</h4>
<p>The most powerful way to control branches is through the
<code>branches_attr_by_labels</code> function (with variations through
the <code>set</code> function). The function allows you to change
col/lwd/lty of branches if they match some “labels condition”. Follow
carefully:</p>
<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="#cb72-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb72-2"><a href="#cb72-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;by_labels_branches_col&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb72-3"><a href="#cb72-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Adjust the branch</span><span class="sc">\n</span><span class="st"> if ALL (default) of its</span><span class="sc">\n</span><span class="st"> labels are in the list&quot;</span>)</span>
<span id="cb72-4"><a href="#cb72-4" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;by_labels_branches_col&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">type =</span> <span class="st">&quot;any&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb72-5"><a href="#cb72-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Adjust the branch</span><span class="sc">\n</span><span class="st"> if ANY of its</span><span class="sc">\n</span><span class="st"> labels are in the list&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>We can use this to change the size/type/color of the branches:</p>
<div class="sourceCode" id="cb73"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb73-1"><a href="#cb73-1" tabindex="-1"></a><span class="co"># Using &quot;Inf&quot; in &quot;TF_values&quot; means to let the parameters stay as they are.</span></span>
<span id="cb73-2"><a href="#cb73-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb73-3"><a href="#cb73-3" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;by_labels_branches_col&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="cn">Inf</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb73-4"><a href="#cb73-4" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Change colors&quot;</span>)</span>
<span id="cb73-5"><a href="#cb73-5" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;by_labels_branches_lwd&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">8</span>,<span class="dv">1</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb73-6"><a href="#cb73-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Change line width&quot;</span>)</span>
<span id="cb73-7"><a href="#cb73-7" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;by_labels_branches_lty&quot;</span>, <span class="at">value =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>), <span class="at">TF_values =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="cn">Inf</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb73-8"><a href="#cb73-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Change line type&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="highlighting-branches-different-heights-using-line-width-and-color" class="section level4">
<h4>Highlighting branches at different heights using line width and
color</h4>
<p>The <code>highlight_branches</code> function helps to more easily see
the topological structure of a tree, by adjusting the branches’ appearance
(color and line width) based on their height in the tree. For
example:</p>
<div class="sourceCode" id="cb74"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb74-1"><a href="#cb74-1" tabindex="-1"></a>dat <span class="ot">&lt;-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">20</span>,<span class="sc">-</span><span class="dv">5</span>]</span>
<span id="cb74-2"><a href="#cb74-2" tabindex="-1"></a>hca <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(dat))</span>
<span id="cb74-3"><a href="#cb74-3" tabindex="-1"></a>hca2 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(dat), <span class="at">method =</span> <span class="st">&quot;single&quot;</span>)</span>
<span id="cb74-4"><a href="#cb74-4" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hca)</span>
<span id="cb74-5"><a href="#cb74-5" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hca2)</span>
<span id="cb74-6"><a href="#cb74-6" tabindex="-1"></a></span>
<span id="cb74-7"><a href="#cb74-7" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb74-8"><a href="#cb74-8" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> highlight_branches_col <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Coloring branches&quot;</span>)</span>
<span id="cb74-9"><a href="#cb74-9" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> highlight_branches_lwd <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Emphasizing line-width&quot;</span>)</span>
<span id="cb74-10"><a href="#cb74-10" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> highlight_branches <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Emphasizing color</span><span class="sc">\n</span><span class="st"> and line-width&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Tanglegrams are even easier to compare when using <code>highlight_branches</code> (the color palette can be customized, as shown first):</p>
<div class="sourceCode" id="cb75"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb75-1"><a href="#cb75-1" tabindex="-1"></a><span class="fu">library</span>(viridis)</span>
<span id="cb75-2"><a href="#cb75-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb75-3"><a href="#cb75-3" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> highlight_branches_col <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Coloring branches </span><span class="sc">\n</span><span class="st"> (default is reversed viridis)&quot;</span>)</span>
<span id="cb75-4"><a href="#cb75-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">highlight_branches_col</span>(<span class="fu">viridis</span>(<span class="dv">100</span>)) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;It is better to use </span><span class="sc">\n</span><span class="st"> lighter colors in the leaves&quot;</span>)</span>
<span id="cb75-5"><a href="#cb75-5" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">highlight_branches_col</span>(<span class="fu">rev</span>(<span class="fu">magma</span>(<span class="dv">1000</span>))) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;The magma color pallatte</span><span class="sc">\n</span><span class="st"> is also good&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="#cb76-1" tabindex="-1"></a>dl <span class="ot">&lt;-</span> <span class="fu">dendlist</span>(dend, dend2)</span>
<span id="cb76-2"><a href="#cb76-2" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>, <span class="at">highlight_branches_lwd =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb77"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb77-1"><a href="#cb77-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dl)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb78"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb78-1"><a href="#cb78-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">fast =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb79"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb79-1"><a href="#cb79-1" tabindex="-1"></a>dl <span class="ot">&lt;-</span> <span class="fu">dendlist</span>(<span class="fu">highlight_branches</span>(dend), <span class="fu">highlight_branches</span>(dend2))</span>
<span id="cb79-2"><a href="#cb79-2" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb80"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb80-1"><a href="#cb80-1" tabindex="-1"></a><span class="co"># dend %&gt;% set(&quot;highlight_branches_col&quot;) %&gt;% plot</span></span>
<span id="cb80-2"><a href="#cb80-2" tabindex="-1"></a></span>
<span id="cb80-3"><a href="#cb80-3" tabindex="-1"></a>dl <span class="ot">&lt;-</span> <span class="fu">dendlist</span>(dend, dend2) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;highlight_branches_col&quot;</span>)</span>
<span id="cb80-4"><a href="#cb80-4" tabindex="-1"></a><span class="fu">tanglegram</span>(dl, <span class="at">sort =</span> <span class="cn">TRUE</span>, <span class="at">common_subtrees_color_lines =</span> <span class="cn">FALSE</span>, <span class="at">highlight_distinct_edges =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
</div>
</div>
<div id="changing-a-dendrograms-structure" class="section level3">
<h3>Changing a dendrogram’s structure</h3>
<div id="rotation" class="section level4">
<h4>Rotation</h4>
<p>A dendrogram is an object which can be rotated on its hinges without
changing its topology. Rotating a dendrogram in base R can be done using
the <code>reorder</code> function. The problem with this function is
that it is not very intuitive. For this reason the <code>rotate</code>
function was written. It has two main arguments: the “object” (a
dendrogram), and the “order” we wish to rotate it by. The “order”
parameter can be either a numeric vector, used in a similar way to how we
would order a simple character vector, or a character vector of the
labels of the tree, given in the new desired order of the tree. It is
also worth noting that some orders are impossible to achieve for a given
tree’s topology. In such cases, the
function will do its “best” to get as close as possible to the requested
rotation.</p>
<div class="sourceCode" id="cb81"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb81-1"><a href="#cb81-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb81-2"><a href="#cb81-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb81-3"><a href="#cb81-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-4"><a href="#cb81-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-5"><a href="#cb81-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;First tree&quot;</span>)</span>
<span id="cb81-6"><a href="#cb81-6" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span></span>
<span id="cb81-7"><a href="#cb81-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-8"><a href="#cb81-8" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-9"><a href="#cb81-9" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="fu">as.character</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)) <span class="sc">%&gt;%</span> <span class="co">#rotate to match labels new order</span></span>
<span id="cb81-10"><a href="#cb81-10" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Rotated tree</span><span class="sc">\n</span><span class="st"> based on labels&quot;</span>)</span>
<span id="cb81-11"><a href="#cb81-11" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb81-12"><a href="#cb81-12" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-13"><a href="#cb81-13" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb81-14"><a href="#cb81-14" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>) <span class="sc">%&gt;%</span> <span class="co"># the fifth label to go first is &quot;4&quot;</span></span>
<span id="cb81-15"><a href="#cb81-15" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Rotated tree</span><span class="sc">\n</span><span class="st"> based on order&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>A new convenience S3 function for <code>sort</code>
(<code>sort.dendrogram</code>) was added:</p>
<div class="sourceCode" id="cb82"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb82-1"><a href="#cb82-1" tabindex="-1"></a>dend110 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span><span class="sc">:</span><span class="dv">5</span>, <span class="dv">7</span>,<span class="dv">9</span>,<span class="dv">10</span>) <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb82-2"><a href="#cb82-2" tabindex="-1"></a> as.dendrogram <span class="sc">%&gt;%</span> color_labels <span class="sc">%&gt;%</span> color_branches</span>
<span id="cb82-3"><a href="#cb82-3" tabindex="-1"></a></span>
<span id="cb82-4"><a href="#cb82-4" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb82-5"><a href="#cb82-5" tabindex="-1"></a>dend110 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Original tree&quot;</span>)</span>
<span id="cb82-6"><a href="#cb82-6" tabindex="-1"></a>dend110 <span class="sc">%&gt;%</span> sort <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;labels sort&quot;</span>)</span>
<span id="cb82-7"><a href="#cb82-7" tabindex="-1"></a>dend110 <span class="sc">%&gt;%</span> <span class="fu">sort</span>(<span class="at">type =</span> <span class="st">&quot;nodes&quot;</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;nodes (ladderize) sort&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="unbranching" class="section level4">
<h4>Unbranching</h4>
<p>We can unbranch a tree:</p>
<div class="sourceCode" id="cb83"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb83-1"><a href="#cb83-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb83-2"><a href="#cb83-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;First tree&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
<span id="cb83-3"><a href="#cb83-3" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb83-4"><a href="#cb83-4" tabindex="-1"></a> unbranch <span class="sc">%&gt;%</span> </span>
<span id="cb83-5"><a href="#cb83-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Unbranched tree&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
<span id="cb83-6"><a href="#cb83-6" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> </span>
<span id="cb83-7"><a href="#cb83-7" tabindex="-1"></a> <span class="fu">unbranch</span>(<span class="dv">2</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb83-8"><a href="#cb83-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Unbranched tree (2)&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="pruning" class="section level4">
<h4>Pruning</h4>
<p>We can prune a tree based on the labels:</p>
<div class="sourceCode" id="cb84"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb84-1"><a href="#cb84-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb84-2"><a href="#cb84-2" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb84-3"><a href="#cb84-3" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;First tree&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span>
<span id="cb84-4"><a href="#cb84-4" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span></span>
<span id="cb84-5"><a href="#cb84-5" tabindex="-1"></a> <span class="fu">prune</span>(<span class="fu">c</span>(<span class="st">&quot;1&quot;</span>,<span class="st">&quot;5&quot;</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb84-6"><a href="#cb84-6" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Prunned tree&quot;</span>, <span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>,<span class="dv">3</span>))</span></code></pre></div>
<p><img role="img" src="
<p>For pruning two trees to have matching labels, we can use the
<code>intersect_trees</code> function:</p>
<div class="sourceCode" id="cb85"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb85-1"><a href="#cb85-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb85-2"><a href="#cb85-2" tabindex="-1"></a>dend_intersected <span class="ot">&lt;-</span> <span class="fu">intersect_trees</span>(dend13, dend15)</span>
<span id="cb85-3"><a href="#cb85-3" tabindex="-1"></a>dend_intersected[[<span class="dv">1</span>]] <span class="sc">%&gt;%</span> plot</span>
<span id="cb85-4"><a href="#cb85-4" tabindex="-1"></a>dend_intersected[[<span class="dv">2</span>]] <span class="sc">%&gt;%</span> plot</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="collapse-branches" class="section level4">
<h4>Collapse branches</h4>
<p>We can collapse branches under a tolerance level using the
<code>collapse_branch</code> function:</p>
<div class="sourceCode" id="cb86"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb86-1"><a href="#cb86-1" tabindex="-1"></a><span class="co"># ladderize is like sort(..., type = &quot;node&quot;)</span></span>
<span id="cb86-2"><a href="#cb86-2" tabindex="-1"></a>dend <span class="ot">&lt;-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb86-3"><a href="#cb86-3" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>))</span>
<span id="cb86-4"><a href="#cb86-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> ladderize <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>); <span class="fu">abline</span>(<span class="at">v =</span> .<span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
<span id="cb86-5"><a href="#cb86-5" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">collapse_branch</span>(<span class="at">tol =</span> <span class="fl">0.2</span>) <span class="sc">%&gt;%</span> ladderize <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span>
<span id="cb86-6"><a href="#cb86-6" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">collapse_branch</span>(<span class="at">tol =</span> <span class="fl">0.2</span>) <span class="sc">%&gt;%</span> ladderize <span class="sc">%&gt;%</span> <span class="fu">hang.dendrogram</span>(<span class="at">hang =</span> <span class="dv">0</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
</div>
<div id="adding-extra-bars-and-rectangles" class="section level3">
<h3>Adding extra bars and rectangles</h3>
<div id="adding-colored-rectangles" class="section level4">
<h4>Adding colored rectangles</h4>
<p>Earlier we have seen how to highlight clusters in a dendrogram by
coloring branches. We can also draw rectangles around the branches of a
dendrogram in order to highlight the corresponding clusters. First the
dendrogram is cut at a certain level, then a rectangle is drawn around
selected branches. This is done using the <code>rect.dendrogram</code>
function, which is modeled on the <code>rect.hclust</code> function. One
advantage of <code>rect.dendrogram</code> over <code>rect.hclust</code>
is that it also works on horizontally plotted trees:</p>
<div class="sourceCode" id="cb87"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb87-1"><a href="#cb87-1" tabindex="-1"></a><span class="fu">layout</span>(<span class="fu">t</span>(<span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>,<span class="dv">2</span>)))</span>
<span id="cb87-2"><a href="#cb87-2" tabindex="-1"></a></span>
<span id="cb87-3"><a href="#cb87-3" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>) <span class="sc">%&gt;%</span> plot</span>
<span id="cb87-4"><a href="#cb87-4" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">rect.dendrogram</span>(<span class="at">k=</span><span class="dv">3</span>, </span>
<span id="cb87-5"><a href="#cb87-5" tabindex="-1"></a> <span class="at">border =</span> <span class="dv">8</span>, <span class="at">lty =</span> <span class="dv">5</span>, <span class="at">lwd =</span> <span class="dv">2</span>)</span>
<span id="cb87-6"><a href="#cb87-6" tabindex="-1"></a></span>
<span id="cb87-7"><a href="#cb87-7" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">horiz =</span> <span class="cn">TRUE</span>)</span>
<span id="cb87-8"><a href="#cb87-8" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> <span class="fu">rect.dendrogram</span>(<span class="at">k=</span><span class="dv">3</span>, <span class="at">horiz =</span> <span class="cn">TRUE</span>,</span>
<span id="cb87-9"><a href="#cb87-9" tabindex="-1"></a> <span class="at">border =</span> <span class="dv">8</span>, <span class="at">lty =</span> <span class="dv">5</span>, <span class="at">lwd =</span> <span class="dv">2</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="adding-colored-bars" class="section level4">
<h4>Adding colored bars</h4>
<p>Adding colored bars to a dendrogram may be useful to show clusters or
some outside categorization of the items. For example:</p>
<div class="sourceCode" id="cb88"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb88-1"><a href="#cb88-1" tabindex="-1"></a>is_odd <span class="ot">&lt;-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc">%%</span> <span class="dv">2</span>, <span class="dv">2</span>,<span class="dv">3</span>)</span>
<span id="cb88-2"><a href="#cb88-2" tabindex="-1"></a>is_345 <span class="ot">&lt;-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="dv">3</span>,<span class="dv">4</span>)</span>
<span id="cb88-3"><a href="#cb88-3" tabindex="-1"></a>is_12 <span class="ot">&lt;-</span> <span class="fu">ifelse</span>(<span class="fu">labels</span>(dend15) <span class="sc">&lt;=</span> <span class="dv">2</span>, <span class="dv">3</span>,<span class="dv">4</span>)</span>
<span id="cb88-4"><a href="#cb88-4" tabindex="-1"></a>k_3 <span class="ot">&lt;-</span> <span class="fu">cutree</span>(dend15,<span class="at">k =</span> <span class="dv">3</span>, <span class="at">order_clusters_as_data =</span> <span class="cn">FALSE</span>) </span>
<span id="cb88-5"><a href="#cb88-5" tabindex="-1"></a><span class="co"># The FALSE above makes sure we get the clusters in the order of the</span></span>
<span id="cb88-6"><a href="#cb88-6" tabindex="-1"></a><span class="co"># dendrogram, and not in that of the original data. It is like:</span></span>
<span id="cb88-7"><a href="#cb88-7" tabindex="-1"></a><span class="co"># cutree(dend15, k = 3)[order.dendrogram(dend15)]</span></span>
<span id="cb88-8"><a href="#cb88-8" tabindex="-1"></a>the_bars <span class="ot">&lt;-</span> <span class="fu">cbind</span>(is_odd, is_345, is_12, k_3)</span>
<span id="cb88-9"><a href="#cb88-9" tabindex="-1"></a>the_bars[the_bars<span class="sc">==</span><span class="dv">2</span>] <span class="ot">&lt;-</span> <span class="dv">8</span></span>
<span id="cb88-10"><a href="#cb88-10" tabindex="-1"></a></span>
<span id="cb88-11"><a href="#cb88-11" tabindex="-1"></a>dend15 <span class="sc">%&gt;%</span> plot</span>
<span id="cb88-12"><a href="#cb88-12" tabindex="-1"></a><span class="fu">colored_bars</span>(<span class="at">colors =</span> the_bars, <span class="at">dend =</span> dend15, <span class="at">sort_by_labels_order =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb89"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb89-1"><a href="#cb89-1" tabindex="-1"></a><span class="co"># we use sort_by_labels_order = FALSE since &quot;the_bars&quot; were set based on the</span></span>
<span id="cb89-2"><a href="#cb89-2" tabindex="-1"></a><span class="co"># labels order. The more common use case is when the bars are based on a second variable</span></span>
<span id="cb89-3"><a href="#cb89-3" tabindex="-1"></a><span class="co"># from the same data.frame as dend was created from. Thus, the default </span></span>
<span id="cb89-4"><a href="#cb89-4" tabindex="-1"></a><span class="co"># sort_by_labels_order = TRUE would make more sense.</span></span></code></pre></div>
<p>Another example, based on mtcars (in which the default of
<code>sort_by_labels_order = TRUE</code> makes sense):</p>
<div class="sourceCode" id="cb90"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb90-1"><a href="#cb90-1" tabindex="-1"></a>dend_mtcars <span class="ot">&lt;-</span> mtcars[, <span class="fu">c</span>(<span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;disp&quot;</span>)] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb90-2"><a href="#cb90-2" tabindex="-1"></a></span>
<span id="cb90-3"><a href="#cb90-3" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">c</span>(<span class="dv">10</span>,<span class="dv">2</span>,<span class="dv">1</span>,<span class="dv">1</span>))</span>
<span id="cb90-4"><a href="#cb90-4" tabindex="-1"></a><span class="fu">plot</span>(dend_mtcars)</span>
<span id="cb90-5"><a href="#cb90-5" tabindex="-1"></a>the_bars <span class="ot">&lt;-</span> <span class="fu">ifelse</span>(mtcars<span class="sc">$</span>am, <span class="st">&quot;grey&quot;</span>, <span class="st">&quot;gold&quot;</span>)</span>
<span id="cb90-6"><a href="#cb90-6" tabindex="-1"></a><span class="fu">colored_bars</span>(<span class="at">colors =</span> the_bars, <span class="at">dend =</span> dend_mtcars, <span class="at">rowLabels =</span> <span class="st">&quot;am&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
</div>
</div>
<div id="ggplot2-integration" class="section level2">
<h2>ggplot2 integration</h2>
<p>The core process is to transform a dendrogram into a
<code>ggdend</code> object using <code>as.ggdend</code>, and then plot
it using <code>ggplot</code> (a new S3 <code>ggplot.ggdend</code>
function is available). These two steps can be done in one command with
either the function <code>ggplot</code> or <code>ggdend</code>.</p>
<p>The reason we want to have <code>as.ggdend</code> (and not only
<code>ggplot.dendrogram</code>), is (1) so that you could create your
own mapping of <code>ggdend</code> and, (2) since <code>as.ggdend</code>
might be slow for large trees, it is probably better to be able to run
it only once for such cases.</p>
<p>A <code>ggdend</code> class object is a list with 3 components:
segments, labels, nodes. Each one contains the graphical parameters from
the original dendrogram, but in a tabular form that can be used by
<code>ggplot2+geom_segment+geom_text</code> to create a dendrogram
plot.</p>
<p>The function <code>prepare.ggdend</code> is used by
<code>plot.ggdend</code> to take the ggdend object and prepare it for
plotting. This is because the defaults of various parameters in
dendrograms are not always stored in the object itself, but are
built into the <code>plot.dendrogram</code> function. For example,
the color of the labels is not (by default) specified in the dendrogram
(only if we change it from black to something else). Hence, when taking
the object into a different plotting engine (say ggplot2), we want to
prepare the object by filling-in various defaults. This function is
automatically invoked within the <code>plot.ggdend</code> function. You
would probably use it only if you wish to build your own ggplot2
mapping.</p>
<div class="sourceCode" id="cb91"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb91-1"><a href="#cb91-1" tabindex="-1"></a><span class="co"># Create a complex dend:</span></span>
<span id="cb91-2"><a href="#cb91-2" tabindex="-1"></a>dend <span class="ot">&lt;-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">30</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb91-3"><a href="#cb91-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k=</span><span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="fu">c</span>(<span class="fl">1.5</span>,<span class="dv">1</span>,<span class="fl">1.5</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb91-4"><a href="#cb91-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_lty&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb91-5"><a href="#cb91-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="fu">c</span>(.<span class="dv">9</span>,<span class="fl">1.2</span>)) <span class="sc">%&gt;%</span> </span>
<span id="cb91-6"><a href="#cb91-6" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_col&quot;</span>, <span class="fu">c</span>(<span class="st">&quot;orange&quot;</span>, <span class="st">&quot;black&quot;</span>, <span class="st">&quot;plum&quot;</span>, <span class="cn">NA</span>))</span>
<span id="cb91-7"><a href="#cb91-7" tabindex="-1"></a><span class="co"># plot the dend in usual &quot;base&quot; plotting engine:</span></span>
<span id="cb91-8"><a href="#cb91-8" tabindex="-1"></a><span class="fu">plot</span>(dend)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb92"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb92-1"><a href="#cb92-1" tabindex="-1"></a><span class="co"># Now let&#39;s do it in ggplot2 :)</span></span>
<span id="cb92-2"><a href="#cb92-2" tabindex="-1"></a>ggd1 <span class="ot">&lt;-</span> <span class="fu">as.ggdend</span>(dend)</span>
<span id="cb92-3"><a href="#cb92-3" tabindex="-1"></a><span class="fu">library</span>(ggplot2)</span>
<span id="cb92-4"><a href="#cb92-4" tabindex="-1"></a><span class="co"># the nodes are not implemented yet.</span></span>
<span id="cb92-5"><a href="#cb92-5" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1) <span class="co"># reproducing the above plot in ggplot2 :)</span></span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb93"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb93-1"><a href="#cb93-1" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1, <span class="at">horiz =</span> <span class="cn">TRUE</span>, <span class="at">theme =</span> <span class="cn">NULL</span>) <span class="co"># horiz plot (and let&#39;s remove theme) in ggplot2</span></span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb94"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb94-1"><a href="#cb94-1" tabindex="-1"></a><span class="co"># Adding some extra spice to it...</span></span>
<span id="cb94-2"><a href="#cb94-2" tabindex="-1"></a><span class="co"># creating a radial plot:</span></span>
<span id="cb94-3"><a href="#cb94-3" tabindex="-1"></a><span class="co"># ggplot(ggd1) + scale_y_reverse(expand = c(0.2, 0)) + coord_polar(theta=&quot;x&quot;)</span></span>
<span id="cb94-4"><a href="#cb94-4" tabindex="-1"></a><span class="co"># The text doesn&#39;t look so great, so let&#39;s remove it:</span></span>
<span id="cb94-5"><a href="#cb94-5" tabindex="-1"></a><span class="fu">ggplot</span>(ggd1, <span class="at">labels =</span> <span class="cn">FALSE</span>) <span class="sc">+</span> <span class="fu">scale_y_reverse</span>(<span class="at">expand =</span> <span class="fu">c</span>(<span class="fl">0.2</span>, <span class="dv">0</span>)) <span class="sc">+</span> <span class="fu">coord_polar</span>(<span class="at">theta=</span><span class="st">&quot;x&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p><strong>Credit:</strong> These functions are <em>extended</em>
versions of the functions <code>ggdendrogram</code>,
<code>dendro_data</code> (and the hidden <code>dendrogram_data</code>)
from Andrie de Vries’s <a href="https://cran.r-project.org/package=ggdendro">ggdendro</a> package.
The motivation for this fork is the need to add more graphical
parameters to the plotted tree. This required a strong mixture of
functions from ggdendro and dendextend (to the point that it seemed
better to just fork the code into its current form).</p>
</div>
<div id="enhancing-other-packages" class="section level2">
<h2>Enhancing other packages</h2>
<p>The dendextend package aims to extend and enhance features from the R
ecosystem. Let us take a look at several examples.</p>
<div id="dendser" class="section level3">
<h3>DendSer</h3>
<p>The DendSer package helps in re-arranging a dendrogram to optimize
visualization-based cost functions. Until now it was only used for
<code>hclust</code> objects, but it can easily be connected to
<code>dendrogram</code> objects by trying to turn the dendrogram into
hclust, on which it runs DendSer. This can be used to rotate the
dendrogram easily by using the <code>rotate_DendSer</code> function:</p>
<div class="sourceCode" id="cb95"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb95-1"><a href="#cb95-1" tabindex="-1"></a><span class="cf">if</span>(<span class="fu">require</span>(DendSer)) {</span>
<span id="cb95-2"><a href="#cb95-2" tabindex="-1"></a> <span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb95-3"><a href="#cb95-3" tabindex="-1"></a> <span class="fu">DendSer.dendrogram</span>(dend15)</span>
<span id="cb95-4"><a href="#cb95-4" tabindex="-1"></a> </span>
<span id="cb95-5"><a href="#cb95-5" tabindex="-1"></a> dend15 <span class="sc">%&gt;%</span> color_branches <span class="sc">%&gt;%</span> plot</span>
<span id="cb95-6"><a href="#cb95-6" tabindex="-1"></a> dend15 <span class="sc">%&gt;%</span> color_branches <span class="sc">%&gt;%</span> rotate_DendSer <span class="sc">%&gt;%</span> plot</span>
<span id="cb95-7"><a href="#cb95-7" tabindex="-1"></a>}</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="gplots" class="section level3">
<h3>gplots</h3>
<p>The gplots package brings us the <code>heatmap.2</code> function. In
it, we can use our modified dendrograms to get more informative
heat-maps:</p>
<div class="sourceCode" id="cb96"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb96-1"><a href="#cb96-1" tabindex="-1"></a><span class="fu">library</span>(gplots)</span>
<span id="cb96-2"><a href="#cb96-2" tabindex="-1"></a></span>
<span id="cb96-3"><a href="#cb96-3" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="fu">as.matrix</span>(datasets<span class="sc">::</span>mtcars)</span>
<span id="cb96-4"><a href="#cb96-4" tabindex="-1"></a></span>
<span id="cb96-5"><a href="#cb96-5" tabindex="-1"></a><span class="fu">heatmap.2</span>(x)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb97"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb97-1"><a href="#cb97-1" tabindex="-1"></a><span class="co"># now let&#39;s spice up the dendrograms a bit:</span></span>
<span id="cb97-2"><a href="#cb97-2" tabindex="-1"></a>Rowv <span class="ot">&lt;-</span> x <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb97-3"><a href="#cb97-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb97-4"><a href="#cb97-4" tabindex="-1"></a> ladderize</span>
<span id="cb97-5"><a href="#cb97-5" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(x))</span></span>
<span id="cb97-6"><a href="#cb97-6" tabindex="-1"></a>Colv <span class="ot">&lt;-</span> x <span class="sc">%&gt;%</span> t <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb97-7"><a href="#cb97-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k =</span> <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb97-8"><a href="#cb97-8" tabindex="-1"></a> ladderize</span>
<span id="cb97-9"><a href="#cb97-9" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(t(x)))</span></span>
<span id="cb97-10"><a href="#cb97-10" tabindex="-1"></a></span>
<span id="cb97-11"><a href="#cb97-11" tabindex="-1"></a><span class="fu">heatmap.2</span>(x, <span class="at">Rowv =</span> Rowv, <span class="at">Colv =</span> Colv)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="nmf" class="section level3">
<h3>NMF</h3>
<p>Like gplots, NMF offers a heatmap function called
<code>aheatmap</code>. We can update it just as we would
<code>heatmap.2</code>.</p>
<p>Since NMF was removed from CRAN (it can still be installed from
source), the example code is still available but is not run in this
vignette.</p>
<div class="sourceCode" id="cb98"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb98-1"><a href="#cb98-1" tabindex="-1"></a><span class="co"># library(NMF)</span></span>
<span id="cb98-2"><a href="#cb98-2" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb98-3"><a href="#cb98-3" tabindex="-1"></a><span class="co"># x &lt;- as.matrix(datasets::mtcars)</span></span>
<span id="cb98-4"><a href="#cb98-4" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb98-5"><a href="#cb98-5" tabindex="-1"></a><span class="co"># # now let&#39;s spice up the dendrograms a bit:</span></span>
<span id="cb98-6"><a href="#cb98-6" tabindex="-1"></a><span class="co"># Rowv &lt;- x %&gt;% dist %&gt;% hclust %&gt;% as.dendrogram %&gt;%</span></span>
<span id="cb98-7"><a href="#cb98-7" tabindex="-1"></a><span class="co"># set(&quot;branches_k_color&quot;, k = 3) %&gt;% set(&quot;branches_lwd&quot;, 4) %&gt;%</span></span>
<span id="cb98-8"><a href="#cb98-8" tabindex="-1"></a><span class="co"># ladderize</span></span>
<span id="cb98-9"><a href="#cb98-9" tabindex="-1"></a><span class="co"># # rotate_DendSer(ser_weight = dist(x))</span></span>
<span id="cb98-10"><a href="#cb98-10" tabindex="-1"></a><span class="co"># Colv &lt;- x %&gt;% t %&gt;% dist %&gt;% hclust %&gt;% as.dendrogram %&gt;%</span></span>
<span id="cb98-11"><a href="#cb98-11" tabindex="-1"></a><span class="co"># set(&quot;branches_k_color&quot;, k = 2) %&gt;% set(&quot;branches_lwd&quot;, 4) %&gt;%</span></span>
<span id="cb98-12"><a href="#cb98-12" tabindex="-1"></a><span class="co"># ladderize</span></span>
<span id="cb98-13"><a href="#cb98-13" tabindex="-1"></a><span class="co"># # rotate_DendSer(ser_weight = dist(t(x)))</span></span>
<span id="cb98-14"><a href="#cb98-14" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb98-15"><a href="#cb98-15" tabindex="-1"></a><span class="co"># aheatmap(x, Rowv = Rowv, Colv = Colv)</span></span></code></pre></div>
</div>
<div id="heatmaply" class="section level3">
<h3>heatmaply</h3>
<p>The heatmaply package creates interactive heat-maps that are usable
from the R console, in the RStudio viewer pane, in R Markdown
documents, and in Shiny apps. Hover the mouse pointer over a
cell or a dendrogram to show details, or drag a rectangle to zoom.</p>
<p>The use is very similar to what we’ve seen before; we just use
<code>heatmaply</code> instead of <code>heatmap.2</code>:</p>
<div class="sourceCode" id="cb99"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb99-1"><a href="#cb99-1" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="fu">as.matrix</span>(datasets<span class="sc">::</span>mtcars)</span>
<span id="cb99-2"><a href="#cb99-2" tabindex="-1"></a><span class="co"># heatmaply(x)</span></span>
<span id="cb99-3"><a href="#cb99-3" tabindex="-1"></a><span class="co"># now let&#39;s spice up the dendrograms a bit:</span></span>
<span id="cb99-4"><a href="#cb99-4" tabindex="-1"></a>Rowv <span class="ot">&lt;-</span> x <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb99-5"><a href="#cb99-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k =</span> <span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb99-6"><a href="#cb99-6" tabindex="-1"></a> ladderize</span>
<span id="cb99-7"><a href="#cb99-7" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(x))</span></span>
<span id="cb99-8"><a href="#cb99-8" tabindex="-1"></a>Colv <span class="ot">&lt;-</span> x <span class="sc">%&gt;%</span> t <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb99-9"><a href="#cb99-9" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k =</span> <span class="dv">2</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="dv">4</span>) <span class="sc">%&gt;%</span></span>
<span id="cb99-10"><a href="#cb99-10" tabindex="-1"></a> ladderize</span>
<span id="cb99-11"><a href="#cb99-11" tabindex="-1"></a><span class="co"># rotate_DendSer(ser_weight = dist(t(x)))</span></span></code></pre></div>
<p>Here we need to use <code>cache=FALSE</code> in the markdown:</p>
<div class="sourceCode" id="cb100"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb100-1"><a href="#cb100-1" tabindex="-1"></a><span class="fu">library</span>(heatmaply)</span>
<span id="cb100-2"><a href="#cb100-2" tabindex="-1"></a><span class="fu">heatmaply</span>(x, <span class="at">Rowv =</span> Rowv, <span class="at">Colv =</span> Colv)</span></code></pre></div>
<p>I avoided running the code from above due to space issues on CRAN.
For live examples, please go to:</p>
<ul>
<li><a href="https://talgalili.github.io/heatmaply/articles/heatmaply.html" class="uri">https://talgalili.github.io/heatmaply/articles/heatmaply.html</a></li>
</ul>
</div>
<div id="dynamictreecut" class="section level3">
<h3>dynamicTreeCut</h3>
<p>The <code>cutreeDynamic</code> function offers a wrapper for two
methods of adaptive branch pruning of hierarchical clustering
dendrograms. The results can now be visualized both by updating
the branches and by using the <code>colored_bars</code> function
(which was adjusted for use with plots of dendrograms):</p>
<div class="sourceCode" id="cb101"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb101-1"><a href="#cb101-1" tabindex="-1"></a><span class="co"># let&#39;s get the clusters</span></span>
<span id="cb101-2"><a href="#cb101-2" tabindex="-1"></a><span class="fu">library</span>(dynamicTreeCut)</span>
<span id="cb101-3"><a href="#cb101-3" tabindex="-1"></a><span class="fu">data</span>(iris)</span>
<span id="cb101-4"><a href="#cb101-4" tabindex="-1"></a>x <span class="ot">&lt;-</span> iris[,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> as.matrix</span>
<span id="cb101-5"><a href="#cb101-5" tabindex="-1"></a>hc <span class="ot">&lt;-</span> x <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust</span>
<span id="cb101-6"><a href="#cb101-6" tabindex="-1"></a>dend <span class="ot">&lt;-</span> hc <span class="sc">%&gt;%</span> as.dendrogram </span>
<span id="cb101-7"><a href="#cb101-7" tabindex="-1"></a></span>
<span id="cb101-8"><a href="#cb101-8" tabindex="-1"></a><span class="co"># Find special clusters:</span></span>
<span id="cb101-9"><a href="#cb101-9" tabindex="-1"></a>clusters <span class="ot">&lt;-</span> <span class="fu">cutreeDynamic</span>(hc, <span class="at">distM =</span> <span class="fu">as.matrix</span>(<span class="fu">dist</span>(x)), <span class="at">method =</span> <span class="st">&quot;tree&quot;</span>)</span>
<span id="cb101-10"><a href="#cb101-10" tabindex="-1"></a><span class="co"># we need to sort them to the order of the dendrogram:</span></span>
<span id="cb101-11"><a href="#cb101-11" tabindex="-1"></a>clusters <span class="ot">&lt;-</span> clusters[<span class="fu">order.dendrogram</span>(dend)]</span>
<span id="cb101-12"><a href="#cb101-12" tabindex="-1"></a>clusters_numbers <span class="ot">&lt;-</span> <span class="fu">unique</span>(clusters) <span class="sc">-</span> (<span class="dv">0</span> <span class="sc">%in%</span> clusters)</span>
<span id="cb101-13"><a href="#cb101-13" tabindex="-1"></a>n_clusters <span class="ot">&lt;-</span> <span class="fu">length</span>(clusters_numbers)</span>
<span id="cb101-14"><a href="#cb101-14" tabindex="-1"></a></span>
<span id="cb101-15"><a href="#cb101-15" tabindex="-1"></a><span class="fu">library</span>(colorspace)</span>
<span id="cb101-16"><a href="#cb101-16" tabindex="-1"></a>cols <span class="ot">&lt;-</span> <span class="fu">rainbow_hcl</span>(n_clusters)</span>
<span id="cb101-17"><a href="#cb101-17" tabindex="-1"></a>true_species_cols <span class="ot">&lt;-</span> <span class="fu">rainbow_hcl</span>(<span class="dv">3</span>)[<span class="fu">as.numeric</span>(iris[,][<span class="fu">order.dendrogram</span>(dend),<span class="dv">5</span>])]</span>
<span id="cb101-18"><a href="#cb101-18" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> dend <span class="sc">%&gt;%</span> </span>
<span id="cb101-19"><a href="#cb101-19" tabindex="-1"></a> <span class="fu">branches_attr_by_clusters</span>(clusters, <span class="at">values =</span> cols) <span class="sc">%&gt;%</span> </span>
<span id="cb101-20"><a href="#cb101-20" tabindex="-1"></a> <span class="fu">color_labels</span>(<span class="at">col =</span> true_species_cols)</span>
<span id="cb101-21"><a href="#cb101-21" tabindex="-1"></a><span class="fu">plot</span>(dend2)</span>
<span id="cb101-22"><a href="#cb101-22" tabindex="-1"></a>clusters <span class="ot">&lt;-</span> <span class="fu">factor</span>(clusters)</span>
<span id="cb101-23"><a href="#cb101-23" tabindex="-1"></a><span class="fu">levels</span>(clusters)[<span class="sc">-</span><span class="dv">1</span>] <span class="ot">&lt;-</span> cols[<span class="sc">-</span><span class="dv">5</span>][<span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>,<span class="dv">2</span>,<span class="dv">3</span>)] </span>
<span id="cb101-24"><a href="#cb101-24" tabindex="-1"></a> <span class="co"># Get the clusters to have proper colors.</span></span>
<span id="cb101-25"><a href="#cb101-25" tabindex="-1"></a> <span class="co"># fix the order of the colors to match the branches.</span></span>
<span id="cb101-26"><a href="#cb101-26" tabindex="-1"></a><span class="fu">colored_bars</span>(clusters, dend, <span class="at">sort_by_labels_order =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb102"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb102-1"><a href="#cb102-1" tabindex="-1"></a><span class="co"># here we used sort_by_labels_order = FALSE since the clusters were already sorted based on the dendrogram&#39;s order</span></span></code></pre></div>
</div>
<div id="pvclust" class="section level3">
<h3>pvclust</h3>
<p>The pvclust library calculates “p-values” for hierarchical
clustering via multiscale bootstrap re-sampling. Hierarchical clustering
is done for given data and p-values are computed for each of the
clusters. The dendextend package lets us reproduce the plot from
pvclust, but with a dendrogram (instead of an hclust object), which also
lets us extend the visualization.</p>
<div class="sourceCode" id="cb103"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb103-1"><a href="#cb103-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>))</span>
<span id="cb103-2"><a href="#cb103-2" tabindex="-1"></a></span>
<span id="cb103-3"><a href="#cb103-3" tabindex="-1"></a><span class="fu">library</span>(pvclust)</span>
<span id="cb103-4"><a href="#cb103-4" tabindex="-1"></a><span class="fu">data</span>(lung) <span class="co"># 916 genes for 73 subjects</span></span>
<span id="cb103-5"><a href="#cb103-5" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">13134</span>)</span>
<span id="cb103-6"><a href="#cb103-6" tabindex="-1"></a>result <span class="ot">&lt;-</span> <span class="fu">pvclust</span>(lung[<span class="dv">1</span><span class="sc">:</span><span class="dv">100</span>, <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>], </span>
<span id="cb103-7"><a href="#cb103-7" tabindex="-1"></a> <span class="at">method.dist=</span><span class="st">&quot;cor&quot;</span>, <span class="at">method.hclust=</span><span class="st">&quot;average&quot;</span>, <span class="at">nboot=</span><span class="dv">10</span>)</span>
<span id="cb103-8"><a href="#cb103-8" tabindex="-1"></a></span>
<span id="cb103-9"><a href="#cb103-9" tabindex="-1"></a><span class="co"># with pvrect</span></span>
<span id="cb103-10"><a href="#cb103-10" tabindex="-1"></a><span class="fu">plot</span>(result)</span>
<span id="cb103-11"><a href="#cb103-11" tabindex="-1"></a><span class="fu">pvrect</span>(result)</span>
<span id="cb103-12"><a href="#cb103-12" tabindex="-1"></a></span>
<span id="cb103-13"><a href="#cb103-13" tabindex="-1"></a><span class="co"># with a dendrogram of pvrect</span></span>
<span id="cb103-14"><a href="#cb103-14" tabindex="-1"></a>dend <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(result)</span>
<span id="cb103-15"><a href="#cb103-15" tabindex="-1"></a>result <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span> </span>
<span id="cb103-16"><a href="#cb103-16" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> reproduced plot with dendrogram&quot;</span>)</span>
<span id="cb103-17"><a href="#cb103-17" tabindex="-1"></a>result <span class="sc">%&gt;%</span> text</span>
<span id="cb103-18"><a href="#cb103-18" tabindex="-1"></a>result <span class="sc">%&gt;%</span> pvrect</span></code></pre></div>
<p><img role="img" src="
<p>Let’s color and thicken the branches based on the p-values:</p>
<div class="sourceCode" id="cb104"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb104-1"><a href="#cb104-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">2</span>))</span>
<span id="cb104-2"><a href="#cb104-2" tabindex="-1"></a></span>
<span id="cb104-3"><a href="#cb104-3" tabindex="-1"></a><span class="co"># with a modified dendrogram of pvrect</span></span>
<span id="cb104-4"><a href="#cb104-4" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">pvclust_show_signif</span>(result) <span class="sc">%&gt;%</span> </span>
<span id="cb104-5"><a href="#cb104-5" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Cluster dendrogram </span><span class="sc">\n</span><span class="st"> bp values are highlighted by signif&quot;</span>)</span>
<span id="cb104-6"><a href="#cb104-6" tabindex="-1"></a></span>
<span id="cb104-7"><a href="#cb104-7" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">pvclust_show_signif</span>(result, <span class="at">show_type =</span> <span class="st">&quot;lwd&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb104-8"><a href="#cb104-8" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are highlighted by signif&quot;</span>)</span>
<span id="cb104-9"><a href="#cb104-9" tabindex="-1"></a>result <span class="sc">%&gt;%</span> text</span>
<span id="cb104-10"><a href="#cb104-10" tabindex="-1"></a>result <span class="sc">%&gt;%</span> <span class="fu">pvrect</span>(<span class="at">alpha=</span><span class="fl">0.95</span>)</span>
<span id="cb104-11"><a href="#cb104-11" tabindex="-1"></a></span>
<span id="cb104-12"><a href="#cb104-12" tabindex="-1"></a></span>
<span id="cb104-13"><a href="#cb104-13" tabindex="-1"></a>dend <span class="sc">%&gt;%</span> <span class="fu">pvclust_show_signif_gradient</span>(result) <span class="sc">%&gt;%</span> </span>
<span id="cb104-14"><a href="#cb104-14" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are colored by signif&quot;</span>)</span>
<span id="cb104-15"><a href="#cb104-15" tabindex="-1"></a></span>
<span id="cb104-16"><a href="#cb104-16" tabindex="-1"></a>dend <span class="sc">%&gt;%</span></span>
<span id="cb104-17"><a href="#cb104-17" tabindex="-1"></a> <span class="fu">pvclust_show_signif_gradient</span>(result) <span class="sc">%&gt;%</span></span>
<span id="cb104-18"><a href="#cb104-18" tabindex="-1"></a> <span class="fu">pvclust_show_signif</span>(result) <span class="sc">%&gt;%</span></span>
<span id="cb104-19"><a href="#cb104-19" tabindex="-1"></a> <span class="fu">plot</span>(<span class="at">main =</span> <span class="st">&quot;Cluster dendrogram with AU/BP values (%)</span><span class="sc">\n</span><span class="st"> bp values are colored+highlighted by signif&quot;</span>)</span>
<span id="cb104-20"><a href="#cb104-20" tabindex="-1"></a>result <span class="sc">%&gt;%</span> text</span>
<span id="cb104-21"><a href="#cb104-21" tabindex="-1"></a>result <span class="sc">%&gt;%</span> <span class="fu">pvrect</span>(<span class="at">alpha=</span><span class="fl">0.95</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="circlize" class="section level3">
<h3>circlize</h3>
<p>Circular layout is an efficient way for the visualization of huge
amounts of information. The circlize package provides an implementation
of circular layout generation in R, including a solution for dendrogram
objects produced using dendextend:</p>
<div class="sourceCode" id="cb105"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb105-1"><a href="#cb105-1" tabindex="-1"></a><span class="fu">library</span>(circlize)</span>
<span id="cb105-2"><a href="#cb105-2" tabindex="-1"></a></span>
<span id="cb105-3"><a href="#cb105-3" tabindex="-1"></a>dend <span class="ot">&lt;-</span> iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span>,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram <span class="sc">%&gt;%</span></span>
<span id="cb105-4"><a href="#cb105-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_k_color&quot;</span>, <span class="at">k=</span><span class="dv">3</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;branches_lwd&quot;</span>, <span class="fu">c</span>(<span class="dv">5</span>,<span class="dv">2</span>,<span class="fl">1.5</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb105-5"><a href="#cb105-5" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;branches_lty&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">2</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb105-6"><a href="#cb105-6" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;labels_colors&quot;</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_cex&quot;</span>, <span class="fu">c</span>(.<span class="dv">6</span>,<span class="fl">1.5</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb105-7"><a href="#cb105-7" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">&quot;nodes_pch&quot;</span>, <span class="dv">19</span>) <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;nodes_col&quot;</span>, <span class="fu">c</span>(<span class="st">&quot;orange&quot;</span>, <span class="st">&quot;black&quot;</span>, <span class="st">&quot;plum&quot;</span>, <span class="cn">NA</span>))</span>
<span id="cb105-8"><a href="#cb105-8" tabindex="-1"></a></span>
<span id="cb105-9"><a href="#cb105-9" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">rep</span>(<span class="dv">0</span>,<span class="dv">4</span>))</span>
<span id="cb105-10"><a href="#cb105-10" tabindex="-1"></a><span class="fu">circlize_dendrogram</span>(dend)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb106"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb106-1"><a href="#cb106-1" tabindex="-1"></a><span class="co"># circlize_dendrogram(dend, labels = FALSE)</span></span>
<span id="cb106-2"><a href="#cb106-2" tabindex="-1"></a><span class="co"># circlize_dendrogram(dend, facing = &quot;inside&quot;, labels = FALSE)</span></span></code></pre></div>
<p>The above is a wrapper for functions in circlize. An advantage of
using the circlize package directly to plot a circular
dendrogram is that you can add more graphics for the elements in the
tree just by adding more tracks using <code>circos.track</code>. For example:</p>
<div class="sourceCode" id="cb107"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb107-1"><a href="#cb107-1" tabindex="-1"></a><span class="co"># dend &lt;- iris[1:40,-5] %&gt;% dist %&gt;% hclust %&gt;% as.dendrogram %&gt;%</span></span>
<span id="cb107-2"><a href="#cb107-2" tabindex="-1"></a><span class="co"># set(&quot;branches_k_color&quot;, k=3) %&gt;% set(&quot;branches_lwd&quot;, c(5,2,1.5)) %&gt;%</span></span>
<span id="cb107-3"><a href="#cb107-3" tabindex="-1"></a><span class="co"># set(&quot;branches_lty&quot;, c(1,1,3,1,1,2)) %&gt;%</span></span>
<span id="cb107-4"><a href="#cb107-4" tabindex="-1"></a><span class="co"># set(&quot;labels_colors&quot;) %&gt;% set(&quot;labels_cex&quot;, c(.9,1.2)) %&gt;%</span></span>
<span id="cb107-5"><a href="#cb107-5" tabindex="-1"></a><span class="co"># set(&quot;nodes_pch&quot;, 19) %&gt;% set(&quot;nodes_col&quot;, c(&quot;orange&quot;, &quot;black&quot;, &quot;plum&quot;, NA))</span></span>
<span id="cb107-6"><a href="#cb107-6" tabindex="-1"></a></span>
<span id="cb107-7"><a href="#cb107-7" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">2015-07-10</span>) </span>
<span id="cb107-8"><a href="#cb107-8" tabindex="-1"></a><span class="co"># In the following we get the dendrogram but can also get extra information on top of it</span></span>
<span id="cb107-9"><a href="#cb107-9" tabindex="-1"></a><span class="fu">circos.initialize</span>(<span class="st">&quot;foo&quot;</span>, <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">40</span>))</span>
<span id="cb107-10"><a href="#cb107-10" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
<span id="cb107-11"><a href="#cb107-11" tabindex="-1"></a> <span class="fu">circos.rect</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.8</span>, <span class="fu">rep</span>(<span class="dv">0</span>, <span class="dv">40</span>), <span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.2</span>, <span class="fu">runif</span>(<span class="dv">40</span>), <span class="at">col =</span> <span class="fu">rand_color</span>(<span class="dv">40</span>), <span class="at">border =</span> <span class="cn">NA</span>)</span>
<span id="cb107-12"><a href="#cb107-12" tabindex="-1"></a>}, <span class="at">bg.border =</span> <span class="cn">NA</span>)</span>
<span id="cb107-13"><a href="#cb107-13" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
<span id="cb107-14"><a href="#cb107-14" tabindex="-1"></a> <span class="fu">circos.text</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">40</span><span class="fl">-0.5</span>, <span class="fu">rep</span>(<span class="dv">0</span>, <span class="dv">40</span>), <span class="fu">labels</span>(dend), <span class="at">col =</span> <span class="fu">labels_colors</span>(dend),</span>
<span id="cb107-15"><a href="#cb107-15" tabindex="-1"></a> <span class="at">facing =</span> <span class="st">&quot;clockwise&quot;</span>, <span class="at">niceFacing =</span> <span class="cn">TRUE</span>, <span class="at">adj =</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="fl">0.5</span>))</span>
<span id="cb107-16"><a href="#cb107-16" tabindex="-1"></a>}, <span class="at">bg.border =</span> <span class="cn">NA</span>, <span class="at">track.height =</span> <span class="fl">0.1</span>)</span>
<span id="cb107-17"><a href="#cb107-17" tabindex="-1"></a>max_height <span class="ot">=</span> <span class="fu">attr</span>(dend, <span class="st">&quot;height&quot;</span>)</span>
<span id="cb107-18"><a href="#cb107-18" tabindex="-1"></a><span class="fu">circos.track</span>(<span class="at">ylim =</span> <span class="fu">c</span>(<span class="dv">0</span>, max_height), <span class="at">panel.fun =</span> <span class="cf">function</span>(x, y) {</span>
<span id="cb107-19"><a href="#cb107-19" tabindex="-1"></a> <span class="fu">circos.dendrogram</span>(dend, <span class="at">max_height =</span> max_height)</span>
<span id="cb107-20"><a href="#cb107-20" tabindex="-1"></a>}, <span class="at">track.height =</span> <span class="fl">0.5</span>, <span class="at">bg.border =</span> <span class="cn">NA</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb108"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb108-1"><a href="#cb108-1" tabindex="-1"></a><span class="fu">circos.clear</span>()</span></code></pre></div>
</div>
</div>
<div id="comparing-two-dendrograms" class="section level2">
<h2>Comparing two dendrograms</h2>
<div id="dendlist" class="section level3">
<h3>dendlist</h3>
<p><code>dendlist</code> is a function which produces an object of the
dendlist class. It accepts several dendrograms and/or dendlist objects and
chains them all together. This function aims to help with the usability of
comparing two or more dendrograms.</p>
<div class="sourceCode" id="cb109"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb109-1"><a href="#cb109-1" tabindex="-1"></a>dend15 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>) <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">&quot;average&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb109-2"><a href="#cb109-2" tabindex="-1"></a>dend15 <span class="ot">&lt;-</span> dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels_to_char&quot;</span>)</span>
<span id="cb109-3"><a href="#cb109-3" tabindex="-1"></a>dend51 <span class="ot">&lt;-</span> dend15 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels&quot;</span>, <span class="fu">as.character</span>(<span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)) <span class="sc">%&gt;%</span> <span class="fu">match_order_by_labels</span>(dend15)</span>
<span id="cb109-4"><a href="#cb109-4" tabindex="-1"></a>dends_15_51 <span class="ot">&lt;-</span> <span class="fu">dendlist</span>(dend15, dend51)</span>
<span id="cb109-5"><a href="#cb109-5" tabindex="-1"></a>dends_15_51</span></code></pre></div>
<pre><code>#&gt; [[1]]
#&gt; &#39;dendrogram&#39; with 2 branches and 5 members total, at height 2.5
#&gt;
#&gt; [[2]]
#&gt; &#39;dendrogram&#39; with 2 branches and 5 members total, at height 2.5
#&gt;
#&gt; attr(,&quot;class&quot;)
#&gt; [1] &quot;dendlist&quot;</code></pre>
<div class="sourceCode" id="cb111"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb111-1"><a href="#cb111-1" tabindex="-1"></a><span class="fu">head</span>(dends_15_51)</span></code></pre></div>
<pre><code>#&gt; ============
#&gt; dend 1
#&gt; ---------
#&gt; --[dendrogram w/ 2 branches and 5 members at h = 2.5]
#&gt; |--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; | |--leaf &quot;1&quot;
#&gt; | `--leaf &quot;2&quot;
#&gt; `--[dendrogram w/ 2 branches and 3 members at h = 1.5]
#&gt; |--leaf &quot;5&quot;
#&gt; `--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; |--leaf &quot;3&quot;
#&gt; `--leaf &quot;4&quot;
#&gt; etc...
#&gt; ============
#&gt; dend 2
#&gt; ---------
#&gt; --[dendrogram w/ 2 branches and 5 members at h = 2.5]
#&gt; |--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; | |--leaf &quot;5&quot;
#&gt; | `--leaf &quot;4&quot;
#&gt; `--[dendrogram w/ 2 branches and 3 members at h = 1.5]
#&gt; |--leaf &quot;3&quot;
#&gt; `--[dendrogram w/ 2 branches and 2 members at h = 1]
#&gt; |--leaf &quot;2&quot;
#&gt; `--leaf &quot;1&quot;
#&gt; etc...</code></pre>
<p>The function <code>match_order_by_labels</code> makes sure that the
order in the leaves corresponds to the same labels in both trees.</p>
</div>
<div id="dend_diff" class="section level3">
<h3>dend_diff</h3>
<p>The <code>dend_diff</code> function plots two trees side by side,
highlighting edges unique to each tree in red. It relies on the
<code>distinct_edges</code> function.</p>
<p>For example:</p>
<div class="sourceCode" id="cb113"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb113-1"><a href="#cb113-1" tabindex="-1"></a><span class="co"># example 1</span></span>
<span id="cb113-2"><a href="#cb113-2" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb113-3"><a href="#cb113-3" tabindex="-1"></a>y <span class="ot">&lt;-</span> <span class="fu">set</span>(x, <span class="st">&quot;labels&quot;</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)</span>
<span id="cb113-4"><a href="#cb113-4" tabindex="-1"></a></span>
<span id="cb113-5"><a href="#cb113-5" tabindex="-1"></a><span class="co"># example 2</span></span>
<span id="cb113-6"><a href="#cb113-6" tabindex="-1"></a>dend1 <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb113-7"><a href="#cb113-7" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> dend1 <span class="sc">%&gt;%</span> <span class="fu">set</span>(<span class="st">&quot;labels&quot;</span>, <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">3</span>,<span class="dv">2</span>,<span class="dv">4</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">10</span>) )</span>
<span id="cb113-8"><a href="#cb113-8" tabindex="-1"></a><span class="fu">dend_diff</span>(dend1, dend2)</span></code></pre></div>
<p><img role="img" src="
<p>See the <code>highlight_distinct_edges</code> function for more
control over how to create the distinction (color, line width, line
type).</p>
</div>
<div id="tanglegram" class="section level3">
<h3>tanglegram</h3>
<p>A tanglegram plot gives two dendrograms (with the same set of labels),
one facing the other, and having their labels connected by lines.
Tanglegrams can be used for visually comparing two methods of
hierarchical clustering, and are sometimes used in biology when
comparing two phylogenetic trees.</p>
<p>Here is an example of creating a tanglegram using dendextend:</p>
<div class="sourceCode" id="cb114"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb114-1"><a href="#cb114-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dends_15_51)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb115"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb115-1"><a href="#cb115-1" tabindex="-1"></a><span class="co"># Same as using:</span></span>
<span id="cb115-2"><a href="#cb115-2" tabindex="-1"></a><span class="co"># plot(dends_15_51) # since there is a plot method for dendlist</span></span>
<span id="cb115-3"><a href="#cb115-3" tabindex="-1"></a><span class="co"># and also: </span></span>
<span id="cb115-4"><a href="#cb115-4" tabindex="-1"></a><span class="co"># tanglegram(dend15, dend51)</span></span></code></pre></div>
<p>Notice how “unique” nodes are highlighted with dashed lines (i.e.:
nodes which contain a combination of labels/items, which are not
present in the other tree). This can be turned off using
<code>highlight_distinct_edges = FALSE</code>. Also notice how the
connecting lines are colored to highlight two sub-trees which are
present in both dendrograms. This can be turned off by setting
<code>common_subtrees_color_lines = FALSE</code>. We can also color the
branches of the trees to show the two common sub-trees using
<code>common_subtrees_color_branches = TRUE</code>:</p>
<div class="sourceCode" id="cb116"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb116-1"><a href="#cb116-1" tabindex="-1"></a><span class="fu">tanglegram</span>(dends_15_51, <span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
<p>We may wish to improve the layout of the trees. For this we have the
<code>entanglement</code> function, to measure the quality of the alignment of
the two trees in the tanglegram layout, and the <code>untangle</code>
function, for improving it.</p>
<div class="sourceCode" id="cb117"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb117-1"><a href="#cb117-1" tabindex="-1"></a>dends_15_51 <span class="sc">%&gt;%</span> entanglement <span class="co"># lower is better</span></span></code></pre></div>
<pre><code>#&gt; [1] 0.9167078</code></pre>
<div class="sourceCode" id="cb119"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb119-1"><a href="#cb119-1" tabindex="-1"></a><span class="co"># dends_15_51 %&gt;% untangle(method = &quot;DendSer&quot;) %&gt;% entanglement # lower is better</span></span>
<span id="cb119-2"><a href="#cb119-2" tabindex="-1"></a>dends_15_51 <span class="sc">%&gt;%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">&quot;step1side&quot;</span>) <span class="sc">%&gt;%</span> entanglement <span class="co"># lower is better</span></span></code></pre></div>
<pre><code>#&gt; [1] 0</code></pre>
<p>Notice that just because we can get two trees to have horizontal
connecting lines, it doesn’t mean these trees are identical (or even
very similar topologically):</p>
<div class="sourceCode" id="cb121"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb121-1"><a href="#cb121-1" tabindex="-1"></a>dends_15_51 <span class="sc">%&gt;%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">&quot;step1side&quot;</span>) <span class="sc">%&gt;%</span> </span>
<span id="cb121-2"><a href="#cb121-2" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Entanglement is measured by giving the left tree’s labels the values
of 1 till tree size, and then matching these numbers with the right tree.
Now, entanglement is the L norm distance between these two vectors. That
is, we take the sum of the absolute differences (each one to the power of
L), e.g.: <code>sum(abs(x-y)**L)</code>. This is then divided by the
“worst case” entanglement level (e.g.: when the right tree is the
complete reverse of the left tree).</p>
<p>L tells us which penalty level we are at (L0, L1, L2, partial Ls
etc.). L&gt;1 means that we give a big penalty for sharp angles, while
L-&gt;0 means that any time something is not a straight horizontal line,
it gets a large penalty. If L=0.1, it means that we much prefer straight
lines over non-straight lines.</p>
<p>Finding an optimal rotation for the tanglegram of two dendrograms is a
hard problem. This problem is also harder for larger trees.</p>
<p>Let’s see how well some untangle methods can do.</p>
<p>Without doing anything:</p>
<div class="sourceCode" id="cb122"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb122-1"><a href="#cb122-1" tabindex="-1"></a>x <span class="ot">&lt;-</span> dends_15_51 </span>
<span id="cb122-2"><a href="#cb122-2" tabindex="-1"></a>x <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;entanglement =&quot;</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
<p><img role="img" src="
<p>Using ladderize (the DendSer call is left commented out):</p>
<div class="sourceCode" id="cb123"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb123-1"><a href="#cb123-1" tabindex="-1"></a><span class="co"># x &lt;- dends_15_51 %&gt;% untangle(method = &quot;DendSer&quot;) </span></span>
<span id="cb123-2"><a href="#cb123-2" tabindex="-1"></a>x <span class="ot">&lt;-</span> dends_15_51 <span class="sc">%&gt;%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">&quot;ladderize&quot;</span>) </span>
<span id="cb123-3"><a href="#cb123-3" tabindex="-1"></a>x <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;entanglement =&quot;</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
<p><img role="img" src="
<p>One solution for improving the tanglegram would be to randomly search
the rotated tree space for a better solution. Here is how to use a
random search:</p>
<div class="sourceCode" id="cb124"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb124-1"><a href="#cb124-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">3958</span>)</span>
<span id="cb124-2"><a href="#cb124-2" tabindex="-1"></a>x <span class="ot">&lt;-</span> dends_15_51 <span class="sc">%&gt;%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">&quot;random&quot;</span>, <span class="at">R =</span> <span class="dv">10</span>) </span>
<span id="cb124-3"><a href="#cb124-3" tabindex="-1"></a>x <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;entanglement =&quot;</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
<p><img role="img" src="
<p>We can see we already got something better. An advantage of the
random search is the ability to create many many trees and compare them
to find the best pair.</p>
<p>Let’s use a greedy forward stepwise rotation of the two trees (first
the left, then the right, and so on), to see if we can find a better
solution for comparing the two trees. Notice that this may take some
time to run (the larger the tree, the longer it would take), but we can
limit the search for smaller ks, and see what improvement that can
bring us using step2side (slowest):</p>
<div class="sourceCode" id="cb125"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb125-1"><a href="#cb125-1" tabindex="-1"></a>x <span class="ot">&lt;-</span> dends_15_51 <span class="sc">%&gt;%</span> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">&quot;step2side&quot;</span>) </span>
<span id="cb125-2"><a href="#cb125-2" tabindex="-1"></a>x <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">main =</span> <span class="fu">paste</span>(<span class="st">&quot;entanglement =&quot;</span>, <span class="fu">round</span>(<span class="fu">entanglement</span>(x), <span class="dv">2</span>)))</span></code></pre></div>
<p><img role="img" src="
<p>We got perfect entanglement (0).</p>
</div>
<div id="correlation-measures" class="section level3">
<h3>Correlation measures</h3>
<p>We shall use the following for the upcoming examples:</p>
<div class="sourceCode" id="cb126"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb126-1"><a href="#cb126-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
<span id="cb126-2"><a href="#cb126-2" tabindex="-1"></a>ss <span class="ot">&lt;-</span> <span class="fu">sample</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">150</span>, <span class="dv">10</span> )</span>
<span id="cb126-3"><a href="#cb126-3" tabindex="-1"></a>dend1 <span class="ot">&lt;-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="st">&quot;com&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb126-4"><a href="#cb126-4" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="st">&quot;single&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb126-5"><a href="#cb126-5" tabindex="-1"></a>dend3 <span class="ot">&lt;-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="st">&quot;ave&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb126-6"><a href="#cb126-6" tabindex="-1"></a>dend4 <span class="ot">&lt;-</span> iris[ss,<span class="sc">-</span><span class="dv">5</span>] <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> <span class="fu">hclust</span>(<span class="st">&quot;centroid&quot;</span>) <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb126-7"><a href="#cb126-7" tabindex="-1"></a></span>
<span id="cb126-8"><a href="#cb126-8" tabindex="-1"></a>dend1234 <span class="ot">&lt;-</span> <span class="fu">dendlist</span>(<span class="st">&quot;Complete&quot;</span> <span class="ot">=</span> dend1, <span class="st">&quot;Single&quot;</span> <span class="ot">=</span> dend2, <span class="st">&quot;Average&quot;</span> <span class="ot">=</span> dend3, <span class="st">&quot;Centroid&quot;</span> <span class="ot">=</span> dend4)</span>
<span id="cb126-9"><a href="#cb126-9" tabindex="-1"></a></span>
<span id="cb126-10"><a href="#cb126-10" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">2</span>))</span>
<span id="cb126-11"><a href="#cb126-11" tabindex="-1"></a><span class="fu">plot</span>(dend1, <span class="at">main =</span> <span class="st">&quot;Complete&quot;</span>)</span>
<span id="cb126-12"><a href="#cb126-12" tabindex="-1"></a><span class="fu">plot</span>(dend2, <span class="at">main =</span> <span class="st">&quot;Single&quot;</span>)</span>
<span id="cb126-13"><a href="#cb126-13" tabindex="-1"></a><span class="fu">plot</span>(dend3, <span class="at">main =</span> <span class="st">&quot;Average&quot;</span>)</span>
<span id="cb126-14"><a href="#cb126-14" tabindex="-1"></a><span class="fu">plot</span>(dend4, <span class="at">main =</span> <span class="st">&quot;Centroid&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<div id="global-comparison-of-two-or-more-dendrograms" class="section level4">
<h4>Global Comparison of two (or more) dendrograms</h4>
<p>The <code>all.equal.dendrogram</code> function makes a global
comparison of two or more dendrogram trees.</p>
<div class="sourceCode" id="cb127"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb127-1"><a href="#cb127-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend1)</span></code></pre></div>
<pre><code>#&gt; [1] TRUE</code></pre>
<div class="sourceCode" id="cb129"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb129-1"><a href="#cb129-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2)</span></code></pre></div>
<pre><code>#&gt; [1] &quot;Difference in branch heights - Mean relative difference: 0.4932164&quot;</code></pre>
<div class="sourceCode" id="cb131"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb131-1"><a href="#cb131-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>#&gt; [1] &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7, 13 | Unique edges in current: 7, 9, 11&quot;</code></pre>
<div class="sourceCode" id="cb133"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb133-1"><a href="#cb133-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1, dend2, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>, <span class="at">use.topology =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>#&gt; [1] TRUE</code></pre>
<div class="sourceCode" id="cb135"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb135-1"><a href="#cb135-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend2, dend4, <span class="at">use.edge.length =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<pre><code>#&gt; [1] &quot;Difference in branch heights - Mean relative difference: 0.1969642&quot;</code></pre>
<div class="sourceCode" id="cb137"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb137-1"><a href="#cb137-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend2, dend4, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>#&gt; [1] &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 11 | Unique edges in current: 13&quot;</code></pre>
<div class="sourceCode" id="cb139"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb139-1"><a href="#cb139-1" tabindex="-1"></a><span class="fu">all.equal</span>(<span class="fu">dendlist</span>(dend1, dend1, dend1))</span></code></pre></div>
<pre><code>#&gt; [1] TRUE</code></pre>
<div class="sourceCode" id="cb141"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb141-1"><a href="#cb141-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1234)</span></code></pre></div>
<pre><code>#&gt; 1==2
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.4932164&quot;
#&gt; 1==3
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.2767035&quot;
#&gt; 1==4
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.4081231&quot;
#&gt; 2==3
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.4545673&quot;
#&gt; 2==4
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.1969642&quot;
#&gt; 3==4
#&gt; &quot;Difference in branch heights - Mean relative difference: 0.1970749&quot;</code></pre>
<div class="sourceCode" id="cb143"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb143-1"><a href="#cb143-1" tabindex="-1"></a><span class="fu">all.equal</span>(dend1234, <span class="at">use.edge.length =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>#&gt; 1==2
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7, 13 | Unique edges in current: 7, 9, 11&quot;
#&gt; 1==3
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 7 | Unique edges in current: 7&quot;
#&gt; 1==4
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 2, 7 | Unique edges in current: 7, 9&quot;
#&gt; 2==3
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 9, 11 | Unique edges in current: 8, 15&quot;
#&gt; 2==4
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 11 | Unique edges in current: 13&quot;
#&gt; 3==4
#&gt; &quot;Dendrograms contain diffreent edges (i.e.: topology). Unique edges in target: | 15 | Unique edges in current: 9&quot;</code></pre>
</div>
<div id="distance-matrix-using-dist.dendlist" class="section level4">
<h4>Distance matrix using dist.dendlist</h4>
<p>The <code>dist.dendlist</code> function computes the Robinson-Foulds
distance (also known as symmetric difference) between two dendrograms.
This is the sum of edges in both trees with labels that exist in only
one of the two trees (i.e.: the length of
<code>distinct_edges</code>).</p>
<div class="sourceCode" id="cb145"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb145-1"><a href="#cb145-1" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span> <span class="sc">%&gt;%</span> dist <span class="sc">%&gt;%</span> hclust <span class="sc">%&gt;%</span> as.dendrogram</span>
<span id="cb145-2"><a href="#cb145-2" tabindex="-1"></a>y <span class="ot">&lt;-</span> <span class="fu">set</span>(x, <span class="st">&quot;labels&quot;</span>, <span class="dv">5</span><span class="sc">:</span><span class="dv">1</span>)</span>
<span id="cb145-3"><a href="#cb145-3" tabindex="-1"></a></span>
<span id="cb145-4"><a href="#cb145-4" tabindex="-1"></a><span class="fu">dist.dendlist</span>(<span class="fu">dendlist</span>(<span class="at">x1 =</span> x,<span class="at">x2 =</span> x,<span class="at">y1 =</span> y))</span></code></pre></div>
<pre><code>#&gt; x1 x2
#&gt; x2 0
#&gt; y1 4 4</code></pre>
<div class="sourceCode" id="cb147"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb147-1"><a href="#cb147-1" tabindex="-1"></a><span class="fu">dend_diff</span>(x,y)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb148"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb148-1"><a href="#cb148-1" tabindex="-1"></a><span class="fu">dist.dendlist</span>(dend1234)</span></code></pre></div>
<pre><code>#&gt; Complete Single Average
#&gt; Single 6
#&gt; Average 2 4
#&gt; Centroid 4 2 2</code></pre>
<p>This function might implement other topological distances in the
future.</p>
</div>
<div id="correlation-matrix-using-cor.dendlist" class="section level4">
<h4>Correlation matrix using cor.dendlist</h4>
<p>Both Baker’s Gamma and cophenetic correlation (which will be
introduced shortly), can be calculated to create a correlation matrix
using the <code>cor.dendlist</code> function (the default method is
cophenetic correlation):</p>
<div class="sourceCode" id="cb150"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb150-1"><a href="#cb150-1" tabindex="-1"></a><span class="fu">cor.dendlist</span>(dend1234)</span></code></pre></div>
<pre><code>#&gt; Complete Single Average Centroid
#&gt; Complete 1.0000000 0.4272001 0.5635291 0.4466374
#&gt; Single 0.4272001 1.0000000 0.9508998 0.9910913
#&gt; Average 0.5635291 0.9508998 1.0000000 0.9556376
#&gt; Centroid 0.4466374 0.9910913 0.9556376 1.0000000</code></pre>
<p>The corrplot library offers a nice visualization:</p>
<div class="sourceCode" id="cb152"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb152-1"><a href="#cb152-1" tabindex="-1"></a><span class="fu">library</span>(corrplot)</span>
<span id="cb152-2"><a href="#cb152-2" tabindex="-1"></a><span class="fu">corrplot</span>(<span class="fu">cor.dendlist</span>(dend1234), <span class="st">&quot;pie&quot;</span>, <span class="st">&quot;lower&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
<p>Which easily tells us that single, average and centroid give similar
results, while complete is somewhat different.</p>
<div class="sourceCode" id="cb153"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb153-1"><a href="#cb153-1" tabindex="-1"></a><span class="co"># same subtrees, so there is no need to color the branches</span></span>
<span id="cb153-2"><a href="#cb153-2" tabindex="-1"></a>dend1234 <span class="sc">%&gt;%</span> <span class="fu">tanglegram</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">3</span>)) </span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb154"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb154-1"><a href="#cb154-1" tabindex="-1"></a><span class="co"># Here the branches colors are very helpful:</span></span>
<span id="cb154-2"><a href="#cb154-2" tabindex="-1"></a>dend1234 <span class="sc">%&gt;%</span> <span class="fu">tanglegram</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">2</span>), </span>
<span id="cb154-3"><a href="#cb154-3" tabindex="-1"></a> <span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
<div id="bakers-gamma-index" class="section level4">
<h4>Bakers Gamma Index</h4>
<p>Baker’s Gamma Index (see Baker’s paper from 1974) is a measure of
association (similarity) between two trees of hierarchical clustering
(dendrograms). It is defined as the rank correlation between the stages
at which pairs of objects combine in each of the two trees.</p>
<p>In more detail: it is calculated by taking two items and finding
the highest possible level of k (the number of cluster groups created
when cutting the tree) for which the two items still belong to the same
tree. That k is returned, and the same is done for these two items in
the second tree. There are “n choose 2” such pairs of items
among the items in the tree, and all of these numbers are calculated for
each of the two trees. Then, these two sets of numbers (one set for the
items in each tree) are paired according to the pairs of items compared,
and a Spearman correlation is calculated.</p>
<p>The value can range between -1 and 1, with values near 0 meaning that
the two trees are not statistically similar. For an exact p-value one
should use a permutation test. One such option is to permute
the labels of one tree many times, calculating the distribution under
the null hypothesis (keeping the tree topologies constant).</p>
<p>Notice that this measure is not affected by the height of a branch
but only by its relative position compared with other branches.</p>
<div class="sourceCode" id="cb155"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb155-1"><a href="#cb155-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend15, dend51)</span></code></pre></div>
<pre><code>#&gt; [1] 0.2751938</code></pre>
<p>Even though we can reach perfect entanglement, Baker’s gamma shows us
that the trees’ topology is not identical, as opposed to the
correlation of a tree with itself:</p>
<div class="sourceCode" id="cb157"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb157-1"><a href="#cb157-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend15, dend15)</span></code></pre></div>
<pre><code>#&gt; [1] 1</code></pre>
<p>Since the observations used to compute Baker’s Gamma Index are
correlated, we need to perform a permutation test to calculate the
statistical significance of the index. Let’s look at
the distribution of Baker’s Gamma Index under the null hypothesis
(assuming fixed tree topologies). This will be different for different
tree structures and sizes. Here are the results when the compared tree
is itself (after shuffling its own labels), and when comparing tree 1 to
the shuffled tree 2:</p>
<div class="sourceCode" id="cb159"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb159-1"><a href="#cb159-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
<span id="cb159-2"><a href="#cb159-2" tabindex="-1"></a>the_cor <span class="ot">&lt;-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend15)</span>
<span id="cb159-3"><a href="#cb159-3" tabindex="-1"></a>the_cor2 <span class="ot">&lt;-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend51)</span>
<span id="cb159-4"><a href="#cb159-4" tabindex="-1"></a>the_cor</span></code></pre></div>
<pre><code>#&gt; [1] 1</code></pre>
<div class="sourceCode" id="cb161"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb161-1"><a href="#cb161-1" tabindex="-1"></a>the_cor2</span></code></pre></div>
<pre><code>#&gt; [1] 0.2751938</code></pre>
<div class="sourceCode" id="cb163"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb163-1"><a href="#cb163-1" tabindex="-1"></a>R <span class="ot">&lt;-</span> <span class="dv">100</span></span>
<span id="cb163-2"><a href="#cb163-2" tabindex="-1"></a>cor_bakers_gamma_results <span class="ot">&lt;-</span> <span class="fu">numeric</span>(R)</span>
<span id="cb163-3"><a href="#cb163-3" tabindex="-1"></a>dend_mixed <span class="ot">&lt;-</span> dend15</span>
<span id="cb163-4"><a href="#cb163-4" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>R) {</span>
<span id="cb163-5"><a href="#cb163-5" tabindex="-1"></a> dend_mixed <span class="ot">&lt;-</span> <span class="fu">sample.dendrogram</span>(dend_mixed, <span class="at">replace =</span> <span class="cn">FALSE</span>)</span>
<span id="cb163-6"><a href="#cb163-6" tabindex="-1"></a> cor_bakers_gamma_results[i] <span class="ot">&lt;-</span> <span class="fu">cor_bakers_gamma</span>(dend15, dend_mixed)</span>
<span id="cb163-7"><a href="#cb163-7" tabindex="-1"></a>}</span>
<span id="cb163-8"><a href="#cb163-8" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(cor_bakers_gamma_results),</span>
<span id="cb163-9"><a href="#cb163-9" tabindex="-1"></a> <span class="at">main =</span> <span class="st">&quot;Baker&#39;s gamma distribution under H0&quot;</span>,</span>
<span id="cb163-10"><a href="#cb163-10" tabindex="-1"></a> <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>,<span class="dv">1</span>))</span>
<span id="cb163-11"><a href="#cb163-11" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="dv">0</span>, <span class="at">lty =</span> <span class="dv">2</span>)</span>
<span id="cb163-12"><a href="#cb163-12" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> the_cor, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>)</span>
<span id="cb163-13"><a href="#cb163-13" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> the_cor2, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">4</span>)</span>
<span id="cb163-14"><a href="#cb163-14" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">&quot;topleft&quot;</span>, <span class="at">legend =</span> <span class="fu">c</span>(<span class="st">&quot;cor&quot;</span>, <span class="st">&quot;cor2&quot;</span>), <span class="at">fill =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">4</span>))</span>
<span id="cb163-15"><a href="#cb163-15" tabindex="-1"></a><span class="fu">round</span>(<span class="fu">sum</span>(the_cor2 <span class="sc">&lt;</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>)</span></code></pre></div>
<pre><code>#&gt; [1] 0.17</code></pre>
<div class="sourceCode" id="cb165"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb165-1"><a href="#cb165-1" tabindex="-1"></a><span class="fu">title</span>(<span class="at">sub =</span> <span class="fu">paste</span>(<span class="st">&quot;One sided p-value:&quot;</span>,</span>
<span id="cb165-2"><a href="#cb165-2" tabindex="-1"></a> <span class="st">&quot;cor =&quot;</span>, <span class="fu">round</span>(<span class="fu">sum</span>(the_cor <span class="sc">&lt;</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>),</span>
<span id="cb165-3"><a href="#cb165-3" tabindex="-1"></a> <span class="st">&quot; ; cor2 =&quot;</span>, <span class="fu">round</span>(<span class="fu">sum</span>(the_cor2 <span class="sc">&lt;</span> cor_bakers_gamma_results)<span class="sc">/</span> R, <span class="dv">4</span>)</span>
<span id="cb165-4"><a href="#cb165-4" tabindex="-1"></a> ))</span></code></pre></div>
<p><img role="img" src="
<p>We can see that we do not have enough evidence that dend15 and dend51
are significantly “similar” (i.e.: with a correlation larger than
0).</p>
<p>We can also build a bootstrap confidence interval, using
<code>sample.dendrogram</code>, for the correlation. This function can
be very slow for larger trees, so make sure you use it carefully:</p>
<div class="sourceCode" id="cb166"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb166-1"><a href="#cb166-1" tabindex="-1"></a>dend1 <span class="ot">&lt;-</span> dend15</span>
<span id="cb166-2"><a href="#cb166-2" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> dend51</span>
<span id="cb166-3"><a href="#cb166-3" tabindex="-1"></a></span>
<span id="cb166-4"><a href="#cb166-4" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23801</span>)</span>
<span id="cb166-5"><a href="#cb166-5" tabindex="-1"></a></span>
<span id="cb166-6"><a href="#cb166-6" tabindex="-1"></a>R <span class="ot">&lt;-</span> <span class="dv">100</span></span>
<span id="cb166-7"><a href="#cb166-7" tabindex="-1"></a>dend1_labels <span class="ot">&lt;-</span> <span class="fu">labels</span>(dend1)</span>
<span id="cb166-8"><a href="#cb166-8" tabindex="-1"></a>dend2_labels <span class="ot">&lt;-</span> <span class="fu">labels</span>(dend2)</span>
<span id="cb166-9"><a href="#cb166-9" tabindex="-1"></a>cor_bakers_gamma_results <span class="ot">&lt;-</span> <span class="fu">numeric</span>(R)</span>
<span id="cb166-10"><a href="#cb166-10" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span>R) {</span>
<span id="cb166-11"><a href="#cb166-11" tabindex="-1"></a> sampled_labels <span class="ot">&lt;-</span> <span class="fu">sample</span>(dend1_labels, <span class="at">replace =</span> <span class="cn">TRUE</span>)</span>
<span id="cb166-12"><a href="#cb166-12" tabindex="-1"></a> <span class="co"># members needs to be fixed since it will be later used in nleaves</span></span>
<span id="cb166-13"><a href="#cb166-13" tabindex="-1"></a> dend_mixed1 <span class="ot">&lt;-</span> <span class="fu">sample.dendrogram</span>(dend1, </span>
<span id="cb166-14"><a href="#cb166-14" tabindex="-1"></a> <span class="at">dend_labels=</span>dend1_labels,</span>
<span id="cb166-15"><a href="#cb166-15" tabindex="-1"></a> <span class="at">fix_members=</span><span class="cn">TRUE</span>,<span class="at">fix_order=</span><span class="cn">TRUE</span>,<span class="at">fix_midpoint=</span><span class="cn">FALSE</span>,</span>
<span id="cb166-16"><a href="#cb166-16" tabindex="-1"></a> <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">sampled_labels=</span>sampled_labels</span>
<span id="cb166-17"><a href="#cb166-17" tabindex="-1"></a> )</span>
<span id="cb166-18"><a href="#cb166-18" tabindex="-1"></a> dend_mixed2 <span class="ot">&lt;-</span> <span class="fu">sample.dendrogram</span>(dend2, <span class="at">dend_labels=</span>dend2_labels,</span>
<span id="cb166-19"><a href="#cb166-19" tabindex="-1"></a> <span class="at">fix_members=</span><span class="cn">TRUE</span>,<span class="at">fix_order=</span><span class="cn">TRUE</span>,<span class="at">fix_midpoint=</span><span class="cn">FALSE</span>,</span>
<span id="cb166-20"><a href="#cb166-20" tabindex="-1"></a> <span class="at">replace =</span> <span class="cn">TRUE</span>, <span class="at">sampled_labels=</span>sampled_labels</span>
<span id="cb166-21"><a href="#cb166-21" tabindex="-1"></a> ) </span>
<span id="cb166-22"><a href="#cb166-22" tabindex="-1"></a> cor_bakers_gamma_results[i] <span class="ot">&lt;-</span> <span class="fu">cor_bakers_gamma</span>(dend_mixed1, dend_mixed2, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span>
<span id="cb166-23"><a href="#cb166-23" tabindex="-1"></a>}</span>
<span id="cb166-24"><a href="#cb166-24" tabindex="-1"></a></span>
<span id="cb166-25"><a href="#cb166-25" tabindex="-1"></a></span>
<span id="cb166-26"><a href="#cb166-26" tabindex="-1"></a><span class="co"># here is the tanglegram</span></span>
<span id="cb166-27"><a href="#cb166-27" tabindex="-1"></a><span class="fu">tanglegram</span>(dend1, dend2)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb167"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb167-1"><a href="#cb167-1" tabindex="-1"></a><span class="co"># And here is the tanglegram for one sample of our trees:</span></span>
<span id="cb167-2"><a href="#cb167-2" tabindex="-1"></a>dend_mixed1 <span class="ot">&lt;-</span> <span class="fu">rank_order.dendrogram</span>(dend_mixed1)</span>
<span id="cb167-3"><a href="#cb167-3" tabindex="-1"></a>dend_mixed2 <span class="ot">&lt;-</span> <span class="fu">rank_order.dendrogram</span>(dend_mixed2)</span>
<span id="cb167-4"><a href="#cb167-4" tabindex="-1"></a>dend_mixed1 <span class="ot">&lt;-</span> <span class="fu">fix_members_attr.dendrogram</span>(dend_mixed1)</span>
<span id="cb167-5"><a href="#cb167-5" tabindex="-1"></a>dend_mixed2 <span class="ot">&lt;-</span> <span class="fu">fix_members_attr.dendrogram</span>(dend_mixed2)</span>
<span id="cb167-6"><a href="#cb167-6" tabindex="-1"></a><span class="fu">tanglegram</span>(dend_mixed1, dend_mixed2)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb168"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb168-1"><a href="#cb168-1" tabindex="-1"></a><span class="fu">cor_bakers_gamma</span>(dend_mixed1, dend_mixed2, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>#&gt; [1] 1</code></pre>
<div class="sourceCode" id="cb170"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb170-1"><a href="#cb170-1" tabindex="-1"></a>CI95 <span class="ot">&lt;-</span> <span class="fu">quantile</span>(cor_bakers_gamma_results, <span class="at">probs=</span><span class="fu">c</span>(.<span class="dv">025</span>,.<span class="dv">975</span>))</span>
<span id="cb170-2"><a href="#cb170-2" tabindex="-1"></a>CI95</span></code></pre></div>
<pre><code>#&gt; 2.5% 97.5%
#&gt; 0.2751938 1.0000000</code></pre>
<div class="sourceCode" id="cb172"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb172-1"><a href="#cb172-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">1</span>))</span>
<span id="cb172-2"><a href="#cb172-2" tabindex="-1"></a><span class="fu">plot</span>(<span class="fu">density</span>(cor_bakers_gamma_results),</span>
<span id="cb172-3"><a href="#cb172-3" tabindex="-1"></a> <span class="at">main =</span> <span class="st">&quot;Baker&#39;s gamma bootstrap distribution&quot;</span>,</span>
<span id="cb172-4"><a href="#cb172-4" tabindex="-1"></a> <span class="at">xlim =</span> <span class="fu">c</span>(<span class="sc">-</span><span class="dv">1</span>,<span class="dv">1</span>))</span>
<span id="cb172-5"><a href="#cb172-5" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> CI95, <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">3</span>)</span>
<span id="cb172-6"><a href="#cb172-6" tabindex="-1"></a><span class="fu">abline</span>(<span class="at">v =</span> <span class="fu">cor_bakers_gamma</span>(dend1, dend2), <span class="at">lty =</span> <span class="dv">2</span>, <span class="at">col =</span> <span class="dv">2</span>)</span>
<span id="cb172-7"><a href="#cb172-7" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">&quot;topleft&quot;</span>, <span class="at">legend =</span><span class="fu">c</span>(<span class="st">&quot;95% CI&quot;</span>, <span class="st">&quot;Baker&#39;s Gamma Index&quot;</span>), <span class="at">fill =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">2</span>))</span></code></pre></div>
<p><img role="img" src="
<p>The bootstrap sampling can do weird things with small trees. In this
case, the two trees often ended up with a perfect correlation. The
usage and interpretation should be done carefully!</p>
</div>
<div id="cophenetic-correlation" class="section level4">
<h4>Cophenetic correlation</h4>
<p>The cophenetic distance between two observations that have been
clustered is defined to be the inter-group dissimilarity at which the
two observations are first combined into a single cluster. This distance
has many ties and restrictions. The cophenetic correlation (see sokal
1962) is the correlation between two cophenetic distance matrices of two
trees.</p>
<p>The value can range between -1 and 1, with values near 0 meaning that
the two trees are not statistically similar. For an exact p-value one
should resort to a permutation test. One such option is to permute
the labels of one tree many times, calculating the distribution
under the null hypothesis (keeping the tree topologies constant).</p>
<div class="sourceCode" id="cb173"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb173-1"><a href="#cb173-1" tabindex="-1"></a><span class="fu">cor_cophenetic</span>(dend15, dend51)</span></code></pre></div>
<pre><code>#&gt; [1] 0.3125</code></pre>
<p>The function <code>cor_cophenetic</code> is faster than
<code>cor_bakers_gamma</code>, and might be preferred for that
reason.</p>
</div>
</div>
<div id="the-fowlkes-mallows-index-and-the-bk-plot" class="section level3">
<h3>The Fowlkes-Mallows Index and the Bk plot</h3>
<div id="the-fowlkes-mallows-index" class="section level4">
<h4>The Fowlkes-Mallows Index</h4>
<p>The Fowlkes-Mallows Index (see fowlkes 1983) (FM Index, or Bk) is a
measure of similarity between two clusterings. The FM index ranges from
0 to 1; a higher value indicates a greater similarity between the two
clusterings.</p>
<p>The dendextend package allows the calculation of FM-Index, its
expectancy and variance under the null hypothesis, and a creation of
permutations of the FM-Index under H0. Thanks to the profdpm package, we
have another example of calculating the FM (though it does not offer the
expectancy and variance under H0):</p>
<div class="sourceCode" id="cb175"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb175-1"><a href="#cb175-1" tabindex="-1"></a>hc1 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">&quot;com&quot;</span>)</span>
<span id="cb175-2"><a href="#cb175-2" tabindex="-1"></a>hc2 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">&quot;single&quot;</span>)</span>
<span id="cb175-3"><a href="#cb175-3" tabindex="-1"></a></span>
<span id="cb175-4"><a href="#cb175-4" tabindex="-1"></a><span class="co"># FM index of a cluster with himself is 1:</span></span>
<span id="cb175-5"><a href="#cb175-5" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>))</span></code></pre></div>
<pre><code>#&gt; [1] 1
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.37217
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 5.985372e-05</code></pre>
<div class="sourceCode" id="cb177"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb177-1"><a href="#cb177-1" tabindex="-1"></a><span class="co"># FM index of two clusterings:</span></span>
<span id="cb177-2"><a href="#cb177-2" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>)) </span></code></pre></div>
<pre><code>#&gt; [1] 0.8059522
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.4462325
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 6.464092e-05</code></pre>
<div class="sourceCode" id="cb179"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb179-1"><a href="#cb179-1" tabindex="-1"></a><span class="co"># we got a value far above the expected under H0</span></span>
<span id="cb179-2"><a href="#cb179-2" tabindex="-1"></a> </span>
<span id="cb179-3"><a href="#cb179-3" tabindex="-1"></a><span class="co"># Using the R code:</span></span>
<span id="cb179-4"><a href="#cb179-4" tabindex="-1"></a><span class="fu">FM_index_R</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>))</span></code></pre></div>
<pre><code>#&gt; [1] 0.8059522
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.4462325
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 6.464092e-05</code></pre>
<p>The E_FM and V_FM are the values expected under the null hypothesis
that the two trees have the same topology but one is a random shuffle of
the labels of the other (i.e.: “no connection” between the trees).</p>
<p>So for the values:</p>
<div class="sourceCode" id="cb181"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb181-1"><a href="#cb181-1" tabindex="-1"></a><span class="fu">FM_index</span>(<span class="fu">cutree</span>(hc1, <span class="at">k=</span><span class="dv">3</span>), <span class="fu">cutree</span>(hc2, <span class="at">k=</span><span class="dv">3</span>)) </span></code></pre></div>
<pre><code>#&gt; [1] 0.8059522
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.4462325
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 6.464092e-05</code></pre>
<p>We can take (under a normal asymptotic distribution)</p>
<div class="sourceCode" id="cb183"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb183-1"><a href="#cb183-1" tabindex="-1"></a><span class="fl">0.4462</span> <span class="sc">+</span> <span class="fl">1.645</span> <span class="sc">*</span> <span class="fu">sqrt</span>(<span class="fl">6.464092e-05</span>)</span></code></pre></div>
<pre><code>#&gt; [1] 0.4594257</code></pre>
<p>And since 0.8059 (our value) &gt; 0.4594 (the critical value under
H0, with alpha=5% for a one sided test) - then we can say that we
significantly reject the hypothesis that the two trees are
“not-similar”.</p>
</div>
<div id="the-bk-plot" class="section level4">
<h4>The Bk plot</h4>
<p>In the Bk method we calculate the FM Index (Bk) for each k
(k=2,3,…,n-1) number of clusters, giving the association between the two
trees when each is cut to have k groups. The similarity between two
hierarchical clustering dendrograms, can be investigated, using the
(k,Bk) plot: For every level of splitting of the two dendrograms which
produces k clusters in each tree, the plot shows the number Bk, and
therefore enables the investigation of potential nuances in the
structure of similarity. The Bk measures the number of pairs of items
which are in the same cluster in both dendrograms, one of the clusters
in one of the trees and one of the clusters in the other tree, divided
by the geometric mean of the number of pairs of items which are in the
same cluster in each tree. Namely, <span class="math inline">\(a_{uv} = 1\)</span>
(or <span class="math inline">\(b_{uv} = 1\)</span>) if the items
u and v are in the same cluster in the first tree (second tree), when it
is cut so as to give k clusters, and otherwise 0:</p>
<p><span class="math display">\[{FM_k} = {B_k} =
\frac{{\sum\limits_{}^{} {{a_{uv}}{b_{uv}}} }}{{\sqrt {\sum\limits_{}^{}
{{a_{uv}}} \sum\limits_{}^{} {{b_{uv}}} } }}\]</span></p>
<p>The Bk measure can be plotted for every value of k (except k=n) in
order to create the “(k,Bk) plot”. The plot compares the similarity of
the two trees for different cuts. The mean and variance of Bk, under the
null hypothesis (that the two trees are not “similar”), and under the
assumption that the margins of the matching matrix are fixed, are given
in Fowlkes and Mallows (see fowlkes 1983). They allow making inference
on whether the results obtained are different from what would have been
expected under the null hypothesis (of no particular order of the
trees’ labels).</p>
<p>The <code>Bk</code> and the <code>Bk_plot</code> functions allow the
calculation of the FM-Index for a range of k values on two trees. Here
are examples:</p>
<div class="sourceCode" id="cb185"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb185-1"><a href="#cb185-1" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">23235</span>)</span>
<span id="cb185-2"><a href="#cb185-2" tabindex="-1"></a>ss <span class="ot">&lt;-</span> <span class="cn">TRUE</span> <span class="co"># sample(1:150, 30 ) # TRUE #</span></span>
<span id="cb185-3"><a href="#cb185-3" tabindex="-1"></a>hc1 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[ss,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">&quot;com&quot;</span>)</span>
<span id="cb185-4"><a href="#cb185-4" tabindex="-1"></a>hc2 <span class="ot">&lt;-</span> <span class="fu">hclust</span>(<span class="fu">dist</span>(iris[ss,<span class="sc">-</span><span class="dv">5</span>]), <span class="st">&quot;single&quot;</span>)</span>
<span id="cb185-5"><a href="#cb185-5" tabindex="-1"></a>dend1 <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hc1)</span>
<span id="cb185-6"><a href="#cb185-6" tabindex="-1"></a>dend2 <span class="ot">&lt;-</span> <span class="fu">as.dendrogram</span>(hc2)</span>
<span id="cb185-7"><a href="#cb185-7" tabindex="-1"></a><span class="co"># cutree(tree1) </span></span>
<span id="cb185-8"><a href="#cb185-8" tabindex="-1"></a></span>
<span id="cb185-9"><a href="#cb185-9" tabindex="-1"></a><span class="co"># It works the same for hclust and dendrograms:</span></span>
<span id="cb185-10"><a href="#cb185-10" tabindex="-1"></a><span class="fu">Bk</span>(hc1, hc2, <span class="at">k =</span> <span class="dv">3</span>)</span></code></pre></div>
<pre><code>#&gt; $`3`
#&gt; [1] 0.8059522
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.4462325
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 6.464092e-05</code></pre>
<div class="sourceCode" id="cb187"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb187-1"><a href="#cb187-1" tabindex="-1"></a><span class="fu">Bk</span>(dend1, dend2, <span class="at">k =</span> <span class="dv">3</span>)</span></code></pre></div>
<pre><code>#&gt; $`3`
#&gt; [1] 0.8059522
#&gt; attr(,&quot;E_FM&quot;)
#&gt; [1] 0.4462325
#&gt; attr(,&quot;V_FM&quot;)
#&gt; [1] 6.464092e-05</code></pre>
<p>The Bk plot:</p>
<div class="sourceCode" id="cb189"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb189-1"><a href="#cb189-1" tabindex="-1"></a><span class="fu">Bk_plot</span>(hc1, hc2, <span class="at">main =</span> <span class="st">&quot;WRONG Bk plot </span><span class="sc">\n</span><span class="st">(due to the way cutree works with ties in hclust)&quot;</span>, <span class="at">warn =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<p><img role="img" src="
<div class="sourceCode" id="cb190"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb190-1"><a href="#cb190-1" tabindex="-1"></a><span class="fu">Bk_plot</span>(dend1, dend2, <span class="at">main =</span> <span class="st">&quot;CORRECT Bk plot </span><span class="sc">\n</span><span class="st">(based on dendrograms)&quot;</span>)</span></code></pre></div>
<p><img role="img" src="
</div>
</div>
</div>
<div id="session-info" class="section level1">
<h1>Session info</h1>
<div class="sourceCode" id="cb191"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb191-1"><a href="#cb191-1" tabindex="-1"></a><span class="fu">sessionInfo</span>()</span></code></pre></div>
<pre><code>#&gt; R version 4.4.1 (2024-06-14)
#&gt; Platform: x86_64-apple-darwin20
#&gt; Running under: macOS Big Sur 11.7.10
#&gt;
#&gt; Matrix products: default
#&gt; BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
#&gt; LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
#&gt;
#&gt; locale:
#&gt; [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#&gt;
#&gt; time zone: America/New_York
#&gt; tzcode source: internal
#&gt;
#&gt; attached base packages:
#&gt; [1] stats graphics grDevices utils datasets methods base
#&gt;
#&gt; other attached packages:
#&gt; [1] corrplot_0.94 pvclust_2.2-0 dynamicTreeCut_1.63-1
#&gt; [4] gplots_3.1.3.1 DendSer_1.0.2 seriation_1.5.6
#&gt; [7] gclus_1.3.2 cluster_2.1.6 viridis_0.6.5
#&gt; [10] viridisLite_0.4.2 ggplot2_3.5.1 circlize_0.4.16
#&gt; [13] colorspace_2.1-1 knitr_1.48 dendextend_1.19.0
#&gt;
#&gt; loaded via a namespace (and not attached):
#&gt; [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3
#&gt; [4] bitops_1.0-8 KernSmooth_2.23-24 shape_1.4.6.1
#&gt; [7] gtools_3.9.5 digest_0.6.37 magrittr_2.0.3
#&gt; [10] caTools_1.18.2 evaluate_0.24.0 grid_4.4.1
#&gt; [13] iterators_1.0.14 fastmap_1.2.0 foreach_1.5.2
#&gt; [16] jsonlite_1.8.8 GlobalOptions_0.1.2 gridExtra_2.3
#&gt; [19] fansi_1.0.6 scales_1.3.0 codetools_0.2-20
#&gt; [22] jquerylib_0.1.4 registry_0.5-1 cli_3.6.3
#&gt; [25] rlang_1.1.4 munsell_0.5.1 withr_3.0.1
#&gt; [28] cachem_1.1.0 yaml_2.3.10 tools_4.4.1
#&gt; [31] dplyr_1.1.4 ca_0.71.1 TSP_1.2-4
#&gt; [34] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4
#&gt; [37] MASS_7.3-60.2 pkgconfig_2.0.3 pillar_1.9.0
#&gt; [40] bslib_0.8.0 gtable_0.3.5 glue_1.7.0
#&gt; [43] xfun_0.47 tibble_3.2.1 tidyselect_1.2.1
#&gt; [46] highr_0.11 rstudioapi_0.16.0 farver_2.1.2
#&gt; [49] htmltools_0.5.8.1 rmarkdown_2.28 labeling_0.4.3
#&gt; [52] compiler_4.4.1</code></pre>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Inject the MathJax loader at runtime so that formulas still render when
// the document is built as a self-contained file.
(function () {
  var head = document.getElementsByTagName("head")[0];
  var mathjaxLoader = document.createElement("script");
  mathjaxLoader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  mathjaxLoader.type = "text/javascript";
  // The fetch only starts once the element is attached to the document.
  head.appendChild(mathjaxLoader);
})();
</script>
</body>
</html>