1145 lines
2.5 MiB
HTML
1145 lines
2.5 MiB
HTML
|
<!DOCTYPE html>
|
|||
|
|
|||
|
<html lang="en">
|
|||
|
|
|||
|
<head>
|
|||
|
|
|||
|
<meta charset="utf-8" />
|
|||
|
<meta name="generator" content="pandoc" />
|
|||
|
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
|
|||
|
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|||
|
|
|||
|
<meta name="author" content="Tal Galili" />
|
|||
|
|
|||
|
<meta name="date" content="2024-11-15" />
|
|||
|
|
|||
|
<title>Hierarchical cluster analysis on famous data sets - enhanced with the dendextend package</title>
|
|||
|
|
|||
|
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
|
|||
|
// be compatible with the behavior of Pandoc < 2.8).
|
|||
|
document.addEventListener('DOMContentLoaded', function() {
  // Pandoc 2.9+ puts the section attributes on both the wrapping div and its
  // heading. Strip them from the heading so the markup matches what older
  // Pandoc versions produced (attributes on the div only).
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");

  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only headings h1-h6 are cleaned; any other first child is left alone.
    if (!headingTag.test(node.tagName)) {
      return;
    }
    // `attributes` is a live collection, so keep removing its first entry
    // until nothing is left.
    var attrs = node.attributes;
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
|
|||
|
</script>
|
|||
|
|
|||
|
<style type="text/css">
|
|||
|
code{white-space: pre-wrap;}
|
|||
|
span.smallcaps{font-variant: small-caps;}
|
|||
|
span.underline{text-decoration: underline;}
|
|||
|
div.column{display: inline-block; vertical-align: top; width: 50%;}
|
|||
|
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
|||
|
ul.task-list{list-style: none;}
|
|||
|
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">
|
|||
|
code {
|
|||
|
white-space: pre;
|
|||
|
}
|
|||
|
.sourceCode {
|
|||
|
overflow: visible;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<style type="text/css" data-origin="pandoc">
|
|||
|
pre > code.sourceCode { white-space: pre; position: relative; }
|
|||
|
pre > code.sourceCode > span { line-height: 1.25; }
|
|||
|
pre > code.sourceCode > span:empty { height: 1.2em; }
|
|||
|
.sourceCode { overflow: visible; }
|
|||
|
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
|||
|
div.sourceCode { margin: 1em 0; }
|
|||
|
pre.sourceCode { margin: 0; }
|
|||
|
@media screen {
|
|||
|
div.sourceCode { overflow: auto; }
|
|||
|
}
|
|||
|
@media print {
|
|||
|
pre > code.sourceCode { white-space: pre-wrap; }
|
|||
|
pre > code.sourceCode > span { display: inline-block; text-indent: -5em; padding-left: 5em; }
|
|||
|
}
|
|||
|
pre.numberSource code
|
|||
|
{ counter-reset: source-line 0; }
|
|||
|
pre.numberSource code > span
|
|||
|
{ position: relative; left: -4em; counter-increment: source-line; }
|
|||
|
pre.numberSource code > span > a:first-child::before
|
|||
|
{ content: counter(source-line);
|
|||
|
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
|||
|
border: none; display: inline-block;
|
|||
|
-webkit-touch-callout: none; -webkit-user-select: none;
|
|||
|
-khtml-user-select: none; -moz-user-select: none;
|
|||
|
-ms-user-select: none; user-select: none;
|
|||
|
padding: 0 4px; width: 4em;
|
|||
|
color: #aaaaaa;
|
|||
|
}
|
|||
|
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
|||
|
div.sourceCode
|
|||
|
{ }
|
|||
|
@media screen {
|
|||
|
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
|||
|
}
|
|||
|
code span.al { color: #ff0000; font-weight: bold; }
|
|||
|
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.at { color: #7d9029; }
|
|||
|
code span.bn { color: #40a070; }
|
|||
|
code span.bu { color: #008000; }
|
|||
|
code span.cf { color: #007020; font-weight: bold; }
|
|||
|
code span.ch { color: #4070a0; }
|
|||
|
code span.cn { color: #880000; }
|
|||
|
code span.co { color: #60a0b0; font-style: italic; }
|
|||
|
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.do { color: #ba2121; font-style: italic; }
|
|||
|
code span.dt { color: #902000; }
|
|||
|
code span.dv { color: #40a070; }
|
|||
|
code span.er { color: #ff0000; font-weight: bold; }
|
|||
|
code span.ex { }
|
|||
|
code span.fl { color: #40a070; }
|
|||
|
code span.fu { color: #06287e; }
|
|||
|
code span.im { color: #008000; font-weight: bold; }
|
|||
|
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.kw { color: #007020; font-weight: bold; }
|
|||
|
code span.op { color: #666666; }
|
|||
|
code span.ot { color: #007020; }
|
|||
|
code span.pp { color: #bc7a00; }
|
|||
|
code span.sc { color: #4070a0; }
|
|||
|
code span.ss { color: #bb6688; }
|
|||
|
code span.st { color: #4070a0; }
|
|||
|
code span.va { color: #19177c; }
|
|||
|
code span.vs { color: #4070a0; }
|
|||
|
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
</style>
|
|||
|
<script>
|
|||
|
// apply pandoc div.sourceCode style to pre.sourceCode instead
|
|||
|
(function() {
  // Moves the colour styling that pandoc attaches to `div.sourceCode` onto
  // `pre.sourceCode` instead, by rewriting the rule inside the stylesheet.
  var allSheets = document.styleSheets;
  var s, idx, sheet, ruleList, current, declarations, targetsDiv;

  for (s = 0; s < allSheets.length; s++) {
    sheet = allSheets[s];

    // Only the stylesheet whose <style> element carries data-origin="pandoc"
    // is rewritten; every other sheet is left untouched.
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;

    // Reading cssRules is guarded: presumably some sheets (e.g. ones the
    // browser refuses to expose) throw here, and those are simply skipped.
    try { ruleList = sheet.cssRules; } catch (err) { continue; }

    for (idx = 0; idx < ruleList.length; idx++) {
      current = ruleList[idx];

      // Look for a plain style rule whose selector is exactly div.sourceCode.
      targetsDiv = current.type === current.STYLE_RULE &&
        current.selectorText === "div.sourceCode";
      if (!targetsDiv) continue;

      declarations = current.style.cssText;

      // Nothing to move unless the rule sets color or background-color.
      if (current.style.color === '' && current.style.backgroundColor === '') continue;

      // Swap the rule in place: same declarations, same position, new selector.
      sheet.deleteRule(idx);
      sheet.insertRule('pre.sourceCode{' + declarations + '}', idx);
    }
  }
})();
|
|||
|
</script>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">body {
|
|||
|
background-color: #fff;
|
|||
|
margin: 1em auto;
|
|||
|
max-width: 700px;
|
|||
|
overflow: visible;
|
|||
|
padding-left: 2em;
|
|||
|
padding-right: 2em;
|
|||
|
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
|||
|
font-size: 14px;
|
|||
|
line-height: 1.35;
|
|||
|
}
|
|||
|
#TOC {
|
|||
|
clear: both;
|
|||
|
margin: 0 0 10px 10px;
|
|||
|
padding: 4px;
|
|||
|
width: 400px;
|
|||
|
border: 1px solid #CCCCCC;
|
|||
|
border-radius: 5px;
|
|||
|
background-color: #f6f6f6;
|
|||
|
font-size: 13px;
|
|||
|
line-height: 1.3;
|
|||
|
}
|
|||
|
#TOC .toctitle {
|
|||
|
font-weight: bold;
|
|||
|
font-size: 15px;
|
|||
|
margin-left: 5px;
|
|||
|
}
|
|||
|
#TOC ul {
|
|||
|
padding-left: 40px;
|
|||
|
margin-left: -1.5em;
|
|||
|
margin-top: 5px;
|
|||
|
margin-bottom: 5px;
|
|||
|
}
|
|||
|
#TOC ul ul {
|
|||
|
margin-left: -2em;
|
|||
|
}
|
|||
|
#TOC li {
|
|||
|
line-height: 16px;
|
|||
|
}
|
|||
|
table {
|
|||
|
margin: 1em auto;
|
|||
|
border-width: 1px;
|
|||
|
border-color: #DDDDDD;
|
|||
|
border-style: outset;
|
|||
|
border-collapse: collapse;
|
|||
|
}
|
|||
|
table th {
|
|||
|
border-width: 2px;
|
|||
|
padding: 5px;
|
|||
|
border-style: inset;
|
|||
|
}
|
|||
|
table td {
|
|||
|
border-width: 1px;
|
|||
|
border-style: inset;
|
|||
|
line-height: 18px;
|
|||
|
padding: 5px 5px;
|
|||
|
}
|
|||
|
table, table th, table td {
|
|||
|
border-left-style: none;
|
|||
|
border-right-style: none;
|
|||
|
}
|
|||
|
table thead, table tr.even {
|
|||
|
background-color: #f7f7f7;
|
|||
|
}
|
|||
|
p {
|
|||
|
margin: 0.5em 0;
|
|||
|
}
|
|||
|
blockquote {
|
|||
|
background-color: #f6f6f6;
|
|||
|
padding: 0.25em 0.75em;
|
|||
|
}
|
|||
|
/* Horizontal rule: drawn as a single 1px grey line along the top edge. */
hr {
  /* The shorthand clears the border on all four sides; a preceding
     `border-style: solid` was dead (overridden here) and has been removed. */
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
|
|||
|
dl {
|
|||
|
margin-left: 0;
|
|||
|
}
|
|||
|
dl dd {
|
|||
|
margin-bottom: 13px;
|
|||
|
margin-left: 13px;
|
|||
|
}
|
|||
|
dl dt {
|
|||
|
font-weight: bold;
|
|||
|
}
|
|||
|
ul {
|
|||
|
margin-top: 0;
|
|||
|
}
|
|||
|
ul li {
|
|||
|
list-style: circle outside;
|
|||
|
}
|
|||
|
ul ul {
|
|||
|
margin-bottom: 0;
|
|||
|
}
|
|||
|
pre, code {
|
|||
|
background-color: #f7f7f7;
|
|||
|
border-radius: 3px;
|
|||
|
color: #333;
|
|||
|
white-space: pre-wrap;
|
|||
|
}
|
|||
|
pre {
|
|||
|
border-radius: 3px;
|
|||
|
margin: 5px 0px 10px 0px;
|
|||
|
padding: 10px;
|
|||
|
}
|
|||
|
pre:not([class]) {
|
|||
|
background-color: #f7f7f7;
|
|||
|
}
|
|||
|
code {
|
|||
|
font-family: Consolas, Monaco, 'Courier New', monospace;
|
|||
|
font-size: 85%;
|
|||
|
}
|
|||
|
p > code, li > code {
|
|||
|
padding: 2px 0px;
|
|||
|
}
|
|||
|
div.figure {
|
|||
|
text-align: center;
|
|||
|
}
|
|||
|
/* Images: white mat with a thin rounded grey frame. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  /* Single border declaration: the rule used to set `border` twice and the
     later value (#CCCCCC) always won, so only that one is kept. */
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
|
|||
|
h1 {
|
|||
|
margin-top: 0;
|
|||
|
font-size: 35px;
|
|||
|
line-height: 40px;
|
|||
|
}
|
|||
|
h2 {
|
|||
|
border-bottom: 4px solid #f7f7f7;
|
|||
|
padding-top: 10px;
|
|||
|
padding-bottom: 2px;
|
|||
|
font-size: 145%;
|
|||
|
}
|
|||
|
h3 {
|
|||
|
border-bottom: 2px solid #f7f7f7;
|
|||
|
padding-top: 10px;
|
|||
|
font-size: 120%;
|
|||
|
}
|
|||
|
h4 {
|
|||
|
border-bottom: 1px solid #f7f7f7;
|
|||
|
margin-left: 8px;
|
|||
|
font-size: 105%;
|
|||
|
}
|
|||
|
h5, h6 {
|
|||
|
border-bottom: 1px solid #ccc;
|
|||
|
font-size: 105%;
|
|||
|
}
|
|||
|
a {
|
|||
|
color: #0033dd;
|
|||
|
text-decoration: none;
|
|||
|
}
|
|||
|
a:hover {
|
|||
|
color: #6666ff; }
|
|||
|
a:visited {
|
|||
|
color: #800080; }
|
|||
|
a:visited:hover {
|
|||
|
color: #BB00BB; }
|
|||
|
a[href^="http:"] {
|
|||
|
text-decoration: underline; }
|
|||
|
a[href^="https:"] {
|
|||
|
text-decoration: underline; }
|
|||
|
|
|||
|
code > span.kw { color: #555; font-weight: bold; }
|
|||
|
code > span.dt { color: #902000; }
|
|||
|
code > span.dv { color: #40a070; }
|
|||
|
code > span.bn { color: #d14; }
|
|||
|
code > span.fl { color: #d14; }
|
|||
|
code > span.ch { color: #d14; }
|
|||
|
code > span.st { color: #d14; }
|
|||
|
code > span.co { color: #888888; font-style: italic; }
|
|||
|
code > span.ot { color: #007020; }
|
|||
|
code > span.al { color: #ff0000; font-weight: bold; }
|
|||
|
code > span.fu { color: #900; font-weight: bold; }
|
|||
|
code > span.er { color: #a61717; background-color: #e3d2d2; }
|
|||
|
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
</head>
|
|||
|
|
|||
|
<body>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<h1 class="title toc-ignore">Hierarchical cluster analysis on famous
|
|||
|
data sets - enhanced with the <em>dendextend</em> package</h1>
|
|||
|
<h4 class="author">Tal Galili</h4>
|
|||
|
<h4 class="date">2024-11-15</h4>
|
|||
|
|
|||
|
|
|||
|
<div id="TOC">
|
|||
|
<ul>
|
|||
|
<li><a href="#introduction" id="toc-introduction">Introduction</a></li>
|
|||
|
<li><a href="#iris---edgar-andersons-iris-data" id="toc-iris---edgar-andersons-iris-data">iris - Edgar Anderson’s Iris
|
|||
|
Data</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#background" id="toc-background">Background</a></li>
|
|||
|
<li><a href="#the-3-clusters-from-the-complete-method-vs-the-real-species-category" id="toc-the-3-clusters-from-the-complete-method-vs-the-real-species-category">The
|
|||
|
3 clusters from the “complete” method vs the real species
|
|||
|
category</a></li>
|
|||
|
<li><a href="#similaritydifference-between-various-clustering-algorithms" id="toc-similaritydifference-between-various-clustering-algorithms">Similarity/difference
|
|||
|
between various clustering algorithms</a></li>
|
|||
|
<li><a href="#clustering-prediction-of-the-3-species-classes" id="toc-clustering-prediction-of-the-3-species-classes">Clustering
|
|||
|
prediction of the 3 species classes</a></li>
|
|||
|
<li><a href="#conclusion" id="toc-conclusion">Conclusion</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#khan---microarray-gene-expression-data-set-from-khan-et-al.-2001.-subset-of-306-genes." id="toc-khan---microarray-gene-expression-data-set-from-khan-et-al.-2001.-subset-of-306-genes.">khan
|
|||
|
- Microarray gene expression data set from Khan et al., 2001. Subset of
|
|||
|
306 genes.</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#background-1" id="toc-background-1">Background</a></li>
|
|||
|
<li><a href="#comparing-the-train-vs-test-dendrograms" id="toc-comparing-the-train-vs-test-dendrograms">Comparing the train vs
|
|||
|
test dendrograms</a></li>
|
|||
|
<li><a href="#conclusion-1" id="toc-conclusion-1">Conclusion</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#votes.repub---votes-for-republican-candidate-in-presidential-elections" id="toc-votes.repub---votes-for-republican-candidate-in-presidential-elections">votes.repub
|
|||
|
- Votes for Republican Candidate in Presidential Elections</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#background-2" id="toc-background-2">Background</a></li>
|
|||
|
<li><a href="#heatmap" id="toc-heatmap">Heatmap</a></li>
|
|||
|
</ul></li>
|
|||
|
<li><a href="#animals---attributes-of-animals" id="toc-animals---attributes-of-animals">animals - Attributes of
|
|||
|
Animals</a>
|
|||
|
<ul>
|
|||
|
<li><a href="#background-3" id="toc-background-3">Background</a></li>
|
|||
|
<li><a href="#heatmap-1" id="toc-heatmap-1">Heatmap</a></li>
|
|||
|
</ul></li>
|
|||
|
</ul>
|
|||
|
</div>
|
|||
|
|
|||
|
<!--
|
|||
|
%\VignetteEngine{knitr::rmarkdown}
|
|||
|
%\VignetteIndexEntry{Hierarchical cluster analysis on famous data sets - enhanced with the _dendextend_ package}
|
|||
|
-->
|
|||
|
<div id="introduction" class="section level2">
|
|||
|
<h2>Introduction</h2>
|
|||
|
<p>This document demonstrates, on several famous data sets, how the
|
|||
|
<em>dendextend</em> R package can be used to enhance Hierarchical
|
|||
|
Cluster Analysis (through better visualization and sensitivity
|
|||
|
analysis).</p>
|
|||
|
</div>
|
|||
|
<div id="iris---edgar-andersons-iris-data" class="section level2">
|
|||
|
<h2>iris - Edgar Anderson’s Iris Data</h2>
|
|||
|
<div id="background" class="section level3">
|
|||
|
<h3>Background</h3>
|
|||
|
<blockquote>
|
|||
|
<p>The famous (Fisher’s or Anderson’s) iris data set gives the
|
|||
|
measurements in centimeters of the variables sepal length and width and
|
|||
|
petal length and width, respectively, for 50 flowers from each of 3
|
|||
|
species of iris. The species are Iris setosa, versicolor, and virginica.
|
|||
|
(from <code>?iris</code>)</p>
|
|||
|
</blockquote>
|
|||
|
<p>The <a href="https://en.wikipedia.org/wiki/Iris_flower_data_set">Iris
|
|||
|
flower data set</a> is fun for learning supervised classification
|
|||
|
algorithms, and is known as a difficult case for unsupervised learning.
|
|||
|
This is easily seen through the following Scatter Plot Matrix
|
|||
|
(SPLOM):</p>
|
|||
|
<p>Define variables:</p>
|
|||
|
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a>iris <span class="ot"><-</span> datasets<span class="sc">::</span>iris</span>
|
|||
|
<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a>iris2 <span class="ot"><-</span> iris[,<span class="sc">-</span><span class="dv">5</span>]</span>
|
|||
|
<span id="cb1-3"><a href="#cb1-3" tabindex="-1"></a>species_labels <span class="ot"><-</span> iris[,<span class="dv">5</span>]</span>
|
|||
|
<span id="cb1-4"><a href="#cb1-4" tabindex="-1"></a><span class="fu">library</span>(colorspace) <span class="co"># get nice colors</span></span>
|
|||
|
<span id="cb1-5"><a href="#cb1-5" tabindex="-1"></a>species_col <span class="ot"><-</span> <span class="fu">rev</span>(<span class="fu">rainbow_hcl</span>(<span class="dv">3</span>))[<span class="fu">as.numeric</span>(species_labels)]</span></code></pre></div>
|
|||
|
<p>SPLOM:</p>
|
|||
|
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a><span class="co"># Plot a SPLOM:</span></span>
|
|||
|
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a><span class="fu">pairs</span>(iris2, <span class="at">col =</span> species_col,</span>
|
|||
|
<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a> <span class="at">lower.panel =</span> <span class="cn">NULL</span>,</span>
|
|||
|
<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a> <span class="at">cex.labels=</span><span class="dv">2</span>, <span class="at">pch=</span><span class="dv">19</span>, <span class="at">cex =</span> <span class="fl">1.2</span>)</span>
|
|||
|
<span id="cb2-5"><a href="#cb2-5" tabindex="-1"></a></span>
|
|||
|
<span id="cb2-6"><a href="#cb2-6" tabindex="-1"></a><span class="co"># Add a legend</span></span>
|
|||
|
<span id="cb2-7"><a href="#cb2-7" tabindex="-1"></a><span class="fu">par</span>(<span class="at">xpd =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb2-8"><a href="#cb2-8" tabindex="-1"></a><span class="fu">legend</span>(<span class="at">x =</span> <span class="fl">0.05</span>, <span class="at">y =</span> <span class="fl">0.4</span>, <span class="at">cex =</span> <span class="dv">2</span>,</span>
|
|||
|
<span id="cb2-9"><a href="#cb2-9" tabindex="-1"></a> <span class="at">legend =</span> <span class="fu">as.character</span>(<span class="fu">levels</span>(species_labels)),</span>
|
|||
|
<span id="cb2-10"><a href="#cb2-10" tabindex="-1"></a> <span class="at">fill =</span> <span class="fu">unique</span>(species_col))</span>
|
|||
|
<span id="cb2-11"><a href="#cb2-11" tabindex="-1"></a><span class="fu">par</span>(<span class="at">xpd =</span> <span class="cn">NA</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqMAAAKjCAYAAAApnwZoAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAKjoAMABAAAAAEAAAKjAAAAANgC0G8AAEAASURBVHgB7F0HfBVV1j/pvSckIZAQegdBBFSKSBMBRcXeu66urmVX17XX/XR37WtbO3axgghKFUF67y0BQkjvPXnf+d+XeZl57yUkeSUv4ZzfL3kzd+7c8p87d8499xQvExMJCQKCgCAgCAgCgoAgIAgIAm2AgHcb1ClVCgKCgCAgCAgCgoAgIAgIAgoBYUZlIAgCgoAgIAgIAoKAICAItBkCwoy2GfRSsSAgCAgCgoAgIAgIAoKAMKMyBgQBQUAQEAQEAUFAEBAE2gwBYUbbDHqpWBAQBAQBQUAQEAQEAUFAmFEZA4KAICAICAKCgCAgCAgCbYaAMKNtBr1ULAgIAoKAICAICAKCgCAgzKiMAUFAEBAEBAFBQBAQBASBNkNAmNE2g14qFgQEAUFAEBAEBAFBQBAQZl
TGgCAgCAgCgoAgIAgIAoJAmyEgzGibQS8VCwKCgCAgCAgCgoAgIAgIMypjQBAQBAQBQUAQEAQEAUGgzRAQZrTNoJeKBQFBQBAQBAQBQUAQEASEGZUxIAgIAoKAICAICAKCgCDQZggIM9pm0EvFgoAgIAgIAoKAICAICALCjMoYEAQEAUFAEBAEBAFBQBBoMwSEGW0z6KViQUAQEAQEAUFAEBAEBAFhRmUMCAKCgCAgCAgCgoAgIAi0GQLCjLYZ9FKxICAICAKCgCAgCAgCgoAwozIGBAFBQBAQBAQBQUAQEATaDAFhRtsMeqlYEBAEBAFBQBAQBAQBQUCYURkDgoAgIAgIAoKAICAICAJthoAwo20GvVQsCAgCgoAgIAgIAoKAICDMqIwBQUAQEAQEAUFAEBAEBIE2Q0CY0TaDXioWBAQBQUAQEAQEAUFAEBBmVMaAICAICAKCgCAgCAgCgkCbIdAhmdHnnnuOPvnkEwOoGzdupLPOOovOO+88+utf/2q45qqTpuqcM2cOTZ06VbXnrbfeclUTDOVu3bqVHnjgAbryyitp+/bthmsPPfSQagvw+fXXXw3XXHGyd+9emjlzpvqbMGECPf3004ZqLrnkEkt70tLSDNecfbJlyxa64YYb1N9PP/1kKP63334jtAVtzcjIMFxrzckPP/yg6rnxxhttnoGzx8Qrr7yi6rr99tupqqrK0NwPP/xQ9evqq6+myspKw7XWnNx333103XXX2TxHlOWKZzl//nz629/+Zmgq+og2YAwDS08jzDtLlixxWrPy8vLo5ptvpptuuon++9//Oq3cbdu2KRzx3NavX++0clFQWVkZjRo1yqllumru+vnnn+nee++la665hoqKipzSZswv2rx3xhln0DfffOOUch0p5OGHH6bZs2cTfvVUUVFBZ555pnqfLrzwQv2lVh9XV1fT9OnTKT8/31DGkSNH1Dxx7rnn0po1awzXWnuSnZ1N06ZNs7m9qe+yTeZmJDQ1pzv7+4E5Hd8OzOnWGLrqPWgGBM7JYupgxAPZNGLECNPbb79t6BlP1qYff/zRkObqk6bq5AFlOnbsmKubYCifJwFTcXGx6fjx46Yvv/zScI0nAcO5u07q6upM559/vunAgQOWKpk5Ml1wwQWWc1cf/P3vfzfxxGWqra01MWNsqI4XDCaeQE2bNm0y3XXXXYZrrTlhxkHdxgy2iRlBQxHOHBPl5eWmZ555RpXPTJtp6dKllrowBoA56H//+5+JGVPLtdYc7Nmzx/TRRx+pWydNmmTiD7elGFc8y6ysLBOey6233mqpBwevv/666euvvzZhTOE6f0wN19vyhBkaEzMfJmZwnNaMjz/+2LRs2TJVHjPgpsLCQqeUzR88EzDev3+/6bLLLnNKmVohvGgx9enTRzt1yq8r5i6M4RkzZqj2rV271oTvijMJz2ry5MkmZs6dWWyLy/rjjz9MmP9AzOCYWEhhKQP9fuyxxyznjh5g3pkyZYpp4MCBptzcXENxmPsOHjxoKikpMTnjeaIfmAPAC1hTU99l67zNOW9qTnfm9wNj8p577lFNwjz38ssvG5rnDNwMBbr5pENJRktLS4kfEP3lL3+x4dSZmaCvvvqKsMJbsGCBzXVXJDRVJySTzz77rFoNQhLhasKKFJKUu+++mx588EE69dRTLVXi2s6dO+mWW25RkrScnBzLNVcfMBND48aNo9TUVEtVwAbSUEjt7r//fuJ3wnLNFQeQysbGxhKeQ1hYmKUKZmrIx8eHfH19afDgwQojy8VWHmhScEjIunfvbijFmWMiMDBQPed3331XSbdOOeUUS13M+BMzBOoc6Y6Ov169eilp+yOPPELR0dEGDF3xLPnjSY8++qilP9oB+oH+eHl5Ubdu3QjSFk+hF198kWbNmqXa5qw2XXHFFTR27Fj1XmdmZlJwcLBTir7jjjsIUqU777yTUIezaN68edSzZ09KSUlxVpHkqrkL86G/vz8xM06ffv
op9evXz2ltRkGQQv7jH/+goKAgp5bb0sLwfg4dOlTdZj0XQIIIKeXFF1/sFMl7QEAAzZ07lwYNGmTTTLyreGdDQkJsdnFsM
|
|||
|
<p>We can see that the <em>Setosa</em> species is distinctly different
|
|||
|
from <em>Versicolor</em> and <em>Virginica</em> (it has lower petal
|
|||
|
length and width). But <em>Versicolor</em> and <em>Virginica</em> cannot
|
|||
|
easily be separated based on measurements of their sepal and petal
|
|||
|
width/length.</p>
|
|||
|
<p>The same conclusion can be reached by looking at the parallel
|
|||
|
coordinates plot of the data:</p>
|
|||
|
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a><span class="co"># http://blog.safaribooksonline.com/2014/03/31/mastering-parallel-coordinate-charts-r/</span></span>
|
|||
|
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">las =</span> <span class="dv">1</span>, <span class="at">mar =</span> <span class="fu">c</span>(<span class="fl">4.5</span>, <span class="dv">3</span>, <span class="dv">3</span>, <span class="dv">2</span>) <span class="sc">+</span> <span class="fl">0.1</span>, <span class="at">cex =</span> .<span class="dv">8</span>)</span>
|
|||
|
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a>MASS<span class="sc">::</span><span class="fu">parcoord</span>(iris2, <span class="at">col =</span> species_col, <span class="at">var.label =</span> <span class="cn">TRUE</span>, <span class="at">lwd =</span> <span class="dv">2</span>)</span>
|
|||
|
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a><span class="co"># Add Title</span></span>
|
|||
|
<span id="cb3-6"><a href="#cb3-6" tabindex="-1"></a><span class="fu">title</span>(<span class="st">"Parallel coordinates plot of the Iris data"</span>)</span>
|
|||
|
<span id="cb3-7"><a href="#cb3-7" tabindex="-1"></a><span class="co"># Add a legend</span></span>
|
|||
|
<span id="cb3-8"><a href="#cb3-8" tabindex="-1"></a><span class="fu">par</span>(<span class="at">xpd =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb3-9"><a href="#cb3-9" tabindex="-1"></a><span class="fu">legend</span>(<span class="at">x =</span> <span class="fl">1.75</span>, <span class="at">y =</span> <span class="sc">-</span>.<span class="dv">25</span>, <span class="at">cex =</span> <span class="dv">1</span>,</span>
|
|||
|
<span id="cb3-10"><a href="#cb3-10" tabindex="-1"></a> <span class="at">legend =</span> <span class="fu">as.character</span>(<span class="fu">levels</span>(species_labels)),</span>
|
|||
|
<span id="cb3-11"><a href="#cb3-11" tabindex="-1"></a> <span class="at">fill =</span> <span class="fu">unique</span>(species_col), <span class="at">horiz =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAADhCAYAAABbV7VpAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAADhAAAAAKk8O2IAAEAASURBVHgB7L0HlF3Xdd+9p/eO3kECIAA2kCAJdoq9iSpWb5Y+y5bsxPJyke0kK3GyVlYSx3Icx5LVbblIVpfYRIqixCZWsBMAid7LANN7n/f9f/u98+bOw3szAxAgB8A9wJvbzz1333PP/+yel1CxuMQUiCkQUyCmQEyBM5QC+Wfoc8ePHVMgpkBMgZgCMQWcAjEQxh0hpkBMgZgCMQXOaArEQHhGv/744WMKxBSIKRBTIAbCuA/EFIgpEFMgpsAZTYEYCM/o1x8/fEyBmAIxBWIKxEAY94GYAjEFYgrEFDijKRAD4Rn9+uOHjykQUyCmQEyBGAjjPhBTIKZATIGYAmc0BWIgPKNff/zwMQViCsQUiCkQA2HcB2IKxBSIKRBT4IymQAyEZ/Trjx8+pkBMgZgCMQViII
z7QEyBmAIxBWIKnNEUiIHwjH798cPHFDh+CjQ3N9vQ0NDxVxBfGVNgmlAgBsJp8iJORjMuvfRSy8vL819hYaHV1dXZH/7hH1pfX98Jvd3FF19sq1at8jr/8i//0u+3cePGCe+xY8cOP++//tf/OuF5b+XBr3zlK96mJ5980k5U+1paWuw//+f//JY9RvRdTHbTF154wf7hH/5hstOOOt7T02OXXHKJzZw50xYsWHDU8Wi927dvd5r+t//2344671h2rFy50hYuXJjzkmN57lyVRN9/rnPYPzo6at/85jft5Zdfnui0+NgpRIEYCE+hl3U8TS0rK/PB7otf/KJdf/319v/+3/8z1uMyMQVmzJhh//f//l+77bbbJj5xkqMf//jH7atf/eokZ731hzs7O+2yyy4zQOtYyyuvvGIvvvii/dmf/Zk999xz4y5/M/WOq+gYNz7/+c/bf/yP//EYrzq+0//5n//Zfud3fsfa29uPr4L4qmlHgRgIp90rObENKi4utt/6rd+y3/u937Ovf/3rXvkjjzziy1dffdXuuusu5xSZcf+f//N/fP+GDRucw/uf//N/2ty5c+1zn/ucDQ4O2mc/+1lbsmSJ7/vQhz5kXV1dkzb2oYcestWrV1t9fb195CMfsY6Ojkmv4QTA+qyzzrL58+f7fcO9SJ/5n/7Tf7Lly5d7O/7gD/7A+vv703Xee++9dvXVV1ttba3dfPPNtm3bNj+W7ZlGRkaMAbShocHWrl1rmzZtStfDgP61r30tDRTvec977DOf+YwPtrNmzbILL7zQoB8lF20AwCeeeMIHTDjm3t5eO3TokN1xxx1WXV1tcOzr169P3xNumnZAc+h78ODB9LGwsnnzZn83X/7yl+3aa6/1tv/xH/+x8SzZSi56ANDQ8nvf+54/V+a1uej8xhtv+Hvk/H/913+1b33rW+MuzVXv4cOH7dZbb/X3wjkDAwN+3fH0j7/+67+28847z/7H//gfxrt44IEH7Mc//rF9//vf9zoPHDhgv/u7v+t95/zzz8/JkU/0/qkIbnndunVWU1Njl19+ub+r3bt3e//j+Kc+9Sn7yU9+kvP9c05cThEKqMPH5TSlgMRXiaqqqoTEfAnN/BN/+qd/ShLmhEAxIfFOQh95YtGiRQkNKIkLLrjAj2nASmhw9vWioqLEO97xjoRAKfGFL3whITFr4s///M8TAjQ//qUvfckpd9FFFyUEpL7+v/7X//JjAp7Evn37EiUlJQmBQOJP/uRPEhUVFQkNgn6eRGZ+3l/8xV8cRf177rnHjwkUEv/+3/97v6+4Dz+P+/MMV155pbeN9d/+7d/2Y08//bTfb86cOYmPfexjvi4gTXR3d2d9pn/8x3/0uqATNNGkwbd//etfJzLbB30kXk5oME9IvOzn3XnnnX7fXLThOQRq3o7f/M3fTAiwE+LKE5WVlU4PaKYJgrfvmWee8ToF8om/+7u/S5SXlyfEdRxFG3Fjfh5t5T2sWbPGtx988EE/N/ouJqIH9IR2K1asSAhYjrpPLjrv2bMnIS7Zr73uuusS//Iv/zLu2sx6NRHxcwsKChIcu+WWW3z729/+9oT9Y1yl2jjnnHMSEsP67tCPBYIJcbWJXbt2JaLP/Ud/9EeJefPmJTQRcRrynALczCoTE73/rVu3en/gWSW+9/fB82oik7jiiiv8GXgWTXQm/DaOumm8Y1pSgFlhXE5TCjDAMwhEf+KUEgxOFHEcPoiwZNDlPIm60qDxgQ98IE0ZwEQcUEJcTeIHP/iBnwu4UaKDUBQI/+qv/srPEwfq5wFYgIl0lEcBjZ+Q+vPe977Xr2PQoUgfk9CsP6EZfEKzcwft1Kk+KOXn5yckpkqI8/XrxDX5YYk2fVsz+6zPBJDxzOE+PC/buYAQIAfMKOKME+eee66vT0QbcaUJcZx+nrgJrx/QpTA4c79/+7d/S/z0pz/1dXGCCXFxicbGRj8n808AQnGVfqi1tdUnCuKMfTv6Li
aiB/Ti3qEt0ftMRuef//znfq1EhNHLfD2z3gCEPBdFumO/9r//9/+emKh/+MmRP9mAkH4YSvS5P/rRj/pk47/8l/+SkAg3A
|
|||
|
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">xpd =</span> <span class="cn">NA</span>)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="the-3-clusters-from-the-complete-method-vs-the-real-species-category" class="section level3">
|
|||
|
<h3>The 3 clusters from the “complete” method vs the real species
|
|||
|
category</h3>
|
|||
|
<p>The default hierarchical clustering method in <code>hclust</code> is
|
|||
|
“complete”. We can visualize the result of running it by turning the
|
|||
|
object into a dendrogram and making several adjustments to the object,
|
|||
|
such as: changing the labels, coloring the labels based on the real
|
|||
|
species category, and coloring the branches based on cutting the tree
|
|||
|
into three clusters.</p>
|
|||
|
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>d_iris <span class="ot"><-</span> <span class="fu">dist</span>(iris2) <span class="co"># method="man" # is a bit better</span></span>
|
|||
|
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a>hc_iris <span class="ot"><-</span> <span class="fu">hclust</span>(d_iris, <span class="at">method =</span> <span class="st">"complete"</span>)</span>
|
|||
|
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a>iris_species <span class="ot"><-</span> <span class="fu">rev</span>(<span class="fu">levels</span>(iris[,<span class="dv">5</span>]))</span>
|
|||
|
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a><span class="fu">library</span>(dendextend)</span>
|
|||
|
<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">as.dendrogram</span>(hc_iris)</span>
|
|||
|
<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a><span class="co"># order it the closest we can to the order of the observations:</span></span>
|
|||
|
<span id="cb5-8"><a href="#cb5-8" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">rotate</span>(dend, <span class="dv">1</span><span class="sc">:</span><span class="dv">150</span>)</span>
|
|||
|
<span id="cb5-9"><a href="#cb5-9" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-10"><a href="#cb5-10" tabindex="-1"></a><span class="co"># Color the branches based on the clusters:</span></span>
|
|||
|
<span id="cb5-11"><a href="#cb5-11" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">color_branches</span>(dend, <span class="at">k=</span><span class="dv">3</span>) <span class="co">#, groupLabels=iris_species)</span></span>
|
|||
|
<span id="cb5-12"><a href="#cb5-12" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-13"><a href="#cb5-13" tabindex="-1"></a><span class="co"># Manually match the labels, as much as possible, to the real classification of the flowers:</span></span>
|
|||
|
<span id="cb5-14"><a href="#cb5-14" tabindex="-1"></a><span class="fu">labels_colors</span>(dend) <span class="ot"><-</span></span>
|
|||
|
<span id="cb5-15"><a href="#cb5-15" tabindex="-1"></a> <span class="fu">rainbow_hcl</span>(<span class="dv">3</span>)[<span class="fu">sort_levels_values</span>(</span>
|
|||
|
<span id="cb5-16"><a href="#cb5-16" tabindex="-1"></a> <span class="fu">as.numeric</span>(iris[,<span class="dv">5</span>])[<span class="fu">order.dendrogram</span>(dend)]</span>
|
|||
|
<span id="cb5-17"><a href="#cb5-17" tabindex="-1"></a> )]</span>
|
|||
|
<span id="cb5-18"><a href="#cb5-18" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-19"><a href="#cb5-19" tabindex="-1"></a><span class="co"># We shall add the flower type to the labels:</span></span>
|
|||
|
<span id="cb5-20"><a href="#cb5-20" tabindex="-1"></a><span class="fu">labels</span>(dend) <span class="ot"><-</span> <span class="fu">paste</span>(<span class="fu">as.character</span>(iris[,<span class="dv">5</span>])[<span class="fu">order.dendrogram</span>(dend)],</span>
|
|||
|
<span id="cb5-21"><a href="#cb5-21" tabindex="-1"></a> <span class="st">"("</span>,<span class="fu">labels</span>(dend),<span class="st">")"</span>, </span>
|
|||
|
<span id="cb5-22"><a href="#cb5-22" tabindex="-1"></a> <span class="at">sep =</span> <span class="st">""</span>)</span>
|
|||
|
<span id="cb5-23"><a href="#cb5-23" tabindex="-1"></a><span class="co"># We hang the dendrogram a bit:</span></span>
|
|||
|
<span id="cb5-24"><a href="#cb5-24" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">hang.dendrogram</span>(dend,<span class="at">hang_height=</span><span class="fl">0.1</span>)</span>
|
|||
|
<span id="cb5-25"><a href="#cb5-25" tabindex="-1"></a><span class="co"># reduce the size of the labels:</span></span>
|
|||
|
<span id="cb5-26"><a href="#cb5-26" tabindex="-1"></a><span class="co"># dend <- assign_values_to_leaves_nodePar(dend, 0.5, "lab.cex")</span></span>
|
|||
|
<span id="cb5-27"><a href="#cb5-27" tabindex="-1"></a>dend <span class="ot"><-</span> <span class="fu">set</span>(dend, <span class="st">"labels_cex"</span>, <span class="fl">0.5</span>)</span>
|
|||
|
<span id="cb5-28"><a href="#cb5-28" tabindex="-1"></a><span class="co"># And plot:</span></span>
|
|||
|
<span id="cb5-29"><a href="#cb5-29" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">3</span>,<span class="dv">3</span>,<span class="dv">7</span>))</span>
|
|||
|
<span id="cb5-30"><a href="#cb5-30" tabindex="-1"></a><span class="fu">plot</span>(dend, </span>
|
|||
|
<span id="cb5-31"><a href="#cb5-31" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Clustered Iris data set</span></span>
|
|||
|
<span id="cb5-32"><a href="#cb5-32" tabindex="-1"></a><span class="st"> (the labels give the true flower species)"</span>, </span>
|
|||
|
<span id="cb5-33"><a href="#cb5-33" tabindex="-1"></a> <span class="at">horiz =</span> <span class="cn">TRUE</span>, <span class="at">nodePar =</span> <span class="fu">list</span>(<span class="at">cex =</span> .<span class="dv">007</span>))</span>
|
|||
|
<span id="cb5-34"><a href="#cb5-34" tabindex="-1"></a><span class="fu">legend</span>(<span class="st">"topleft"</span>, <span class="at">legend =</span> iris_species, <span class="at">fill =</span> <span class="fu">rainbow_hcl</span>(<span class="dv">3</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAg0AAALuCAYAAAAtyCW4AAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAINoAMABAAAAAEAAALuAAAAAK1qTRMAAEAASURBVHgB7F0FYFzXsR2TLFmyGGxLtmWSmRkSxw440DTMSdNgMW2KKf02/W2gbf5P2uY3KaRN00ADDaMDThwwM5MsoxgsBtv7zxn5bXbl3dUKVuS5yWp337vvvrvnrffOmzlzppsLTawZAoaAIWAIGAKGgCHQCALdG9lvuw0BQ8AQMAQMAUPAEFAEzGiwL4IhYAgYAoaAIWAIBIWAGQ1BwWSdDAFDwBAwBAwBQ8CMBvsOGAKGgCFgCBgChkBQCJjREBRM1skQMAQMAUPAEDAEzGiw74AhYAgYAoaAIWAIBIWAGQ1BwWSdWhsBZvru379fysrKWntoG88QMAQMAUMgRAiY0RAiYG1Y3wgUFhbKddddJ7GxsTJ48GCJjo6WsWPHyttvv+0+YMmSJdKtWzeZOnWqe1trvd
i0aZM8/fTTrTVck8cZNWqUfrZVq1b5PPb666/X/T/5yU987nc2fu1rX9N+v/nNb5xNrfLc2Px8naS0tFTuu+8+X7vadFtHmUebfmg7mSHQxgiY0dDGgJ/KpysqKpIJEyboon306FGZOXOmhIWFydatW+W8886TtWvXhhSeJ598UiZPnizLly8P6XnaYvAePXpIz549pXv39v0nfPjwYRkxYoT84Q9/aIuP7fccHWUefidoOwyBLoJA+/7idBEQ7WMEhwDvRvnjPnLkSNm5c6cu3tnZ2Wo8cIS77747uIGa2SszM1OOHTvWzKM71mEPP/yw1NXVyQ9+8IN2ndiRI0ckLy+vXefAk3eUebQ7EDYBQyDECJjREGKAbfh6BI4fPy5/+tOf9M1vf/tbSU1N1dfx8fHyxz/+Ub7//e/Lbbfd5heu6dOnq5eC4Q2nzZo1S7c5i9bixYtlxowZ0rdvX4mJiZE5c+bIZ599pt3pZfi///s/fc3whGfo47HHHpNx48bpcTz+9ddfd06hYRN6R37961/L5ZdfLklJSTpfdmCo48wzz9RzZWRkyE9/+lNdyJ2DuZDddNNNesywYcP0/E1Vbf/ud7+rn/GZZ55RY4t39Zs3b9b5cF5///vf9XSHDh2SK664QlJSUiQ8PFyGDx8u//u//+tMxedzMPOjkfWzn/1M5s6dq/gQp2984xtSU1OjxsLFF1+sYxcUFOg8GVpi+89//iMXXXSRJCYmyqBBg9STtH37dt3n60+ga+f093edeP39zcM51p4NAUOglRBg7QlrhkCoEdi3bx9rnOijpKQk4Ok++OAD7TdlyhR3PyyEui0nJ8e9LSoqSrcdPHjQVVVV5cKC6UpOTnZhUXPdeOONLrjuXTBOXAiLuJ544gndxznAUHFNmjRJx3n00Ud1jD59+rguvPBCV2RkpL5/6623dD8Wa30fERHhQjjAxX6vvvqqCyROd98zzjjDNXToUO138803u+d3/vnn6zaOyde9evVygauh21auXOnu5/kCfA/d/+Mf/1g3X3rppfqe54cx5AIXxFVdXe366le/qtvvv/9+7Xfrrbfq+6uvvtp155136ufmZ4UB5Dm81+tg5vfAAw/ouKNHj3ZxbOLLce+66y5Xbm6uC8aJvic24Ka4eO127Njh4vWC4abHTJw4UfsQI1+tsWvHYwJdJ3/z8HUu22YIGAItQ0BadrgdbQgEh8CHH36oC0fv3r0bPaA5RgMXeS5m8CC4VqxY4cIdsuvNN990ffLJJ67Kyko95y9/+Uvtc8cdd7jn4CyC77//vm5zxsGdtb53jAYuilu2bHGVl5e7EBZQw4Tn4+LNVltbq8YIuAZqUGzbtk3PxT4IxWifd955x72tqUYDPxfP6xhcDY0GGhTgh7ief/55FwiBOlcaDJ5Glk7ixJ9g5/fuu++64GVxwZOgR/7lL3/Rz/DFL35R34OPou/79evnHp6fF94jNSC4Ed4hNeCIBTwU7n7OCwfzQNeusevkax7O+PZsCBgCrYdAT/xDtmYIhBwBuqnZ6NauqKgQ3H23+Jz4Z+Aeg+EGuubXrFmjHAlmZ5x77rnCLAPcpbv7eb7AguqOx2OxlZdeeskdXtiwYYNnV83wGDNmjHubs3/Pnj0CI0S3MyxCsieMC/2c3Ig7dCUK8vX8+fM1dABPAd82qV1yySVKfGTYxVeD10CeffZZDVGQIMnQzDXXXCPnnHOOr+4Cb4Bub2x+Z511ls4ZHgeBASYwyPQ4Xkd/jSEUeFXklVdekYceekg+/fRTYXiKjceR/OrZGrt2TblOnuPaa0PAEGh9BIzT0PqY2og+EGCM3WH6N8ySYDybnAW42un58nH055uYdeE03N07L5U3wKwIuOaF/AHckcu///1vXajJAfDVmNbJB+dFguaBAweECxRj8eQqeI6fkJDgNQSzF9jIC+BxfCDkocfCm+ImXDr92JeLOfkGzWkNz99wjMcff1wzGGbPnq27li5dqg
YTuRi+mkMIbWx+3/72t+W0004TciqmTZum+HI8z+Majg/vhBpL5HjQYCOHhUYEm/Md8DyGPJFA164p18lzXHttCBgCrY+AG
|
|||
|
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="do">#### BTW, notice that:</span></span>
|
|||
|
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a><span class="co"># labels(hc_iris) # no labels, because "iris" has no row names</span></span>
|
|||
|
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a><span class="co"># is.integer(labels(dend)) # this could cause problems...</span></span>
|
|||
|
<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a><span class="co"># is.character(labels(dend)) # labels are no longer "integer"</span></span></code></pre></div>
|
|||
|
<p>The same can be presented in a circular layout:</p>
|
|||
|
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a><span class="co"># Requires that the circlize package will be installed</span></span>
|
|||
|
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mar =</span> <span class="fu">rep</span>(<span class="dv">0</span>,<span class="dv">4</span>))</span>
|
|||
|
<span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a><span class="fu">circlize_dendrogram</span>(dend)</span></code></pre></div>
|
|||
|
<pre><code>## Loading required namespace: circlize</code></pre>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAg0AAAINCAYAAAC9GEyUAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAINoAMABAAAAAEAAAINAAAAAMIe8BwAAEAASURBVHgB7J0HfFXXle63TUd0AUIC0Xvvtmk2GHDBBZc4seM0v7SZZDKTzJuZl5nMTJLpLdPitJn0uPcCxmB67713EJKQEKKJIorf99/ywZejva8ESKiwt3/4Xt176jrnnrX2t771rVs+1DBhBAsECwQLBAsECwQLBAuUYYFby/g+fB0sECwQLBAsECwQLBAsYC0QgoZwIwQLBAsECwQLBAsEC5TLAiFoKJeZwkLBAsECwQLBAsECwQIhaAj3QLBAsECwQLBAsECwQLksEIKGcpkpLBQsECwQLBAsECwQLBCChnAPBAsECwQLBAsECwQLlMsCIWgol5nCQsECwQLBAsECwQLBAiFoCPdAsECwQLBAsECwQLBAuSwQgoZymSksFCwQLBAsECwQLBAsEIKGcA8ECwQLBA
sECwQLBAuUywIhaCiXmcJCwQLBAsECwQLBAsECIWgI90CwQLBAsECwQLBAsEC5LFC3XEuFhYIFggVqnQWOnT5vGta71Rw/fcGkNW+Q9PyOFZ03BwrOmLp1bjWdWzcyjRvUSbp8+DJYIFigdlrgltDlsnZe2HBWN6cF9h85Y84WXzQbDp4yGS0amNG9WjoNcUKBwuIdhebM+YuGPrf3DWrjDQSW7zpmLmmZtOb1zfkLl0zW0XOmYf1bzeieV24778Q5s+HASXP63CVz/uIlU08Bxm3dm5cZkDgPMHwYLBAsUC0tEJCGanlZwkEFC1ybBXbmFJnBnZuZVk3qmeNnLng3Uiynzrhw8UPTQGjDrUkSlcO7Njd5J4rNRUUOHVo1NL0ymtj38Y3XvfUWM7F/68sfa0Ji1u0/aZo1qmsa1Q/IxGXDhDfBAjXYAgFpqMEXLxx6zbdAzrFzBme7Zt9x63Dr6P31jINKIczbctRM6J9qTiho6CMH7xvbs08JEfjQdFK6oakcu2+8seqwadG4rtmTd8a0aVrfDOvazLRv2bDU4meEcIAu1K1zfeeQuOFDhWfNRqEXZ85fMpkKWAZ2bGrq100S4SSuHN4HCwQLVLgF/E+KCt9V2GCwwM1lASD6c3J2TRq6f2bMxIH+Gb0zUsyuw6dNr/SU6zJShpx5i5R6ZuaGI2aQHKxvgDCs3nfCIg1bs4vM4yPTzC23lHb2l3SM9RQE3NmnlYKQPDNpQKpZtee4M2hYuK3QnkO7FvXtbts2a2DGeNIjvuNK/PzU2Qv2+CYPaK1jM+JUnDWzNnJezUyH1NJBy3FxNAiCWun8b73O4CvxOML7YIFggY8t4H6affx9eBcsECxwlRY4K57Akh3HDETD8xc+NFOHp9kUgGszxeIInNYM/UzxJc34SzvCaB1SA4UiI+YdLxYqUMdkpjaKvrridbcCD4KFnbmnzVEt7xscW892KZYEKXDAFOs4G9QrHTTcKm/dLa2xeXPlYTNEaY+duUVmaJfmzs0SUJAWIZ2RbBCIbM46ZUBZGik1MiCzqQ104uuAWuQUnjPNhXIcOnrWdE9LsajISQUTiQOEY73SIB/qP4KhfKVS2orYeT0BS+L2w/tggWCBjy0QcL6PbRHeBQtUiAWKzl60eXwcfaqcKGiDazCzv39wG+u8CTSS5f2X7jwmx35JBMeTlmzo2h6ftRVZcZMcMqREHL5vtFaaAefdSIRGEAF4Db7B+UCEbKlz6ZXexOzNO+1clPOJAoaCk8XOZfgQsmRrbWusUIieQlbgPSzaXlhq+WVCYdq3amCeX5xjv5uxPt++No0hN9htaJdmNvgY27uVeXRkO9OjXWMFJidLbTN8ECwQLHB9FghIw/XZL6wdLFDKAqlyyDjCbm0bK+3QxFuVwIprlSJgRn1BXpnAYEK/1FLb4wNKI1mWcsf8JA65ReN6ZhJ8Bs3GD6nKYY8cfFcdR3wQgBxTBYWttJAT35Zzyjw4tK09lsRlCSxAOB4Y2sa8tuKwuVvHBynSNX45P0upmDo2WNmXf8ZMHtjaOvL4soM7Nbv8EQ6fck/QgviAQLlqzwlxGG4xXdo0tlUb8WX4G/Rjzd4TNug6caYEQRnZvUWo2nAZK3wWLHCdFghBw3UaMKweLOCywPi+rczy3cfMQs2gb+vW3Am/sx4z+K5pjczczUdNm2YlXADX9pjB75BjLyy6YIZrVu0blFK+vDzHOllSEO1UdknFQ5w8eFIkya5tG9nlIBu2k+MmeIkP0IpTQhooo3xEaZYXl+bYdEV8Of7+xG3tzFyRMEljgGSQdogP0JffLDxkmisgqCdCIwRLAivXubOd/h2amJPa/4JtR01isJG43VylOcb0bnmZZ/HOmrzEr8P7YIFggQq0QKieqEBjhk3VbAtkKW+utLiTZHe1Z7ZEGggQIIvOXbBOd5LIfK5B6oL9UgLZpmk907RhPW+q4CU5bCoTqHQgEGkuVCE+cMpnxY/Ym3/aBg
w4b98AuSDVwHFOFDrhIg9yfHAIWopcSGUHCEXRuRJtB/gL0bik/bI+rzNEwiRQodLBNdgvKEKmyIwIS6H5EE85sN7K3ce13
|
|||
|
<p>These visualizations easily demonstrate how the separation of the
|
|||
|
hierarchical clustering is very good with the “Setosa” species, but
|
|||
|
it mislabels many “Versicolor” flowers as “Virginica”.</p>
|
|||
|
<p>The hanging of the tree also helps to locate extreme observations.
|
|||
|
For example, we can see that observation “virginica (107)” is not very
|
|||
|
similar to the Versicolor species, but still, it is among them. Also,
|
|||
|
“Versicolor (71)” is located too far “within” the group of Virginica
|
|||
|
flowers.</p>
|
|||
|
<p>We can also explore the data using a heatmap. The rows are ordered
|
|||
|
based on the order of the hierarchical clustering (using the “complete”
|
|||
|
method). The colored bar indicates the species category each row belongs
|
|||
|
to. The color in the heatmap indicates the length of each measurement
|
|||
|
(from light yellow to dark red).</p>
|
|||
|
<p>In the heatmap we also see how the Setosa species has low petal
|
|||
|
values (in light yellow), but it is very difficult to see any clear
|
|||
|
distinction between the other two species.</p>
|
|||
|
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>some_col_func <span class="ot"><-</span> <span class="cf">function</span>(n) <span class="fu">rev</span>(colorspace<span class="sc">::</span><span class="fu">heat_hcl</span>(n, <span class="at">c =</span> <span class="fu">c</span>(<span class="dv">80</span>, <span class="dv">30</span>), <span class="at">l =</span> <span class="fu">c</span>(<span class="dv">30</span>, <span class="dv">90</span>), <span class="at">power =</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">/</span><span class="dv">5</span>, <span class="fl">1.5</span>)))</span>
|
|||
|
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a><span class="co"># scaled_iris2 <- iris2 %>% as.matrix %>% scale</span></span>
|
|||
|
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a><span class="co"># library(gplots)</span></span>
|
|||
|
<span id="cb9-5"><a href="#cb9-5" tabindex="-1"></a>gplots<span class="sc">::</span><span class="fu">heatmap.2</span>(<span class="fu">as.matrix</span>(iris2), </span>
|
|||
|
<span id="cb9-6"><a href="#cb9-6" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Heatmap for the Iris data set"</span>,</span>
|
|||
|
<span id="cb9-7"><a href="#cb9-7" tabindex="-1"></a> <span class="at">srtCol =</span> <span class="dv">20</span>,</span>
|
|||
|
<span id="cb9-8"><a href="#cb9-8" tabindex="-1"></a> <span class="at">dendrogram =</span> <span class="st">"row"</span>,</span>
|
|||
|
<span id="cb9-9"><a href="#cb9-9" tabindex="-1"></a> <span class="at">Rowv =</span> dend,</span>
|
|||
|
<span id="cb9-10"><a href="#cb9-10" tabindex="-1"></a> <span class="at">Colv =</span> <span class="st">"NA"</span>, <span class="co"># this to make sure the columns are not ordered</span></span>
|
|||
|
<span id="cb9-11"><a href="#cb9-11" tabindex="-1"></a> <span class="at">trace=</span><span class="st">"none"</span>, </span>
|
|||
|
<span id="cb9-12"><a href="#cb9-12" tabindex="-1"></a> <span class="at">margins =</span><span class="fu">c</span>(<span class="dv">5</span>,<span class="fl">0.1</span>), </span>
|
|||
|
<span id="cb9-13"><a href="#cb9-13" tabindex="-1"></a> <span class="at">key.xlab =</span> <span class="st">"Cm"</span>,</span>
|
|||
|
<span id="cb9-14"><a href="#cb9-14" tabindex="-1"></a> <span class="at">denscol =</span> <span class="st">"grey"</span>,</span>
|
|||
|
<span id="cb9-15"><a href="#cb9-15" tabindex="-1"></a> <span class="at">density.info =</span> <span class="st">"density"</span>,</span>
|
|||
|
<span id="cb9-16"><a href="#cb9-16" tabindex="-1"></a> <span class="at">RowSideColors =</span> <span class="fu">rev</span>(<span class="fu">labels_colors</span>(dend)), <span class="co"># to add nice colored strips </span></span>
|
|||
|
<span id="cb9-17"><a href="#cb9-17" tabindex="-1"></a> <span class="at">col =</span> some_col_func</span>
|
|||
|
<span id="cb9-18"><a href="#cb9-18" tabindex="-1"></a> )</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqMAAAKjCAYAAAApnwZoAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAKjoAMABAAAAAEAAAKjAAAAANgC0G8AAEAASURBVHgB7J0HYJvF+f+/2pb3yN57hwQSVggzQCGMMAOUtuzZP53sMkoLtJTZ0tD+KIXQUDaUQlsIu5CSsEkgC8heznK8be3/PWdeIdmSLdmSLb36Xivr1b333j33OeF8/dzdc5aQSmDqMoH6+npUVlZi2LBhsNvtHdYXDAbR0NAAt9udUPl4FUodFosF+fn54SJiiwyr5NlstnA+L0iABEiABEiABEgg0whYM82gbLNn27ZtOO6441BcXIzRo0ejvLwcDz74YIfdWLp0qX7mmWee6bBsewUOOOAAHHLIIeEif/zjH1FUVIQzzjgDgUAgnM8LEiABEiABEiABEshEAhSjXRyVuXPn4t1338X111+PF154ARMmTMDll1+O5cuXd7Hm5B9/4okn8KMf/QizZs3C888/D6
fTmXwlfIIESIAESIAESIAEupEAxWgXYL/33ntYtGgRvve97+HWW2/FnDlz8Pjjj+P+++9HSUmJrnnx4sU46qij0LdvXxx22GH4+OOPY7YYr9z777+PiRMn4pprrsHAgQPxz3/+M+bzr7zyCs455xzMnDkTL774IvLy8nS5uro6nHvuuejXrx+mTp2Kl19+Wed///vfx7HHHhuu68orr9QeVlk+wEQCJEACJEACJEAC3UWAYrQLpFeuXKmfPvLII8O1jBgxApdddhkGDRqE9evX45hjjoEIwltuuQVVVVU4+uijsWPHjnB5uWivnKwJXbFiBf7xj3/gvPPOw/Tp06OelQ8bN27EqaeeCp/Ph0MPPTRq/egjjzyCZ599FnfddRemTJmiy8nSggMPPBAiYMWD6/V68dBDD2Hy5MmwWvmVaAOYGSRAAiRAAiRAAmkj0PFOm7Q1nf0VG15EWS8aK/3rX/9CbW0t/vCHP2C//fbD4MGDcfzxx2sRKMLPSO2VE1Er6eqrr8aFF15oPBL1vnPnTl2/rBW94447cPrpp2OvvfbSZZ5++mndrtgxdOhQNDU14fXXX8dZZ52Fn/3sZ5D7+++/P2pqaiDeUiYSIAESIAESIAES6E4CdIN1gbasD5UkU+lGEqG377776ul7v9+vs0UkSiosLNTv4omMTImUk2n+eEmm7xcuXIh58+bpIhdccEF485J4QWUj06ZNm7Tn9Nprr8Xw4cNRVlaGk046SYtR8ZyOGTMGshmKiQRIgARIgARIgAS6kwDFaBdoH3TQQXqd5e23346f/OQnePjhh/XmpS1btmjPpKwRlWnvG2+8UW9yuu222/RaTtlgFJkSKddeuChZD1paWoqxY8fqDUwfffQR7rnnHt2ELBOQ5QFnnnkmRo4ciSVLlmDIkCH6nkz7r1q1CrLxiV7RyBHhNQmQAAmQAAmQQHcRoBjtImnxKorge+CBB3DJJZfoTUYLFizQYZtkw9Df/vY3yEYnCb+0bNkyPPnkk9ozGdlsIuUklmgi6aabbtKbpW6++WZ89dVXenpf1rFKG7JJSTY4GWJUNlbJMgCPx0MxmghcliEBEiABEiABEkg5AQuD3qeGqUy9Nzc3axEaq8Zdu3ahV69esW5F5SVaLuqhBD7ImlBZJhAZBH/Pnj06FJV4VN9+++0EamEREiABEiABEiABEkgtAXpGU8RTYnrG28gkTSQiRJMpl6zZEmoqUojKNL6sNZWd/TfccEOy1bE8CZAACZAACZAACaSEAD2jKcGYfZVIOKkPPvhAr20dN25c9nWAFpMACZAACZAACZiCAMWoKYaRnSABEiABEiABEiCB7CTAafrsHDdaTQIkQAIkQAIkQAKmIMCg910YRgkgL4HvCwoKomqR3elGLFEJyeR2u6Pup/KDtC+nNEkb7YV/aq9Ned4I4O9wOMJHicozqai/vbZ5jwRIgARIgARIILcJ0DPahfE/+eSTYwaK/9WvfqU3M8mGpvz8fH39wx/+EPX19V1oLfajS5cu1fU/88wzusAXX3yh45rGLh07d5999gnbK6K2oqICjz76qC7cuv7YNbTkPv744/ro0fbK8B4JkAAJkAAJkAAJRBKgGI2kkeLrv/71rzqgvJyIJHFIr7nmmhS3AH3Ep5w/b5yeJG3997//TbodCe/0j3/8Q4tQOe1Jjh6trq5OuJ6NGzfi7LPPhpz4xEQCJEACJEACJEACiRLIWTH66quv4vDDD0efPn308Z1vvPGGZiYhj+REJDlVSYLDS7D4jz/+WN+T6exf//rXkCDyc+bM0ScbtQf6O9/5jj756N5779Xl/+///k9P39fV1eHcc8+FnJwk9b/88su6muXLl2PixIm4//77sffee+uA9Pfdd5++J0eG/vSnP4XsfB8/frw+aUmm0Ldv344777wTK1euxJ/+9Cd8/vnn2t5TTjlFB7I/9thjwyZK0HsJvm9MyYdvqAvjeNAf/OAHOni/tPfJJ59EFt
HXixcvhgTLF8EqnAw2l156qb4v3KSfTCRAAiRAAiRAAiSQCIGcFKMixkTkidiUE5IkIPwtt9yieYm4E8+ihD76/e9/r08xu
|
|||
|
<p>We can get an interactive heatmap by using the <code>heatmaply</code>
|
|||
|
package/function: (code is not evaluated in order to keep the HTML
|
|||
|
size small)</p>
|
|||
|
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a>heatmaply<span class="sc">::</span><span class="fu">heatmaply</span>(<span class="fu">as.matrix</span>(iris2),</span>
|
|||
|
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a> <span class="at">dendrogram =</span> <span class="st">"row"</span>,</span>
|
|||
|
<span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a> <span class="at">Rowv =</span> dend)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="similaritydifference-between-various-clustering-algorithms" class="section level3">
|
|||
|
<h3>Similarity/difference between various clustering algorithms</h3>
|
|||
|
<p>We may ask ourselves how many different results we could get if we
|
|||
|
were to use different clustering algorithms (<code>hclust</code> has 8
|
|||
|
different algorithms implemented). For the purpose of this analysis, we
|
|||
|
will create all 8 hclust objects, and chain them together into a single
|
|||
|
<code>dendlist</code> object (which, as the name implies, can hold a
|
|||
|
bunch of dendrograms together for the purpose of further analysis).</p>
|
|||
|
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>hclust_methods <span class="ot"><-</span> <span class="fu">c</span>(<span class="st">"ward.D"</span>, <span class="st">"single"</span>, <span class="st">"complete"</span>, <span class="st">"average"</span>, <span class="st">"mcquitty"</span>, </span>
|
|||
|
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a> <span class="st">"median"</span>, <span class="st">"centroid"</span>, <span class="st">"ward.D2"</span>)</span>
|
|||
|
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>iris_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>()</span>
|
|||
|
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="fu">seq_along</span>(hclust_methods)) {</span>
|
|||
|
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a> hc_iris <span class="ot"><-</span> <span class="fu">hclust</span>(d_iris, <span class="at">method =</span> hclust_methods[i]) </span>
|
|||
|
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a> iris_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>(iris_dendlist, <span class="fu">as.dendrogram</span>(hc_iris))</span>
|
|||
|
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="fu">names</span>(iris_dendlist) <span class="ot"><-</span> hclust_methods</span>
|
|||
|
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a>iris_dendlist</span></code></pre></div>
|
|||
|
<pre><code>## $ward.D
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 199.6205
|
|||
|
##
|
|||
|
## $single
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 1.640122
|
|||
|
##
|
|||
|
## $complete
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 7.085196
|
|||
|
##
|
|||
|
## $average
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 4.062683
|
|||
|
##
|
|||
|
## $mcquitty
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 4.497283
|
|||
|
##
|
|||
|
## $median
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 2.82744
|
|||
|
##
|
|||
|
## $centroid
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 2.994307
|
|||
|
##
|
|||
|
## $ward.D2
|
|||
|
## 'dendrogram' with 2 branches and 150 members total, at height 32.44761
|
|||
|
##
|
|||
|
## attr(,"class")
|
|||
|
## [1] "dendlist"</code></pre>
|
|||
|
<p>Next, we can look at the cophenetic correlation between each
|
|||
|
clustering result using <code>cor.dendlist</code>. (This can be nicely
|
|||
|
plotted using the <code>corrplot</code> function from the
|
|||
|
<em>corrplot</em> package):</p>
|
|||
|
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a>iris_dendlist_cor <span class="ot"><-</span> <span class="fu">cor.dendlist</span>(iris_dendlist)</span>
|
|||
|
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>iris_dendlist_cor</span></code></pre></div>
|
|||
|
<pre><code>## ward.D single complete average mcquitty median centroid
|
|||
|
## ward.D 1.0000000 0.9836838 0.5774013 0.9841333 0.9641103 0.9451815 0.9809088
|
|||
|
## single 0.9836838 1.0000000 0.5665529 0.9681156 0.9329029 0.9444723 0.9903934
|
|||
|
## complete 0.5774013 0.5665529 1.0000000 0.6195121 0.6107473 0.6889092 0.5870062
|
|||
|
## average 0.9841333 0.9681156 0.6195121 1.0000000 0.9828015 0.9449422 0.9801444
|
|||
|
## mcquitty 0.9641103 0.9329029 0.6107473 0.9828015 1.0000000 0.9203374 0.9499123
|
|||
|
## median 0.9451815 0.9444723 0.6889092 0.9449422 0.9203374 1.0000000 0.9403569
|
|||
|
## centroid 0.9809088 0.9903934 0.5870062 0.9801444 0.9499123 0.9403569 1.0000000
|
|||
|
## ward.D2 0.9911648 0.9682507 0.6096286 0.9895131 0.9829977 0.9445832 0.9737886
|
|||
|
## ward.D2
|
|||
|
## ward.D 0.9911648
|
|||
|
## single 0.9682507
|
|||
|
## complete 0.6096286
|
|||
|
## average 0.9895131
|
|||
|
## mcquitty 0.9829977
|
|||
|
## median 0.9445832
|
|||
|
## centroid 0.9737886
|
|||
|
## ward.D2 1.0000000</code></pre>
|
|||
|
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>corrplot<span class="sc">::</span><span class="fu">corrplot</span>(iris_dendlist_cor, <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAlgAAAJYCAYAAAC+ZpjcAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAJYoAMABAAAAAEAAAJYAAAAAIxUoKIAAEAASURBVHgB7J0HeBVF18f/u5teCSSQhE7ooSO9Si9iAQs2BLG8dhRRP7vYRUVFbIAiiApiQUVROtKk9957SAikt7u73+yNCUkIyc3N3hb+8zz3zZaZc8785vrew8yZM5IuClhIgARIgARIgARIgARMIyCbJomCSIAESIAESIAESIAErAToYPGLQAIkQAIkQAIkQAImE6CDZTJQiiMBEiABEiABEiABOlj8DpAACZAACZAACZCAyQToYJkMlOJIgARIgARIgARIgA4WvwMkQAIkQAIkQAIkYDIBOlgmA6U4EiABEiABEiABEqCDxe8ACZAACZAACZAACZhMgA6WyUApjgRIgARIgARIgAToYPE7QAIkQAIkQAIkQAImE6CDZTJQiiMBEiABEiABEiABOlj8DpAACZAACZ
AACZCAyQToYJkMlOJIgARIgARIgARIgA4WvwMkQAIkQAIkQAIkYDIBOlgmA6U4EiABEiABEiABEvAigopNQI+Lg75rN5CcAqluHaBZLCSZfnXFHnX2jgRIgARIwNUE6GC5egQcpF8/fx7ag49C//4HwNcHCA0FzsYD9WMgT/4Acr++DtJMsSRAAiRAAiRAApzKqKDfAe3m26EfPwFl/SooyQnwijsu/sZDHj0S2g03Q9+3v4L2nN0iARIgARIgAdcTkHRRXG8GLTCTgH7wINSW7aAcPwgpLOwS0epdo4E6taG88uIl7/iABEiABEiABEig/AQ4g1V+hu4n4eQpqwNVnHNlGCu1bgUcOep+dtMiEiABEiABEqggBOhgVZCBLNgNPSMDCAkp+KjwdXAQYNRhIQESIAESIAEScAgBBrk7BKsbCM3MvHycVdxZNzCQJpAACZAACZBAxSVAB6uiju3mrVAbNb9s76Sbhl72HV+QAAmQAAmQAAmUjwCD3MvHzy1b65oGZGWVbJuiQPIR6RtYSIAESIAESIAETCdAB8t0pBRIAiRAAiRAAiRwpRNgkPsV+A3Q16yFtnTZFdhzdpkESIAESIAEnEOADpZzOLuVFm3B39B/mudWNtEYEiABEiABEqhIBLhEWJFGk30hARIgARIgARJwCwLcRegWw2CuEbqqlp7nyssLkp+fuYopjQRIgARIgARIwEqAS4QV8IugL1oMNTj80k9oRO6zkHBoo++vgD1nl0iABEiABEjAPQjQwXKPcTDVCqlPbyip56wfefyLkIZdD2XNcijpF6CsXgapdy9I119rqk4KIwESIAESIAESuEiAMVgXWVTIK0u9xlBWLoEUHZ3fP+suwhdegbLoz/xnvCABEiABEiABEjCPAGewzGPpnpKio6CvXlvINu3Pv4CYeoWe8YYESIAESIAESMA8ApzBMo+lW0rS/vob2i13APVjIDWLhb5qDWCxQPnrd0gNG7ilzTSKBEiABEiABDydAB0sTx/BUuzX9+2HLkmASCyqHz0GqWkTSAP7Q6pcuZSWfE0CJEACJEACJGAvATpY9pLzkHaWBrGQP54IuX8/D7GYZpIACZAACZCA5xNgDJbnj2GJPZC6dYG+bAX00g5/LlEKX5IACZAACZAACZSFAB2sstDyxLoZGdDfeQ9qQCVYqtaApVpN60e99wFP7A1tJgESIAESIAGPIMAlQo8YJvuN1PfuKz6re6VQSHXq2C+YLUmABEiABEiABC5LgA7WZdFUnBe62DWIlBRA1wFNy3W4zsZDatum4nSSPSEBEiABEiABNyLAJUI3GgxHmKJ9NxuqcURO5SioVaKhRtSAWqchtO/nOEIdZZIACZAACZAACQgCPOy5gn8NtDffgfzuW4CPD/Q9eyHfOxraQ49BHnZDBe85u0cCJEACJEACriPAGSzXsXeO5rizkO4eCalTR+jrN1qTi8oT3oQ28SPn6KcWEiABEiABErgCCdDBquiDXqsmsHUb0KghsG079MxMQFGAxPMVvefsHwmQAAmQAAm4jACXCF2G3jmK5aefhNqzL5QDu6wZ3NVuvYCEc5Affcg5BlALCZAACZAACVyBBLiL8AoYdP3gQaBWLesZhPqUL8V1TUhDBkMyZrJYSIAESIAESIAETCdAB8t0pO4lUL31TiCmHuTrrwVEWgbJOJeQhQRIgARIgARIwKEEGIPlULyuFy4//iigqlBH3Qu1Vn2oD4+Btmgx9Jwc1xtHC0iABEiABEigghLgDFYFHdjiuqUfPgzt1Tehfz0T0i03Qfl2RnHV+IwESIAESIAESKCcBBjkXk6A7t7cOCpHX7gI+pJl0FesBGpUhyRmteSbhrm76bSPBEiABEiABDyWAB0sjx062wxXb7kd2L0H0qi7oGxaC8kIdmchARIgARIgARJwKAEuEToUr+uF60eOQP9bzGAZn9XCwWraBFLvqyFdew2k2KauN5AWkAAJkAAJkEAFJEAHqw
IO6uW6pJ86Be3td6F/NgXSdUOgzPn2clX5nARIgARIgARIoBwE6GCVA54nNNVXrYb223zofy0Ejh23Jhu1zl4N6AcpJMQTu
|
|||
|
<p>From the above figure, we can easily see that most clustering methods
|
|||
|
yield very similar results, except for the complete method (the default
|
|||
|
method in <code>hclust</code>), which yields a correlation measure of
|
|||
|
around 0.6.</p>
|
|||
|
<p>The default cophenetic correlation uses Pearson’s measure, but what
|
|||
|
if we use Spearman’s correlation coefficient?</p>
|
|||
|
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>iris_dendlist_cor_spearman <span class="ot"><-</span> <span class="fu">cor.dendlist</span>(iris_dendlist, <span class="at">method_coef =</span> <span class="st">"spearman"</span>)</span>
|
|||
|
<span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a>corrplot<span class="sc">::</span><span class="fu">corrplot</span>(iris_dendlist_cor_spearman, <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAlgAAAJYCAYAAAC+ZpjcAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAJYoAMABAAAAAEAAAJYAAAAAIxUoKIAAEAASURBVHgB7J0HfBTFF8d/u3fpDUhCCb2X0DsIgnRUQMCCFRA7Fmx/FcWCigiiKKAoKGAHFUVFkS5deq/Sa0hISG+3u//ZiwkJhOTustfCbz6f8/Z2Z957853De5l580bSRAELCZAACZAACZAACZCAYQRkwyRREAmQAAmQAAmQAAmQgJUAHSx+EUiABEiABEiABEjAYAJ0sAwGSnEkQAIkQAIkQAIkQAeL3wESIAESIAESIAESMJgAHSyDgVIcCZAACZAACZAACdDB4neABEiABEiABEiABAwmQAfLYKAURwIkQAIkQAIkQAJ0sPgdIAESIAESIAESIAGDCdDBMhgoxZEACZAACZAACZAAHSx+B0iABEiABEiABEjAYAJ0sAwGSnEkQAIkQAIkQAIkQAeL3wESIAESIA
ESIAESMJgAHSyDgVIcCZAACZAACZAACdDB4neABEiABEiABEiABAwmQAfLYKAURwIkQAIkQAIkQAJmIijdBLSYGGh79wFJyZBq1gAaR0OS6VeX7lFn70iABEiABNxNgA6Wu0fASfq1hASojz0J7fsfAD9fICwMOB8L1KkNedpkyL16OkkzxZIACZAACZAACXAqo5R+B9Tb74Z28hRMm9bClBQHc8xJ8R4LecQwqANvh3bwUCntObtFAiRAAiRAAu4nIGmiuN8MWmAkAe3wYSjN2sB08jCksmWvEK0MHQHUqA7TG69e8Yw3SIAESIAESIAESk6AM1glZ+h5Ek6fsTpQhTlXurFSi+bAseOeZzctIgESIAESIIFSQoAOVikZyPzd0NLTgdDQ/LcKXocEA3odFhIgARIgARIgAacQYJC7U7B6gNCMjKvHWcWc9wADaQIJkAAJkAAJlF4CdLBK69hu2wGlfpOr9k66bdBVn/EBCZAACZAACZBAyQgwyL1k/DyytaaqQGZm0baZTJB8RfoGFhIgARIgARIgAcMJ0MEyHCkFkgAJkAAJkAAJXOsEGOR+DX4DtPUboK5YeQ32nF0mARIgARIgAdcQoIPlGs4epUVdtBja/AUeZRONIQESIAESIIHSRIBLhKVpNNkXEiABEiABEiABjyDAXYQeMQzGGqEpSvF5rsxmSP7+xiqmNBIgARIgARIgASsBLhGWwi+CtnQZlJCIK19hkTn3QiOgjni4FPacXSIBEiABEiABzyBAB8szxsFQK6Qe3WFKuWB9yWNfhTT4FpjW/w1T2kWY1q2E1L0bpFv6G6qTwkiABEiABEiABC4RYAzWJRal8spSqwFMa5ZDiorK6591F+GYN2Ba+mfePV6QAAmQAAmQAAkYR4AzWMax9ExJUZWgrdtQwDb1z7+A2rUK3OMHEiABEiABEiAB4whwBss4lh4pSf1rMdQ77gHq1IbUOBra2vWAxQLTX79DqlfXI22mUSRAAiRAAiTg7QToYHn7CBZjv3bwEDRJAkRiUe34CUiNGkLq2xtSuXLFtORjEiABEiABEiABRwnQwXKUnJe0s9SNhjz1A8i9e3mJxTSTBEiABEiABLyfAGOwvH8Mi+yB1Pk6aCtXQSvu8OcipfAhCZAACZAACZCAPQToYNlDyxvrpqdDmzAJSmAZWMpXgaVCVetLefBRb+wNbSYBEiABEiABryDAJUKvGCbHjdQOHCw8q3uZMEg1ajgumC1JgARIgARIgASuSoAO1lXRlJ4Hmtg1iORkQNMAVc1xuM7HQmrVsvR0kj0hARIgARIgAQ8iwCVCDxoMZ5iifjcXin5ETrlKUMKjoERWgVKjHtTv5zlDHWWSAAmQAAmQAAkIAjzsuZR/DdR3JkB+bzzg6wtt/wHID46AOvIpyIMHlvKes3skQAIkQAIk4D4CnMFyH3vXaI45D+n+YZA6tIe2aYs1uag88R2oH3zkGv3UQgIkQAIkQALXIAE6WKV90KtVBXbsBOrXA3bugpaRAZhMQHxCae85+0cCJEACJEACbiPAJUK3oXeNYvmF56B07QnTv3utGdyVzt2AuAuQnxzpGgOohQRIgARIgASuQQLcRXgNDLp2+DBQrZr1DEJtxhfiuiqkfjdB0meyWEiABEiABEiABAwnQAfLcKSeJVC5816gdi3It/QHRFoGST+XkIUESIAESIAESMCpBBiD5VS87hcuP/0koChQhj8IpVodKI+Pgrp0GbTsbPcbRwtIgARIgARIoJQS4AxWKR3YwrqlHT0K9c13oM35CtIdt8H07ZeFVeM9EiABEiABEiCBEhJgkHsJAXp6c/2oHG3JUmjLV0JbtQaoUhmSmNWSbxvs6abTPhIgARIgARLwWgJ0sLx26GwzXLnjbmDffkjDh8K0dQMkPdidhQRIgARIgARIwKkEuEToVLzuF64dOwZtsZjB0l/rhIPVqCGk7jdA6n8zpOhG7jeQFpAACZAACZBAKSRAB6
sUDurVuqSdOQP13fegTZ8BaUA/mOZ9e7WqvE8CJEACJEACJFACAnSwSgDPG5pqa9dB/W0htL+WACdOWpONWmev+vSCFBrqD
|
|||
|
<p>We can see that the correlations are not so strong, indicating a
|
|||
|
behavior that is dependent on some items which are very distant from one
|
|||
|
another having an influence on Pearson’s correlation more than that
|
|||
|
of Spearman’s correlation.</p>
|
|||
|
<p>To further explore the similarity and difference between the
|
|||
|
alternative clustering algorithms, we can turn to using the
|
|||
|
<code>tanglegram</code> function (which works for either two
|
|||
|
<code>dendrogram</code>s, or a <code>dendlist</code>).</p>
|
|||
|
<p>First, let us see two methods which are very similar: ward.D vs
|
|||
|
ward.D2. At first glance, we can see that they both give the same
|
|||
|
result for the top 3 clusters. However, since they are both ladderized
|
|||
|
(i.e., having their smaller branch rotated to be higher for each node),
|
|||
|
we can see that their clustering is not identical (due to the
|
|||
|
crossings).</p>
|
|||
|
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="co"># The `which` parameter allows us to pick the elements in the list to compare</span></span>
|
|||
|
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a>iris_dendlist <span class="sc">%>%</span> <span class="fu">dendlist</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">8</span>)) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">3</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb17-4"><a href="#cb17-4" tabindex="-1"></a> <span class="co"># untangle(method = "step1side", k_seq = 3:20) %>%</span></span>
|
|||
|
<span id="cb17-5"><a href="#cb17-5" tabindex="-1"></a> <span class="co"># set("clear_branches") %>% #otherwise the single lines are not black, since they retain the previous color from the branches_k_color.</span></span>
|
|||
|
<span id="cb17-6"><a href="#cb17-6" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">faster =</span> <span class="cn">TRUE</span>) <span class="co"># (common_subtrees_color_branches = TRUE)</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAF3CAYAAAA2H0uDAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAF3AAAAADwmOncAAEAASURBVHgB7J0HnBTF8seLdHDknHOWjCCKomBCRYwYwQDmnMX0fOpTMT8xPHPOggkDQREUAxJFcg6C5JzhgPv3t/3PuduzaXYXuOOq+MztTE9PT89vhq6u6goFsg2JkiKgCCgCioAikE8RKJhPn1sfWxFQBBQBRUARsAgoI9QPQRFQBBQBRSBfI6CMMF+/fn14RUARUAQUAWWE+g0oAoqAIqAI5GsElBHm69evD68IKAKKgCKgjFC/AUVAEVAEFIF8jYAywnz9+vXhFQFFQBFQBJQR6jegCCgCioAikK8RKJyvnz5NDz9p0iTp06ePbW3MmDGSkZHha3nx4sVy6qmnhpUXK1ZMqlWrJi1atJDbbrtNSpcuHXZeDxQBRUARSAcCe2uM2rhxo/Tv319GjBgh69evl8aNG8
tNN90khx9+eDq6vc/aKKCRZVLH+pdffpFOnTrZhrZt2yYwOJfmzp0rjRo1cotzjqtXry6ff/65dOjQIadMdxQBRUARSAcCe2OM2rFjhxx55JEybty4sC4WKFDAjmWnnXZaWHluPlDV6H54O6+//rpMnDhRfv31V3n22WelatWqsnTpUrn88stl9+7d+6FHektFQBFQBP5BIJExauDAgZYJFi5cWL7++mthsn/88ccLUTvvueeefxrLA3t5VjU6bNgwYWvZsmWYWvLjjz+WzMxMefjhhy38q1atkkceecTuP/TQQ1K8eHHLhHjRCxculLJly0rTpk2lV69eUr9+fVtv/vz58vzzz0vFihWle/fu9noktr59+0qVKlUE9edbb70ly5cvl86dO0vbtm0DveqGDRvmXNOxY0dp3bq1bWfy5MnCx3XeeecFak8rKwKKQO5DgPHh8ccfl4IFC8qjjz4qMIzt27fL3XffbTt71VVXWVUiB08++aSdDJ999tnCmMBEObePUUiEjH9169aVk08+2T4TS0TfffedMIbmKUI1mhfp+++/J1h4tpGmsvfs2WMf4corr7RllM+bN8+WvfHGG7asVatW9tgwv+ySJUvm1KMuW6VKlbI5B/344485ZYYB2n3DQLM3bdqU/emnn2abDzrsevMh5Bwb1ahtw/0zZ86cnDq071LNmjXt+bvuuss9pceKgCKQBxEw2h07rjC+jBo1yj7B8OHDc8YBwxxt2bp163LGlJkzZ9pxKK+OUTfeeKN9vjZt2uSpN5ZnVaNHHXWUlC9f3kplLARDhjnaX/6MHDnS7n/zzTf298wzz7S/77zzjhQtWtSqITds2CBTpkyxUiKSo/lIbR3vD2VIgFzz9NNP2+tYCN61a5edCU2dOtVKpcz4UqVatWrZJgwDT7UpvV4RUARyAQKMC56B3JAhQ2yPIo1R3377rR1TmjVrJk2aNLHjTV4co1jqeeWVV+xzXnPNNbngDQToQp5i205nL774Yjv7MGpQO4syj51drlw5W2ZUndk7d+7MNpaY9tioHcOuZra2YMGC7Pfeey/bk/qMesLW8SRC2jPWUDnXTZs2zbZFufmwc8rNy88pT1YiNCoG28ZJJ52U067uKAKKQN5GwKyd2f/XnoRkjOHssVmSyS5RooQdoy666CJbdu+994Y9bF4ao3766afsUqVK2ec47rjjcrR0YQ+Uiw9SF2UCMN10Vz3jjDNsk8y2vJkWMxGjVrAS4c8//yyY97Imx1oitGXLFrn55pttWb169eTCCy+UlStX2nPmPdnf0D+4Nnj0559/ertyxBFH5Ox7a4s5BUnsLFmyxF7VoEGDJK7WSxQBRSA3ImCYghgGIWitpk+fLhMmTJDmzZvLKaecYsei3377TTxp0dNa5bUxyjBBOfHEE8UsHVlN2aBBgwTL0bxEeZoRdu3a1ao1R48eLZ988onFnRfSpUsXu/CMOhPyPjD2zznnHOv3YiRHef/99y0TbN++PaekUKFC9jf0j5m15RxWqFAhZ3/RokU5+0ZSzNlPZgcm6DFZ1CNKioAicGAggIrTaHnsw9x5553WKvzYY48VGCT0xBNPCEswTMqN1GjL8tIYhetEt27dLFPnuViKwiAxr1GeZoRYh8L4cDlgVoUkeOihh1oTXl7EV199Zd+HxwhZ28OiCbrvvvukZ8+e1sLUqEhtGeddKlKkSE4RzqKes/zQoUNteVZWlnzxxRc5dbwdLEuRUj0G55WH/mJ1xSwRSyvaqV27thg1SWgV3VcEFIE8joCnufLGI5igxwi9srw4RuFA36NHD9m8ebO1vGd9ELsLXMHY8hTlYrVtQl179913rV7agJ5tZib2mtC1PKwxPatSTho/F1vfqCeyH3jggeyDDz4453rPYjN0jdAwqLB+3HLLLba+kR6zjWtFNhajhiHntOGtERqXCFv21FNP2etDrUaN2iDbLKTnXEPfac8w1LB76YEioAjkfQQMc8g2E2
j7/x2Lc44hxiD+77MZh/ecB80rY9SLL76Y03/vOUJ/jWCR80y5fSdPS4QGdOu/4klt3iwL9SJ+fxCzsVB99YMPPihYnCKJ3
|
|||
|
<p>Next, let us look at two methods which also have a high cophenetic
|
|||
|
correlation: ward.D vs the average:</p>
|
|||
|
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a><span class="co"># The `which` parameter allows us to pick the elements in the list to compare</span></span>
|
|||
|
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a>iris_dendlist <span class="sc">%>%</span> <span class="fu">dendlist</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a> <span class="co"># untangle(method = "step1side", k_seq = 3:20) %>%</span></span>
|
|||
|
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">faster =</span> <span class="cn">TRUE</span>) <span class="co"># (common_subtrees_color_branches = TRUE)</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAF3CAYAAAA2H0uDAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAF3AAAAADwmOncAAEAASURBVHgB7J0HnBRF1sAfOeeM5CCSJYuioogg5iwmzFnPrGdOZz7Dd4o5Y86YRURUFFDJIEjOOYcl71f/8mqup7pndnpmdtmdqcevd7qrq6urXjX16uViuQrEgcOAw4DDgMOAw0CWYqB4lo7bDdthwGHAYcBhwGFAY8ARQvchOAw4DDgMOAxkNQYcIczq6XeDdxhwGHAYcBhwhNB9Aw4DDgMOAw4DWY0BRwizevrd4B0GHAYcBhwGHCF034DDgMOAw4DDQFZjwBHCrJ5+N3iHAYcBhwGHAUcI3TfgMOAw4DDgMJDVGCiZ1aNP0+AnTJgg5557rm5tzJgxUrp0aV/LCxculGOOOSaqvGzZslKvXj1p166dXH/99VK5cuWo++7CYcBhwGHAYSD/MVDMRZZJHcmjRo2SXr
166YZycnIEAmfDrFmzpGXLlnZx5Lp+/fry8ccfS/fu3SNl7sRhwGHAYcBhIP8x4ESj+Y9j3xteeuklGTdunPzyyy/yf//3f1K3bl1ZsmSJXHjhhbJr1y5ffVfgMOAw4DDgMJB/GCiyotFvvvlGONq3bx8llnz33XelXLly8q9//UtjbeXKlfLAAw/o8/vuu0/Kly+viRDEaN68eVK1alXZZ5995IwzzpBmzZrpenPmzJGnnnpKatasKUcddZR+Ho7txhtvlDp16gjiz1dffVWWLVsmBx98sHTq1CnUDLVo0SLyTM+ePaVjx466nUmTJsn7778vp512Wqj2XGWHAYeBwo0BNr6x1pw1a9YIaxNw5ZVXStOmTSODeeKJJ2TBggXSp08fOfLII3X5W2+9pde+zZs3y7777itXXHGFXsfMQ3feeads3LhRLrvsMnn66aeFNfDiiy+WAw88MM+1z7Txyiuv6HewXg4cOFBLuZBYocY577zzTDXJqy+RioX9BNFoUYThw4cTLDxXcVO5u3fv1kNQk63LKJ89e7Yue/nll3VZhw4d9LUifrkVK1aM1KMuR61atXK5B4wcOTJSpgigPlcfRK76uHI//PDD3JIlS0Y936RJk8i1Eo3qNuw/M2fOjNShfRsaNGig7//zn/+0b7lrhwGHgSKMgUTWHNYn1qFbbrklMtKlS5fmlihRQpf/+uuvuvzMM8+MrCNm7WrcuHGu2rxHnqtdu7auozbYkbqKQdDrW15rH42cc845ked4R/HixXP3228/XXbsscdG3pNIXyKVC/lJkRWNHnTQQVK9enXNlWGsAijiqH/5M2LECH3+xRdf6N8TTjhB/77++utSpkwZLYZcv369TJ48WXOJ7Jq+++47Xcf8oQwOkGcef/xx/dzVV18tO3fu1BzclClT9K5JfSjmkaR/GzZsqJ9VBDzpNtyDDgMOA4UPA4msOXBswBtvvCFqY6/P33zzTa0qQeqlCJF89tlnMmTIEG2MB3c2f/58LbHi96677tLPeP/MmDFDnnnmGbn77rvl6KOP1utYXmsf9g5Iu4DbbrtNq2weffRRGT16tC4zf8L2xTxXaH8LOaGO271BgwbpXYoSg+rdjkJybrVq1XSZEnXmbt++PVdZYuprJXaMakvp4nLnzp2bqz6sXMP1qQnXdQxHSHvff/995LmpU6fqtij/6quvIuXPP/98pDxZjlCJWHUbRxxxRKRdd+Iw4DCQORiIt+aoTXluhQoV9BqAtAtQYkh9rewI9LXh1FgjaIvjxx9/1HWUpXpEMmY4wksvvVQ/Z/+J1w9FUHV7SmWUqzb8kUdbt26tyw1HmGhfIg0U8pPUWZk9SOKPP/54/XZFlCLcIHJxxf5rjvDnn3+WDRs2CDo5dlUAcvVrrrlGlyGLP+uss2TFihX6npor/ev9g0zcALJ6AwcccIA5jegWIwVJnCxatEg/1bx58ySedo84DDgMFFYMJLLm4DplbAPgINEpInHC3oE1CjDSItY7JTLVB5IxQG36I+uYLlB/vGsXZYn0AzcvoGvXrrp9faH+eNc7ysL2xbRTWH+LrLEMCD388MO1WFPJz6VSpUoax/3795eJEyfK559/rsWZFBqxKOennHKKfPnll9K5c2etoKYNxAaw/nxcNqhdWqSoRo0akXPEEeZDU5xipDyZE4igIbJt2rRJpgn3jMOAw0AhxUCiaw7iUQxqPvnkk4jxy0knnRQ5N2sRrloXXHCBb7S225apbyom0g8M9wAM92AMihUrpq8hzF4wbSfaF++zhfG8SHOE7JYgfIrVF3ZJcII9evSQvn37alwjxwYMIUS3N2zYMF2GZdXpp5+ud1xKRKrLuG9DqVKlIkV77713xFn+66+/1uU7duzQH26k0n9PsCxFZ2kInH2f623btsm0adO01SvtNGrUSM4+++ygqq7MYcBhoAhiIMya061bN71Bx3Zh8ODBerQXXXRRZNTGOh2rdmWoIko1JMrIT5
B8we1VqVIlUpcT79qVaD9gDACkZOgj0Vd++umnMn78eF1u/oTti3mu0P4WctFtnt1TymUtu1YIzh0wYICu79XlYY1prEq5q
|
|||
|
<p>We see that when it comes to the major clusters, the two algorithms
|
|||
|
perform quite similarly.</p>
|
|||
|
<p>However, how are they doing inside each of the clusters? It is quite
|
|||
|
difficult to compare the two because of the high value in ward.D. For
|
|||
|
comparison purposes, we can “rank” the heights of the branches in the
|
|||
|
two dendrograms (while still preserving their internal order). Next, we
|
|||
|
can highlight the shared common sub-trees (with different colors), and
|
|||
|
the distinct edges (with a dashed line):</p>
|
|||
|
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a><span class="co"># The `which` parameter allows us to pick the elements in the list to compare</span></span>
|
|||
|
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a>iris_dendlist <span class="sc">%>%</span> <span class="fu">dendlist</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb19-3"><a href="#cb19-3" tabindex="-1"></a> <span class="co"># untangle(method = "step1side", k_seq = 3:20) %>%</span></span>
|
|||
|
<span id="cb19-4"><a href="#cb19-4" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"rank_branches"</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb19-5"><a href="#cb19-5" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAF3CAYAAAA2H0uDAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAF3AAAAADwmOncAAEAASURBVHgB7J0HgB5F+f+f673kLr3epdATSIBQAwFEOggKKKIgiBUVEEH9iWBFxPZX7EhRQAFFpEoNLUKogYRAGuk9d7ne33v/z2fem/f23Xffdu9dcpfsk8zt7uzs7Oy8u/Odp05GUEl88nvA7wG/B/we8HtgD+2BzD30uf3H9nvA7wG/B/we8HvA9IAPhP6L4PeA3wN+D/g9sEf3gA+Ee/TP7z+83wN+D/g94PeAD4T+O+D3gN8Dfg/4PbBH94APhHv0z+8/vN8Dfg/4PeD3gA+E/jvg94DfA34P+D2wR/eAD4R79M/vP7zfA34P+D3g94APhP474PeA3wN+D/g9sEf3QPYe/fT99PALFy6Uz3zmM6a2BQsWSG5ublTN69atkzPPPDMiPz8/X8aMGSMHHHCAXH311V
JaWhpx3j/we8DvAb8H/B4Y+B7I8CPLpN/J8+fPl6OPPtpU1NraKgCcm1asWCHTpk1zZ4ePx44dK//+979l9uzZ4Tx/x+8Bvwf8HvB7YOB7wBeNDnwfR93hL3/5i7z55pvyv//9T37961/L6NGjZePGjXLZZZdJIBCIKu9n+D3g94DfA34PDFwPDFnR6BNPPCGk6dOnR4gl7733XikoKJAf/ehHpte2bdsmN954o9n/4Q9/KIWFhQaEAKPVq1dLeXm57LPPPvLJT35SJk+ebMp98MEHcsstt8jw4cPl9NNPN9fDsV1zzTUyatQoQfx5xx13yObNm+XYY4+VmTNnpvQLTZ06NXzNEUccIQceeKCp55133pH7779fPv7xj6dUn1/Y7wG/BwZ3DzDxjTXm1NbWCmMT9JWvfEWqq6vDD/OrX/1K1q5dKyeccIKcdtppJv+ee+4xY19zc7McdNBBcvnll5txzF50/fXXS2Njo3zpS1+S3/72t8IY+PnPf17mzJmTcOyzddx+++3mHoyXn/jEJ4yUC4kVapxLLrnEFpNEbQkXHOw7iEaHIj3zzDMECw8qNxXs7u42j6A/tskjf+XKlSbvtttuM3kzZswwxwp+weLi4nA5ypJGjBgR5Bz0/PPPh/MUAM2+vhBBfbmC//rXv4LZ2dkR11dVVYWPVTRq6nD/Wb58ebgM9btp/Pjx5vy3vvUt9yn/2O8BvweGcA8kM+YwPjEOffvb3w4/6aZNm4JZWVkm/+WXXzb5F154YXgcsWPXpEmTgjp5D183cuRIU0Yn2OGyyiCY8S3R2EclF198cfg67pGZmRk8/PDDTd5ZZ50Vvk8ybQkXHuQ7Q1Y0eswxx0hFRYXhyjBWgRQczZY/8+bNM/uPPvqo2Z5zzjlm+9e//lXy8vKMGLK+vl4WLVpkuERmTU8//bQpY/+QBwfINb/85S/NdVdccYV0dXUZDm7x4sVm1qQvir2kz9sJEyaYaxXA+1yHf6HfA34PDL4eSGbMgWOD/va3v4lO7M3+3XffbVQlSL0UiOThhx+Wu+66yxjjwZ2tWbPGSKzY3nDDDeYa55+lS5fK73//e/ne974nZ5xxhhnHEo192Dsg7YK+853vGJXNz372M3nllVdMnv2TalvsdYN2O8iBOm7zLrroIjNLUTGome1oJweHDRtm8lTUGezo6AiqJaY5VrFjRF2qiwuuWrUqqC9W0HJ9+oObMpYjpL5nn302fN27775r6iL/8ccfD+f/6U9/Cuf3lSNUEaup45RTTgnX6+/4PeD3wO7TA/HGHJ2UB4uKiswYgLQLUjGkOVY7AnNsOTXGCOoivfDCC6aMWqqHJWOWI/ziF79ornP/idcOBVRTn6qMgjrhD1+67777mnzLESbblnAFg3wnfVZmF0L82Wefbe6uoBTmBpGLK/tvOMKXXnpJGhoaBJ0csyoIufqVV15p8pDFf+pTn5KtW7eac/pbma3zDzJxS8jqLR111FF2N6xbDGf0YWf9+vXmqilTpvThav8Svwf8HhisPZDMmIPrlLUNgINEp4jECXsHxijISosY71RkahKSMUgn/eFxzGToH+fYRV4y7cDNCzrkkENM/eZA/zjHO/JSbYutZ7Buh6yxDB364Q9/2Ig1VX4uJSUlpo9PPvlkefvtt+WRRx4x4kwyrViU/fPOO08ee+wxmTVrllFQUwdiA1h/Xi436SwtnFVZWRneRxxhXzTlFMP5fdkBBC3I7rfffn2pwr/G7wG/BwZpDyQ75iAexaDmwQcfDBu/fOxjHwvv27EIV63PfvazUU/rdtuy5W3BZNqB4R6E4R6MQUZGhjkGmJ1k6062Lc5rB+P+kOYImS0BfMrqC7MkOMHDDjtMTjzxRNPXyLEhC4To9p566imTh2XVBRdcYGZcKiI1eZx3U05OTjhrr732CjvL//e//zX5nZ2d5sUNF+rZwbIUnaUFOPd5jtvb22XJkiXG6pV6Jk6cKJ/+9Ke9ivp5fg/4PT
AEeyCVMefQQw81E3RsF373u9+Zp/3c5z4XfmprnY5VuxqqiKqGRI38BMkX3F5ZWVm4LDvOsSvZdsAYQEjJ0Eeir/zPf/4jb
|
|||
|
<p>We have 39 sub-trees that are identical between the two
|
|||
|
dendrograms:</p>
|
|||
|
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">unique</span>(<span class="fu">common_subtrees_clusters</span>(iris_dendlist[[<span class="dv">1</span>]], iris_dendlist[[<span class="dv">4</span>]]))[<span class="sc">-</span><span class="dv">1</span>])</span></code></pre></div>
|
|||
|
<pre><code>## [1] 39</code></pre>
|
|||
|
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a><span class="co"># -1 at the end is because we are ignoring the "0" subtree, which indicates leaves that are singletons.</span></span></code></pre></div>
|
|||
|
<p>What we can learn from this is that actually the two algorithms seem
|
|||
|
to give quite different results in the high resolution (higher cuts).
|
|||
|
However, since both capture the two major clusters (Setosa vs the
|
|||
|
others), they are considered quite similar by the cophenetic
|
|||
|
correlation.</p>
|
|||
|
<p>But what about the “complete” method (that got a lower cophenetic
|
|||
|
correlation than the other methods)? When we compare “complete” vs
|
|||
|
“average”, we can quickly see that in the “complete” method, the
|
|||
|
splitting of the clusters is much more balanced, and mixes the “Setosa”
|
|||
|
species with another one. This is probably the cause for the big
|
|||
|
difference found in the cophenetic correlation between the “complete”
|
|||
|
method and the other clustering methods:</p>
|
|||
|
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" tabindex="-1"></a>iris_dendlist <span class="sc">%>%</span> <span class="fu">dendlist</span>(<span class="at">which =</span> <span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">4</span>)) <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb23-2"><a href="#cb23-2" tabindex="-1"></a> <span class="fu">untangle</span>(<span class="at">method =</span> <span class="st">"step1side"</span>, <span class="at">k_seq =</span> <span class="dv">2</span><span class="sc">:</span><span class="dv">6</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb23-3"><a href="#cb23-3" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">2</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb23-4"><a href="#cb23-4" tabindex="-1"></a> <span class="fu">tanglegram</span>(<span class="at">faster =</span> <span class="cn">TRUE</span>) <span class="co"># (common_subtrees_color_branches = TRUE)</span></span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAF3CAYAAAA2H0uDAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAF3AAAAADwmOncAAEAASURBVHgB7J0F3BTF+8Afuru7pUsaRVEkBBUVfyooYrfYLYr1Fws7sQVsEQVEECVUEBAERbq7u8H3P9/BOfb29u69u703bx4+y+3OzszOPLvvPPN0jhQFYsFiwGLAYsBiwGIgSTGQM0nnbadtMWAxYDFgMWAxoDFgCaH9ECwGLAYsBiwGkhoDlhAm9eu3k7cYsBiwGLAYsITQfgMWAxYDFgMWA0mNAUsIk/r128lbDFgMWAxYDFhCaL8BiwGLAYsBi4GkxoAlhEn9+u3kLQYsBiwGLAYsIbTfgMWAxYDFgMVAUmPAEsIEvf5Vq1ZJ8+bN9bFx48aYe503b56sXr065na2gcWAxYDFgMWAPwxYQugPf4HWBw4ckD///FMfhw4dCpSndnL06FHp37+/NGvWzBLC1JBl71
sMWAxYDKQBBnKnQZ+2yxgwAAF95ZVXYmhhq1oMWAxYDFgMJBIDmZ4QTpw4Ub766itB9NimTRvp3LmztGrVKggHO3bskO+++04mTZokO3fulKZNm8r1118vpUuXDtS766675N9//5UBAwbIJ598Ij/99JNUqFBBbrjhBmnQoIG89957MnbsWClbtqxceuml0rZtW92W57744otSrFgxue222+T111+X33//XWrXrq2vK1euHHiG18muXbvk5Zdflj/++ENKliwpp512mu6funv37pX77rsv0Ix6M2fO1BwihXCYH3zwgSxdulTq1Kkjl1xyibRo0SJQ355YDFgMZA0MzJo1S959911ZsWKFFC9eXOrVq6f/nmvWrCnbtm2TJ554Qk/klltukRo1agQmxdrDGtSpUyfp0aOHLh8+fLj88MMPev1AknTzzTfrPk2jRx55RHbv3i033nijvPbaa7J582a57rrrpEOHDhJpHKY9v++//75+RsGCBaV3796SP39+GTFihDRq1EiuvPLKQNXUxhKomNlPCLqdWeGxxx5LyZEjB0HBg46hQ4cGhrx169YURfiC7lNfEbSUqVOnBurlypVL12nXrl1Q3TJlyqScd955QWWFCxdO+fvvv3VbRfT0vRIlSqQoHWBQPUVoUxYuXKjr8WvGqT5cXbZ27doU9VEHys39Xr166fubNm0Kucf4gM8//zwlb968Qffz5MmT8vHHH+v79j+LAYuBrIEBRfxSWFPM37/5Ze3hHtCkSRN9/4EHHghMav369Slm3TJrmdqkh/RTrVq1lGXLlgXasfbxDOe6+Nlnn+lnpTYOOrn88suDnpEzZ84UxRjosp49ewaeE81YApUz+Ylk1vH99ddfgY+Aj2P58uUpjz/+uH4ZECC1y9FDP/3003WZ4tBSIJCjR49OURxjoGzfvn26nvmg1E4sRXFYKco4JSVfvny6XpEiRTTR5JmKS9RlTz/9tG5nCCEfFoTw559/Thk5cmQKHx9l5sPwIoR9+/bVdVq2bKkJ65QpU1LKlSunyxQHm6J0iSnff/+9vqavV199NUXt2FIUV6sJOWVKf5jCH8Tzzz+v6/HHo3Z7mfW12XFZDFgMuDDAhr5UqVIp11xzjf7bZp1RnJb+e37nnXd0bcW56esqVaqkKLsBXfbcc8/pssaNG+vrb7/9Vl+zQVbcWcrKlStTzjrrLF122WWXBZ5qCKHi4lLeeOONlEcffTSFdTCacfzyyy+6P9aehx56KGXdunUpgwcPDpSZ9S7asQQGlclPMi0hHDRokEZ+0aJFU44cOaLRyO+oUaP0B0CBEimksFvhpTk5JSfxUuJO3dYQQoiNAYgiba+99lpTlHL22WfrsptuukmXOftiV2WAj4q2BQoUSFEiV80Zcs1hOEKz+1IiEf1x84Er0Yeuo8QNuqs9e/boa9r9+uuvumzMmDG6DA5wy5Ytuu3hw4dTKlWqpMv5I7BgMWAxkLUwwN8/G3o27BUrVtR/yxA7gM1voUKFdNmECRN0mRJD6mulMtHXhlM788wzA+vJ5MmTdR2II+sQYAihUvvoa/d/kcYxcOBA3Z8S3wbWXdrXr19flxtCGO1Y3M/OrNeZVkeIXgxo3bq1KCKmz/k1cnIKZsyYofV+nKuPgx8N6BDVDkyU2FTmz58vXbt2NbdEEZPAueII9bmzDFk4oAiP/nX+h4zdQMeOHfXp/v37xctdYsOGDaKInK5z1VVXCYcTlNjUeRl0vmTJEn3NGJx6TlNpzZo15tT+WgxYDGRyDGALoLgrUZIkUYRQlLonsKYpwqBHrzb8cvHFF2s94kcffaR1fko9I2qjLUqypOuYNVFJkQLtzdSxVFeqFlESJ1Ok9XmBC3USzTiMC5eSYgU946STTtJrqekv1rGYdpn1N9MSQgxLAF6eE+bMmSPVq1fXxiu1atUK3Fq8eLEmfhSgHN6+fbu+5/wwKODDMsAHCaidmCmK+KtEGtrAhkqKO9V1FU
cqSn8oGMU4wdnnHXfcIUoH4LwdpNwOuqEuTFuIMsY5PMMJKMgtWAxYDGQNDFx44YWipDxy4oknaqOYLl26iJI8ybRp04KID
|
|||
|
<p>We can quickly plot all 8 methods to see this phenomenon (i.e., that
|
|||
|
“complete” has its smaller cluster larger than it is in all the other
|
|||
|
clustering methods):</p>
|
|||
|
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a><span class="fu">par</span>(<span class="at">mfrow =</span> <span class="fu">c</span>(<span class="dv">4</span>,<span class="dv">2</span>))</span>
|
|||
|
<span id="cb24-2"><a href="#cb24-2" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">8</span>) {</span>
|
|||
|
<span id="cb24-3"><a href="#cb24-3" tabindex="-1"></a> iris_dendlist[[i]] <span class="sc">%>%</span> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k=</span><span class="dv">2</span>) <span class="sc">%>%</span> <span class="fu">plot</span>(<span class="at">axes =</span> <span class="cn">FALSE</span>, <span class="at">horiz =</span> <span class="cn">TRUE</span>)</span>
|
|||
|
<span id="cb24-4"><a href="#cb24-4" tabindex="-1"></a> <span class="fu">title</span>(<span class="fu">names</span>(iris_dendlist)[i])</span>
|
|||
|
<span id="cb24-5"><a href="#cb24-5" tabindex="-1"></a>}</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAARlCAYAAAAqOce0AAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAARlAAAAAHbLbyYAAEAASURBVHgB7H0HeFTV1vYiPSFACCTUUELvSJGi0qsIgiCgV3+xXcGOFcu9WK6fXlFBsWDFi/SmgvQiRUB6h9ADhAAJgQAppM6/34UnnDkzSU7GhGTOrPU8w5yyT9nvZLH26qVsikhIEBAEBAFBQBDwUAS8PHTeMm1BQBAQBAQBQYAREEEofwiCgCAgCAgCHo2ACEKP/vll8oKAICAICAIiCOVvQBAQBAQBQcCjERBB6NE/v0xeEBAEBAFBQASh/A0IAoKAICAIeDQCIgg9+ueXyQsCgoAgIAiIIJS/AUFAEBAEBAGPRkAEoUf//DJ5QUAQEAQEARGE8jcgCAgCgoAg4NEIiCD06J9fJi8ICAKCgCAgglD+BgQBQUAQEAQ8GgERhB7988vkBQFBQBAQBEQQyt+AICAICA
KCgEcjIILQo39+mbwgIAgIAoKACEIP/RvIzMykuLg4ysrKskMgOTmZj+MctoUEAUHAHAKXLl2ipKQkc4MNo8BvaWlphqOye7MQEEF4s5AuYc/Zt28fVapUiaKiouze7N133+XjOBccHEx169alNWvW2I2RHUFAEHBEoHfv3vTGG284nsjnSEpKCvPc4sWL8xkpp4sKARGERYVsId43PT2djh8/TtDibDYbb6empvITTp48SVeuXOHt+Ph42rFjR44mB40O5y9cuEDR0dE8BhogjuVFEIIYs2vXLqpfvz4NHz6crl69mtclck4Q8CgEwEe7d+/O4T1M/uOPP6bHH388h0eh4R06dMiB32JiYtjqAr6EJuiMTpw44WCtcTZOjhUSAuo/VqESjoASaLbAwEDbb7/9ZtuyZYtN/fS2Tz/91Hb+/HlbqVKlbGvXrrWNGTPGVq5cOVtERIStfPnyNiU4bTNmzLAprc5WrVo1W6tWrWxKSNpq1qxpq1y5sq1Dhw58H6UZ2s3+1VdftVWtWjXnGK7B81auXJlzTDYEAU9GYOfOnTa1WLS1a9fOVqZMGdvnn3/OcLRt29b27LPP2tSikXnmtttus9WuXdvm5eVlmzNnDo95+umnbQEBAcyTjRo1sg0dOtQG/gaPzZ8/33b58mVbly5d+P7gVfCwUNEjIBphIS0oivI2QUFB1KtXL1qyZAmtWrWK/Pz86Pfff6elS5dSWFgY3X777dSpUyfasGEDTZs2jRQz0datW/mV4LOYPHkyj58yZQqFh4cTVqSDBw829cpKaPK4M2fOmBovgwQBqyOgFp7k6+tLo0aNogULFlCPHj2cThk8C0uOEni0fPlySkhIoK+++oqUUKSjR48SfIrqv3i7a3E/nDt16hS9/PLLpASn3XnZKRoEfIrmtnLXwkZg0KBB9M4771BkZCQ98cQTNHXqVPLx8aGBAweSWnHSunXraPr06aRWpbx/7do1Fph4D7UyJQjTbdu2UcuWLcnb25tat25t6hVjY2N5nNISTY2XQYKA1RF49NFHmd+efPJJdle89NJL9N577zlMu3HjxnysSpUqHAijNEk2dzZr1oyUVkgNGzZ0uAZCFgJS409cC7eE0jwdxsqBwkNANMLCw7JI79S/f39eJUITfP7551nI/fzzz3TPPfdQYmIiffDBB/TRRx/R+PHjmdmys7Nz3sff35+3sXJdv349+yxwrUYQdtAk4YsEZWRkEJgWwvX1118nZd5hYaqNl29BwJMRWLZsGTVt2pQ1vGeeeYZ++OEHp3BAawQp9wV/Y5GKxSu0whUrVtCff/7Jx/X/KHMrC0kEqCFwbcSIESIE9QAV0bYIwiICtrBvGxoayubPGjVqsFYIoYZVYrdu3SgkJIQDWh555BEaMGAAKd8Cm2S0d9AYEUwFzQ5apabpYQyCYh544IGcIBsE3SifIvXt25ePzZ07l5SPUrudfAsCHo1AixYt2CQKXpo3b55TbdAZQMqHzwvW7777jt566y1q3rw5C0b92CFDhvBx5ddnyw80QqGiR6AU3JBF/xh5ws1AwKwJBeHaMJUKCQKCgOsIwP+OFCOzhOhupFfAndGxY0dq3749+/7HjRvncAvwMu6tLWIdBsiBQkVANMJChbN4b2bWjyBCsHh/J3m6NRAoiBDEjMuWLcuWFVhfcC1cECNHjnQKBnhZhKBTaIrkoGiERQKr3FQQEAQEgdwREKtM7tgUxxkRhMWBujxTEBAEBAFBoMQgIKbREvNTyIsIAoKAICAIFAcCIgiLA3V5piAgCAgCgkCJQUAEYYn5KeRFBAFBQBAQBIoDARGExYG6PFMQEAQEAUGgxCAggrDE/BTyIoKAICAICALFgYAIwuJAXZ4pCAgCgoAgUGIQEEFYYn4KeRFBQBAQBASB4kBABGFxoC7PFAQEAUFAECgxCIggLDE/hbyIICAICAKCQHEgIIKwOFCXZwoCgoAgIAiUGAREEJaYn0JeRBAQBA
QBQaA4EBBBWByoyzMFAUFAEBAESgwCIghLzE8hLyIICAKCgCBQHAiIICwO1OWZgoAgIAgIAiUGARGEJeankBcRBAQBQUAQK
|
|||
|
<p>It seems that the cophenetic correlation is very biased towards the
|
|||
|
influence of the main clusters. Another correlation measure to use is
|
|||
|
the <code>cor_common_nodes</code> correlation (giving the proportion of
|
|||
|
nodes which share the exact same list of labels in both dendrograms). We
|
|||
|
can also check it out:</p>
|
|||
|
<div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" tabindex="-1"></a>iris_dendlist_cor2 <span class="ot"><-</span> <span class="fu">cor.dendlist</span>(iris_dendlist, <span class="at">method =</span> <span class="st">"common"</span>)</span>
|
|||
|
<span id="cb25-2"><a href="#cb25-2" tabindex="-1"></a>iris_dendlist_cor2</span></code></pre></div>
|
|||
|
<pre><code>## ward.D single complete average mcquitty median centroid
|
|||
|
## ward.D 1.0000000 0.7324415 0.8595318 0.8461538 0.8361204 0.7458194 0.7324415
|
|||
|
## single 0.7324415 1.0000000 0.7324415 0.7491639 0.7458194 0.7591973 0.7625418
|
|||
|
## complete 0.8595318 0.7324415 1.0000000 0.8060201 0.7993311 0.7491639 0.7290970
|
|||
|
## average 0.8461538 0.7491639 0.8060201 1.0000000 0.8494983 0.7892977 0.7725753
|
|||
|
## mcquitty 0.8361204 0.7458194 0.7993311 0.8494983 1.0000000 0.7859532 0.7759197
|
|||
|
## median 0.7458194 0.7591973 0.7491639 0.7892977 0.7859532 1.0000000 0.8528428
|
|||
|
## centroid 0.7324415 0.7625418 0.7290970 0.7725753 0.7759197 0.8528428 1.0000000
|
|||
|
## ward.D2 0.8795987 0.7324415 0.8294314 0.8294314 0.8294314 0.7558528 0.7357860
|
|||
|
## ward.D2
|
|||
|
## ward.D 0.8795987
|
|||
|
## single 0.7324415
|
|||
|
## complete 0.8294314
|
|||
|
## average 0.8294314
|
|||
|
## mcquitty 0.8294314
|
|||
|
## median 0.7558528
|
|||
|
## centroid 0.7357860
|
|||
|
## ward.D2 1.0000000</code></pre>
|
|||
|
<p>And plot it:</p>
|
|||
|
<div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" tabindex="-1"></a><span class="co"># corrplot::corrplot(iris_dendlist_cor2, "pie", "lower")</span></span></code></pre></div>
|
|||
|
<p>This gives us another perspective on our clustering algorithms. We
|
|||
|
can see that most methods have around 75% common nodes with one another.
|
|||
|
Centroid and median seem relatively close to one another, as well as
|
|||
|
ward.D2 and ward.D to one another and to complete, average, and mcquitty
|
|||
|
(as compared to the other methods).</p>
|
|||
|
</div>
|
|||
|
<div id="clustering-prediction-of-the-3-species-classes" class="section level3">
|
|||
|
<h3>Clustering prediction of the 3 species classes</h3>
|
|||
|
<p>Lastly, we would like to see which of the different clustering
|
|||
|
algorithms came the closest to detecting the 3 flower species (when
|
|||
|
using a cut of k=3).</p>
|
|||
|
<p>For this purpose, we compare the clustering solution of each
|
|||
|
algorithm with the real clusters, using the Fowlkes-Mallows Index (also
|
|||
|
used in the package for the <code>Bk_plot</code>). This measure is
|
|||
|
similar to the Rand (or adjusted Rand) index, and gives a value of 1 when
|
|||
|
the two clusters conform, and 0 when they do not.</p>
|
|||
|
<div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" tabindex="-1"></a>get_ordered_3_clusters <span class="ot"><-</span> <span class="cf">function</span>(dend) {</span>
|
|||
|
<span id="cb28-2"><a href="#cb28-2" tabindex="-1"></a> <span class="fu">cutree</span>(dend, <span class="at">k =</span> <span class="dv">3</span>)[<span class="fu">order.dendrogram</span>(dend)]</span>
|
|||
|
<span id="cb28-3"><a href="#cb28-3" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb28-4"><a href="#cb28-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb28-5"><a href="#cb28-5" tabindex="-1"></a>dend_3_clusters <span class="ot"><-</span> <span class="fu">lapply</span>(iris_dendlist, get_ordered_3_clusters)</span>
|
|||
|
<span id="cb28-6"><a href="#cb28-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb28-7"><a href="#cb28-7" tabindex="-1"></a>compare_clusters_to_iris <span class="ot"><-</span> <span class="cf">function</span>(clus) {<span class="fu">FM_index</span>(clus, <span class="fu">rep</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="at">each =</span> <span class="dv">50</span>), <span class="at">assume_sorted_vectors =</span> <span class="cn">TRUE</span>)}</span>
|
|||
|
<span id="cb28-8"><a href="#cb28-8" tabindex="-1"></a></span>
|
|||
|
<span id="cb28-9"><a href="#cb28-9" tabindex="-1"></a>clusters_performance <span class="ot"><-</span> <span class="fu">sapply</span>(dend_3_clusters, compare_clusters_to_iris)</span>
|
|||
|
<span id="cb28-10"><a href="#cb28-10" tabindex="-1"></a><span class="fu">dotchart</span>(<span class="fu">sort</span>(clusters_performance), <span class="at">xlim =</span> <span class="fu">c</span>(<span class="fl">0.7</span>,<span class="dv">1</span>),</span>
|
|||
|
<span id="cb28-11"><a href="#cb28-11" tabindex="-1"></a> <span class="at">xlab =</span> <span class="st">"Fowlkes-Mallows Index (from 0 to 1)"</span>,</span>
|
|||
|
<span id="cb28-12"><a href="#cb28-12" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Perormance of clustering algorithms </span><span class="sc">\n</span><span class="st"> in detecting the 3 species"</span>,</span>
|
|||
|
<span id="cb28-13"><a href="#cb28-13" tabindex="-1"></a> <span class="at">pch =</span> <span class="dv">19</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAYAAAB8GMlFAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAHCAAAAAOQBvRMAAEAASURBVHgB7J0HmBTF1obPknPOIEgykDEgqEiOIhlUwICov4o5XQPBhEgQRVCCBBUUURD1AioCIiogUVABUQSRnHOG/us73mpnNszO7s5sz+x89Twz06HiWz399TlV3R3nmCAMJEACJEACJBCjBDLFaLvZbBIgARIgARJQAhRCHggkQAIkQAIxTYBCGNPdz8aTAAmQAAlQCHkMkAAJkAAJxDQBCmFMdz8bTwIkQAIkQCHkMUACJEACJBDTBCiEMd39bDwJkAAJkACFkMcACZAACZBATBOgEMZ097PxJEACJEACFEIeAyRAAiRAAjFNgEIY093PxpMACZAACVAIeQyQAAmQAAnENAEKYUx3PxtPAiRAAiRAIeQxQAIkQAIkENMEKIQx3f1sPAmQAAmQAIWQxwAJeEDgzJ
kzsmnTJsEvAwmQgLcEKITe8g976d99953ExcW5n0yZMknOnDmlQoUK8u6774a9fBaQkMDo0aMlb9682gf4PXXqVMJIyWwpUqSI9umff/6ZTMyU7T58+LAMHDgwZYmSiB2uOiZRXMDN6V2Xe++9V/tn0KBBbr1+/vln+eCDD9z1e+65R+MMGTLE3cYFbwhQCL3hnu6lQgDLli0rRYsWlfPnz6s1cvvtt8uoUaPSvS6xXiCEBuJ35513yquvvirZs2ePCCTbt2+XypUryxtvvBGS+mTJkkXwwYVYrIXMmTNr2/G/Q5g8ebLUrl1blixZEmsooqK9FMKo6Ka0V7JAgQLy119/ya5du2Tv3r3SqlUrzXTmzJlpz5w5BE3AcRzZunWrxn/llVekd+/eQacNd8RDhw7J7t27Q1bMzp071fVbvnz5kOUZLRmNHDlS2/7EE09olWG5nzt3LlqqH3P1pBDGXJeLuuXuuOMObfnSpUtdAnDdNGnSRPLnzy8XXXSRPPvss+4YFqzIGjVq6H649mBZtm7dWtOeOHFCHn/8cb3iRdqrr75aZs2a5eabVNqPPvpI83zzzTfl0UcflVKlSqlF8s4778hvv/0mjRo1Egh4w4YNZePGjW5+v//+u/Tq1UvrWLBgQalXr56fm/f555/XfFEHWF0lSpSQCy+8UAYPHuzmgQUIUteuXaVkyZJSrlw5ueuuu2T16tVunKNHjwrcV2XKlJFixYpJhw4dZMuWLe7+xBbWrFkj3bt31zRoT5cuXfQCBHExJgiG4IHQoEEDefnll3U5/ldydYsfH32FvKdMmeLu6t+/v26DNYKwbds2rU/x4sUlR44cUqlSJRk2bJjugwC2b99el3GhhLy++eYbXU/tcXHddddpPlb4r7nmGrn88ssFjNCnOFauvPJK+eGHH7Qc+wUvBY4/uDN79uyp1hTq89prr9koCX6nT58u7dq10zTwfOBCb/369Qni2Q1wAeMYQr+iLBzT6GuUs3btWhtN65pUfyISjlvL/eKLL9bj95dffpGXXnpJt0+YMEHrj2McAa5RMPANx44d02MPdalYsaKfRZ7a/8icOXOkTp06+l+3/8lFixb5FstlXwLmCpUhAxNYuHChY/rbKVSokNvK48ePOx07dtTtRkR0uznBO7lz59Zt5iTlmDFEXTaCqfvN1ayuZ8uWzTFuH8f8uZxbbrnFOXnypNOsWTPdZ8YenWrVqukyyjQngYBp33rrLTdd1apVHXMi1nXjTnLy5cvntGzZ0jECpdvq1q3r5oU6I39zsnOMGDvG/abry5cv1zhG0HQddTQnHadNmza6jjTffvutxjEi5xiB0+1GJB1zktZl4xp0jLBrHHNRoNuMReMYUdZlI6oO0iYWzAnQMSdvjYd6mxObLhsxd/7++2/HCKFTq1Yt3Ya61KxZ0zFu0gRZBVO3woULaz7mAkHToy+Q54gRI9z80HfYZgREt5mLAl2/6aabnIcfftgpXbq0rhuvgGM8BY4RRl0HT/TH/PnzndQeFygwfh3RH+hbMGrbtq1TpUoVLc9cMDhnz57VOr733nu6DfUGf/DOlSuXbnvyySc1Tvwvc9HkGGHXYxJtA1ekxzFsQ/y63HDDDRonT548zvXXX+9kzZrVLcceR8n1J/K2/yMc+2a810Ff4z9hRFXzN1a/gzbZYwH/QxwDCP/3f/+nccD7qquucoyQ6zrqvnjxYo2Tmv8Ijl9zsaNlGo+Dc9tttyl39Pf+/fs1X375ExD/Va5lNAJWCM04jZ5UcBKywoE/nLEWtMn4w2Adf2CE06dPq3hC9HAytEKIOC+++KJjrBrHuNI0PbZBTIxVoWnHjh2reeGkgD9lUmntnxyCZK6KNe0VV1yhaW+++WZd37x5s67jZIgAkcCJxbiedB1fVohtW6wQGotE64k4LVq00HyGDx+OVcdYQrpuLBL3JIwTE05sy5YtU8FEu3BCMTM7NY0Vltdff13X43/h5I40EBykQd
utCEOoEHDCRxx8khLU5OqGfOKf2IMRQpyocSHz8ccfO8Yicn799VcHImhcmMjSMZaQ1gviY0Nqjwukj19HCCHabfsAJ2UcX
|
|||
|
<p>We can see that the “median” method did the best, although similar
|
|||
|
results were achieved by ward.D2, average, ward.D, and mcquitty.
|
|||
|
However, the complete, centroid, and single methods did worse in our
|
|||
|
case.</p>
|
|||
|
</div>
|
|||
|
<div id="conclusion" class="section level3">
|
|||
|
<h3>Conclusion</h3>
|
|||
|
<p>The Iris data set is only 4-dimensional, making it possible to
|
|||
|
explore using pairs plot (SPLOM) or parallel coordinates plot. It is
|
|||
|
clear from these that two main clusters are visible, while the
|
|||
|
separation of the third cluster is difficult.</p>
|
|||
|
<p>In the above analysis, we learned that the complete method fails to
|
|||
|
do the proper separation of the two main clusters when cut in k=2 (but
|
|||
|
succeeds in doing it, if moving to k=3 clusters). This is different from
|
|||
|
all the other 7 methods available in <code>hclust</code>, which do
|
|||
|
succeed in separating the 2 main clusters from the beginning (i.e.: for
|
|||
|
k=2).</p>
|
|||
|
<p>We also noticed that all clustering algorithms share a relatively
|
|||
|
high proportion of common nodes (between 75% and 90%).</p>
|
|||
|
<p>Lastly, when it came to trying to separate the flowers into 3
|
|||
|
species, the median clustering method did the best, while the single
|
|||
|
method did the worst in this regard.</p>
|
|||
|
<p>While the Iris data set is well known, I hope the above analysis was
|
|||
|
able to offer some new perspectives on the performance of the different
|
|||
|
hierarchical clustering methods.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="khan---microarray-gene-expression-data-set-from-khan-et-al.-2001.-subset-of-306-genes." class="section level2">
|
|||
|
<h2>khan - Microarray gene expression data set from Khan et al., 2001.
|
|||
|
Subset of 306 genes.</h2>
|
|||
|
<div id="background-1" class="section level3">
|
|||
|
<h3>Background</h3>
|
|||
|
<blockquote>
|
|||
|
<p>Khan contains gene expression profiles of four types of small, round,
|
|||
|
blue cell tumors of childhood (SRBCT) published by Khan et al. (2001).
|
|||
|
It also contains further gene annotation retrieved from SOURCE at <a href="http://source.stanford.edu/" class="uri">http://source.stanford.edu/</a>.</p>
|
|||
|
</blockquote>
|
|||
|
<p>This interesting data set offers two interesting items:</p>
|
|||
|
<ul>
|
|||
|
<li>train: data.frame of 306 rows and 64 columns. The training data set
|
|||
|
of 64 arrays and 306 gene expression values</li>
|
|||
|
<li>test: data.frame, of 306 rows and 25 columns. The test data set of
|
|||
|
25 arrays and 306 gene expression values</li>
|
|||
|
</ul>
|
|||
|
<p>This way we can create a hierarchical clustering on the 306 genes
|
|||
|
expression values on the train and the test data and compare the two to
|
|||
|
see the stability of the results.</p>
|
|||
|
<p>We define the variables:</p>
|
|||
|
<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" tabindex="-1"></a>train <span class="ot"><-</span> dendextend<span class="sc">::</span>khan<span class="sc">$</span>train</span>
|
|||
|
<span id="cb29-2"><a href="#cb29-2" tabindex="-1"></a>test <span class="ot"><-</span> dendextend<span class="sc">::</span>khan<span class="sc">$</span>test</span></code></pre></div>
|
|||
|
<p>And create the dendrograms:</p>
|
|||
|
<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" tabindex="-1"></a>d_train <span class="ot"><-</span> train <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb30-2"><a href="#cb30-2" tabindex="-1"></a>d_test <span class="ot"><-</span> test <span class="sc">%>%</span> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram</span>
|
|||
|
<span id="cb30-3"><a href="#cb30-3" tabindex="-1"></a>d_train_test <span class="ot"><-</span> <span class="fu">dendlist</span>(<span class="at">train =</span> d_train, <span class="at">test =</span> d_test)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="comparing-the-train-vs-test-dendrograms" class="section level3">
|
|||
|
<h3>Comparing the train vs test dendrograms</h3>
|
|||
|
<p>Using a cophenetic correlation, we can see the two trees have some
|
|||
|
similarity (0.57):</p>
|
|||
|
<div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" tabindex="-1"></a>d_train_test <span class="sc">%>%</span> cor.dendlist</span></code></pre></div>
|
|||
|
<pre><code>## train test
|
|||
|
## train 1.0000000 0.5708019
|
|||
|
## test 0.5708019 1.0000000</code></pre>
|
|||
|
<p>However, when looking at the cophenetic correlation with the spearman
|
|||
|
correlation coefficient, the value is lower (0.49) indicating that some
|
|||
|
of the similarity is due to a small number of items, distant from the
|
|||
|
others, which are correlated similarly in the two trees:</p>
|
|||
|
<div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" tabindex="-1"></a>d_train_test <span class="sc">%>%</span> <span class="fu">cor.dendlist</span>(<span class="at">method_coef =</span> <span class="st">"spearman"</span>)</span></code></pre></div>
|
|||
|
<pre><code>## train test
|
|||
|
## train 1.0000000 0.4971936
|
|||
|
## test 0.4971936 1.0000000</code></pre>
|
|||
|
<p>We may ask at which level of cutting the dendrogram we get the “best”
|
|||
|
level of similarity. For this we may turn to the Bk plot. The plot
|
|||
|
shows us that at around 7 clusters the groups in the two are starting to
|
|||
|
look significantly similar. (Note that significantly does not mean
|
|||
|
substantially)</p>
|
|||
|
<div class="sourceCode" id="cb35"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" tabindex="-1"></a><span class="fu">Bk_plot</span>(d_train, d_test, <span class="at">k =</span> <span class="dv">2</span><span class="sc">:</span><span class="dv">30</span>, <span class="at">xlim =</span> <span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">30</span>))</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAYAAAB8GMlFAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAHCAAAAAOQBvRMAAEAASURBVHgB7J0HnBNFG8bfK3D03nsv0kHgRECKgoKgKCJSVMRPBCkCFkRF7KICIlVUFKSKNAEFBKWDFEGk997uaAcHHFfy7TO48XLJ5kpyZDd55seS7O7M7Mx/cnkzM28JsmlJmEiABEiABEggQAkEB2i/2W0SIAESIAESUAQoCPlBIAESIAESCGgCFIQBPfzsPAmQAAmQAAUhPwMkQAIkQAIBTYCCMKCHn50nARIgARKgIORngARIgARIIKAJUBAG9PCz8yRAAiRAAhSE/AyQAAmQAAkENAEKwoAefnaeBEiABEiAgpCfARIgARIggYAmQEEY0MPPzpMACZAACVAQ8jNAAiRAAiQQ0AQoCAN6+Nl5EiABEiABCkJ+BkiABEiABAKaAAVhQA8/O08CJEACJEBByM8ACZ
AACZBAQBOgIAzo4WfnSYAESIAEKAj5GSABEiABEghoAhSEAT387DwJkAAJkAAFIT8DJEACJEACAU2AgjCgh5+dJwESIAESoCDkZ4AESIAESCCgCVAQBvTws/MkQAIkQAIUhPwMkAAJkAAJBDQBCsKAHn52ngRIgARIgIKQnwESSCcC/fr1k6CgIIcjW7ZsUqpUKRk8eLDEx8fbn/zkk0+qfF9//bX9mjfe5MuXT9V7+PDhVFX3zz//yPTp01NVhplJwKoEKAitOnJst2UI5MyZUypUqCDly5eXHDlyyLFjx+Tjjz+Wd99915R9mDp1qtSqVUs2btxoyvaxUSTgbQIUhN4myvpIIAkBzPb27dsn+/fvl9OnT8tbb72lcixevDhJTnOcYvaYeLZqjlaxFSSQfgQoCNOPLWsmAZcEatSooa7nzZvX5X1c/Pzzz6V69erSsmVLuXLlilO+33//Xd3HrPKTTz6RcuXKSZEiReTVV1+V2NhYp/yJL0yZMkVatGghuXLlkrvuukveeecduXXrlsqC2eDYsWPVeyyN1qlTJ3FRvicBvyQQ6pe9YqdIwEQEtmzZIkOHDhWbzSaXL1+WGTNmSJYsWZTQctXMadOmyWuvvaYEFd5jaTVpioqKEuzjYZn1xo0bEh4eLuvWrVMCFILwiy++SFpEnY8ePVr69u2r3tesWVN27twp7733nmzevFl++eUX1Ua9INqbkJCgn/KVBPyXgPZhZyIBEkgHAprAsWnfHC6PRo0a2c6dO2d/aocOHVS+zp072zJmzGjLmjWrbf369fb7Sd/MmzfPXu/27dvVbW2pVV3LkCGDTROU6po261TXDh06ZLt27ZoN99Am5EU6fvy4LXv27Orazz//rK5ps0x13qdPH3XO/0jA3wlwadR/f+OwZyYh8OCDD8qiRYtEEzRqNtilSxdZs2aNVKtWTW7evOnQSswAsUwJpRrcTy5BAUdfan3ggQcEWqmYER44cMCpKGaMuFesWDFp1aqVul+8eHFB+5A0wate+R8JBBoBCsJAG3H2944TKFGihLRu3VratGkjHTt2lB9++EEKFiwo58+fl+XLlzu0Jzg4WAm2M2fOqL0/h5suThIrtYSGhoo2m1S5El/Xi+nXtBmgfkm9Qngi6fuE6oT/kUAAEaAgDKDBZlfNQQCztQsXLqjGaEuVDo2CfSFmjpkyZZLhw4fL0aNHHe4nPYGGp7bsqS5jz/DixYvKbrBs2bJJs6p9RNg17tmzR/bu3avuY4aoa6/Wrl1bXQsJCVGv3B90QsgLfkqAgtBPB5bdMg+BOXPmKLs8KKdUqVJFqlatKnFxccquUNsrdGgoZo84+vfvr5ZNoTSTXLrnnnukd+/eSsMUeZ966inJkyePU7HcuXPL008/ra7Xq1dPunXrJmXKlFEzU9gNwswDSVfOgYB87rnnqDCjqPA/fyZAQejPo8u+mYIAZn+aQovs2LFD7d1habJTp05KSxPao67SoEGDJH/+/DJ79mxZvXq1qyzqGrRFYWIxbtw4OXv2rFqChWaoUZo0aZIMHDhQaa1+//33qszjjz8uS5YsESytImEJF6YYmI3ChCIyMtKoOl4nAb8gEARtIL/oCTtBAgFEYP78+dKuXTtp2rSpwKbw0qVLyvTB1UzQCMuJEyekQIECEhYW5pQFM9aTJ08qxRpdQDpl4gUS8BMCtCP0k4FkNwKbAJY9U5ugMWqUIPzgE5WJBAKBAJdGA2GU2Ue/IwCFFiyrQqmGiQRIwDMCXBr1jB9LkwAJkAAJWJwAZ4QWH0A2nwRIgARIwDMCFISe8WNpEiABEiABixOgILT4ALL5JEACJEACnhGgIPSMH0uTAAmQAAlYnAAFocUHkM0nARIgARLwjAAFoWf8WJoESIAESMDiBCgILT6AbD4JkAAJkIBnBCgIPePH0iRAAiRAAhYnQEFo8QFk80mABEiABDwjQEHoGT+WJgESIAESsDgBCkKLDyCbTwIkQAIk4B
kBCkLP+LE0CZAACZCAxQlQEFp8ANl8EiABEiABzwhQEHrGj6VJgARIgAQsToCC0OIDyOaTAAmQAAl4RoCC0DN+LE0CJEACJ
|
|||
|
<p>Next, we compare the results with a tanglegram. We make sure to color
|
|||
|
the connecting line with the colors of the branches of the train (left)
|
|||
|
dendrogram. This can help us see which patterns are somewhat preserved
|
|||
|
between the two trees.</p>
|
|||
|
<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" tabindex="-1"></a>pre_tang_d_train_test <span class="ot"><-</span> d_train_test <span class="sc">%>%</span> ladderize <span class="sc">%>%</span> <span class="co"># untangle %>%</span></span>
|
|||
|
<span id="cb36-2"><a href="#cb36-2" tabindex="-1"></a> <span class="fu">set</span>(<span class="st">"branches_k_color"</span>, <span class="at">k =</span> <span class="dv">7</span>)</span>
|
|||
|
<span id="cb36-3"><a href="#cb36-3" tabindex="-1"></a>train_branches_colors <span class="ot"><-</span> <span class="fu">get_leaves_branches_col</span>(pre_tang_d_train_test<span class="sc">$</span>train)</span>
|
|||
|
<span id="cb36-4"><a href="#cb36-4" tabindex="-1"></a>pre_tang_d_train_test <span class="sc">%>%</span> <span class="fu">tanglegram</span>(<span class="at">fast =</span> <span class="cn">TRUE</span>, <span class="at">color_lines =</span> train_branches_colors)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAlgAAAF3CAYAAAB5dDWiAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAJYoAMABAAAAAEAAAF3AAAAAElRatkAAEAASURBVHgB7H0HYBzV1fVR770Xq9iS3Hs3NrYDprfQk9gBvhASEkg+8oUU8odUIAFCgBBKQu+YXkw3priBu43lIqtavXet+n/PW81qd7WrLlky79qjmXnz5s3M3bf7zpxbnkunCLRoDWgNaA1oDWgNaA1oDWgNDJsGXIetJd2Q1oDWgNaA1oDWgNaA1oDWgNKABli6I2gNaA1oDWgNaA1oDWgNDLMGNMAaZoXq5rQGtAa0BrQGtAa0BrQGNMDSfUBrQGtAa0BrQGtAa0BrYJg1oAHWMCtUN6c1oDWgNaA1oDWgNaA1oAGW7gNaA1oDWgNaA1oDWgNaA8OsAQ2whlmhujmtAa0BrQGtAa0BrQGtAQ2wdB/QGtAa0BrQGtAa0BrQGhhmDWiANcwKtW6urq4OW7ZssS4a0v
YVV1yBuXPn4plnnhlSO/pkrQGtAa0BrYGxp4HhHjPsn3Ck27e/3jd9XwOsEeoBr7zyClJTU8H1cMmRI0ewd+9elJWVDVeTuh2tAa0BrQGtgTGggZEYM6wfa6Tbt76W3jZrwF0rYmQ08MQTT6CkpGRYG//3v/8NvoFMmTJlWNvVjWkNaA1oDWgNnFgNjMSYYf1EI92+9bX0tlkDbn8U0coYXg08+OCDePPNN1FdXY3m5maQeVqyZAneffdd/Pe//0V7ezu2bt2Kv/zlL4qNWrRoETgl5HPPPYc777xT1dm8eTNyc3MxZ84cuLm5qRtcv369YrCio6ORlJSkANytt96Kzz//HCtWrMD999+Pu+++Gxs3bkRERATi4+OH98F0a1oDWgNaA1oDw64BZ2OGt7e3utbzzz+vfts5Rhw9ehSzZs2CcYwVMjMzcfvtt4Mv4az75ZdfIi4uDlFRUer8vtpXlfSf4dcAJ3vWMrwaWLlyJSfQtlny8/M7f/rTn6oy+XJ0uri4qO1LL71UXVy+ADb1jfPPPfdcy83Nnj1b1fnHP/6hyg4ePKj2fX19O0899VSb893d3Tt37txpOVdvaA1oDWgNaA2MTQ04GzN4t2vXrrX5befYkJiY2JmVlaUe5sCBA50cA1ju4+PTKcBLbQcHB3fm5eWpOr21ryroPyOiAe2DJb1yuOW+++7D4sWLVbMCoPDpp58qRsm4zv79+3HttdfikUcewc9//nO0tLTgpZdegnw58Prrr6O+vh733nuvqr5hwwbU1tYapzpcNzY2qvLs7GwI6EJoaCja2trw8ssvO6yvC7UGtAa0BrQGxo4GnI0Zb7/9Np599ll4enqqsYFWjfPOO09ZNwzj05NPPgmOAevWrVMuJLScXHPNNcr6QUsIxVn7Y0cDJ+edaB+sEfhchWlCWFiYaplmOnl7sLkKTXykbIVlspQThHV0dKiFAKyqqspyjF+YwMBAy76jjb///e/KbMhjvB6BmnaGd6QpXaY1oDWgNTC2NOBszHjttdfUjZ522mm44IIL1PavfvUrvPPOO3jxxRdBcCVMlSp/4YUXUFFRgdNPPx0333wzpk6danlIZ+1bKuiNEdGAZrBGRK29N0ondWtwxdqMDrz88ssVMJs/fz7uuOOO3huxOxobG2sp8ff3V9utra2WMr2hNaA1oDWgNTC+NEDfKsp7772nfHHpjyvuIKqMlo/S0lKI6wk4ZtBqQT/fX/ziF5g2bRrmzZsHMR+quvrPidFAN4VyYq7/jbyqn5+fzXMXFxerLw0jBK+++mp8//vfR0JCAlJSUlQ9V9e+cbCHh4elTfHvsmzrDa0BrQGtAa2B8akBY6xYvny5ciuxfwo6ugcFBWHHjh0quIkA65NPPsG+ffuwZ88e/O53v8Nbb71lf5reHyUN9D1yj9KNnGyXMUCRIxbJGgzxuWknJ7jil4lRhqtXr1aRIoZO+GaiRWtAa0BrQGvg5NWAozGDiaUp4tAOcXbHVVddpfx5OWY0NDQocMXIwSuvvBLp6em45557lDXk8ccfV+d99NFHKkKdO47aV5X0nxHTgGawRki1TJNAoYMizX9PP/200yuR8qUTI78w3/3ud5UvFXOWGEJ/rCRJy6BFa0BrQGtAa+Dk1ICjMeOGG27AAw88gMLCQkyePBlr1qxRyavLy8sh0eRKERwbWI9MFVkrpmZ444031LGzzjoLhkXDUfsTJ048OZU5Rp5KM1gj9EFcd911yvmwpqYGX331Va8O55GRkbjrrruQnJysIv/4RkInRebHotABXovWgNaA1oDWwMmrAUdjBn1rmdOKPlaMEn/44YdBH9tf/vKXuOmmm5QyJJWPyoHIOsyTxYCnjIwMxXg99thjFoU5at9yUG+MiAZcmPxhRFrWjar0CwyrlZwliqHqj0pycnJUglB7J/j+nKvraA1oDWgNaA2MXw3Qcd3ZmEELB5krjifOhHXIdtGH18vLq0e13trvUVkXDFkDGm
ANWYW6Aa0BrQGtAa0BrQGtAa0BWw1oE6GtPvSe1oDWgNaA1oDWgNaA1sCQNaAB1pBVqBvQGtAa0BrQGtAa0BrQGrDVgAZYt
|
|||
|
<p>We can see that the top most (small) cluster is somewhat preserved
|
|||
|
between the two trees. However, a large spaghetti-like tangle of lines
|
|||
|
is indicating that the two trees are far from being identical.</p>
|
|||
|
<p>If we look only at subtrees of the two dendrograms so that they
|
|||
|
include only genes that are clustered with genes in both trees, we get
|
|||
|
only 14 genes (while the original trees had 306 genes). We can see how
|
|||
|
we have several groups of pairs of genes, and one group with four genes
|
|||
|
clustered together in both trees:</p>
|
|||
|
<div class="sourceCode" id="cb37"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1" tabindex="-1"></a><span class="co"># This was calculated before</span></span>
|
|||
|
<span id="cb37-2"><a href="#cb37-2" tabindex="-1"></a><span class="co"># d_train_test_common <- d_train_test %>% prune_common_subtrees.dendlist</span></span>
|
|||
|
<span id="cb37-3"><a href="#cb37-3" tabindex="-1"></a><span class="co"># d_train_test_common</span></span>
|
|||
|
<span id="cb37-4"><a href="#cb37-4" tabindex="-1"></a>d_train_test_common <span class="sc">%>%</span> untangle <span class="sc">%>%</span> <span class="fu">tanglegram</span>(<span class="at">common_subtrees_color_branches =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAYAAAB8GMlFAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAHCAAAAAOQBvRMAAEAASURBVHgB7F0HWBRHG36lo1gQEUEEFOzYjb1h773FbjQau9GoiUmM+TWJscVYY9QYe++9l9i7UuyAIIi9ISKg/PPNccdxlDvxgDv4xme53dnZKe+s8+5XZiZbrAjgwAgwAowAI8AIZFEETLJou7nZjAAjwAgwAoyARICJkF8ERoARYAQYgSyNABNhlu5+bjwjwAgwAowAEyG/A4wAI8AIMAJZGgEmwizd/dx4RoARYAQYASZCfgcYAUaAEWAEsjQCTIRZuvu58YwAI8AIMAJMhPwOMAKMACPACGRpBLIMEb5+/RonT57UW2cvXrwYFSpUQK9evfSWJ2fECDACjEByCOh7DNMsJ63z1yzPkK6zBBFu3LgRRYsWBf3qKzx8+BBXrlzBrVu39JUl58MIMAKMQJIIpMUYpl
5QWuevXpYhnpsZYqX0XaelS5eCiEufoWvXrqhUqRLy5Mmjz2w5L0aAEWAEEiGQFmOYeiFpnb96WYZ4bjpRBEOsmL7qNH/+fGzbtg0vXrzAu3fvcPPmTVSrVg2hoaGYNGkSLl68iNy5c2PkyJE4ffo0ypcvjxw5cmDVqlWYOnUqFi1ahBMnTuDevXvynqmpqazapUuXsHfvXoSHh8v8iGgnTJiA48ePo3bt2pg9ezamT5+OQ4cOwd7eHs7OzvpqEufDCDACWQiB5MYwKysrvHr1CtOmTcMff/yBAwcOgNSbZcuWTYDO3bt38euvv2LevHlYvXo1zp49i4IFC8LBwUGmSyn/BBll5gtadDszh7p169Ki4gmO+/fvxx47dkzGCZKKdXJykufZs2ePFS9SrHgxEqRXPt+iRQsVVJMnT5ZpqlatKuN8fX1VedSpUyfB82ZmZrEXLlxQPcsnjAAjwAjoikByY1hISEhs4cKFE4w1NFZ16NBBlbW3t3csjWsUb21tHSvIU54LTVZsUFCQTJdc/qpMssBJprcR/vnnnxBkJd4DoGPHjjh69KiU0GSE+PP48WP5ZbR8+XL5VWVhYYF169ZBvDTYsmWLlPhmzZolk+/atUt+gSmfTeo3IiJCRgcEBECQI/LmzYuYmBhs2LAhqeQcxwgwAoxAiggkN4Z9++23oHGmcuXK8PHxwX///SfHsk2bNmHnzp0yz3///Rc0JvXs2VNKi6QZ69u3r9RukaaLQnL5y5tZ5E+mtxGWK1cOdnZ2sjtJPSm+fhJ17YwZM+Dl5aWKJ7L88OGDPK5du4bnz5+r7tGLlCtXLtV1Uie///473Nzc5C0qjwiVCJcDI8AIMAIfi0ByYxiNKxQGDRqEkiVLyvPOnTtjzpw5UgXasmVLlQ/DmjVr8PTpUzRs2BBjxoxRpaeHkstfZphF/mR6iVCXfvT09EyQjLxB6YUiAiWHmN9++y3BfW0XQtWqSmJjYyPPo6OjVXF8wggwAozApyAQFhYmtVWUR79+/UC+C3QQCVIQalP5O2TIEDmGkVZq9+7dGDVqFEqVKoWKFStCqE1lGv4DZHqJUJdOJucYZaAXTNj4pBqhT58+cp6gi4sLPDw8ZBITE+3fDubm5srskC1bNtU5nzACjAAjoA8E1McsIjdNBxmlN7utrS3Onz8vnfaICA8fPoyrV6/i8uXL+P7777F9+3Z9VMfo89A+qht9EwEleSUnlakTF+nNyfOKXjTyGCWVqfpcQfqy4sAIMAKMQHoioDmG5cyZE+7u7rIKNK717t1bHuQhSpIeecJTIE9Rmurl5+eHmTNnyrnP//zzj7xHXqbCD0aea+YvI7PQnywhEdL0BQorV66ULwI5xqgHdamNpEFymHnz5g26desmbX00x0YZyF6otP8p4/iXEWAEGIG0RCCpMWz8+PFSLTp37lw5LYwc89auXQvhJYr+/fvL6tBYNXToUCn5kRRIUya2bt0q7zVt2lSlsUoq/yJFiqRlkwwr7yzgGRsr5gfGkruwQD5WSH+xZ86cUU2foDjxRZUABuFFpXJLJtdj4fwSW6VKFfm8+KqSaZObPkH5iTmKqvzEEmzyOeG1pYrjE0aAEWAEPgaBpMYwel58pMcKcpNjjLARxoo50rGbN29OkLWY0xxbs2ZNOfbR+CSkv9gePXrECucZVbrk8lclyOQn2ah9ApxMH6KiouSkeFdXVynx6dLgwMBAORFezAPUJTmnYQQYAUYgzRBIaQx78OABaII92QSTC6TlooVEyOfB0tIyUbKU8k+UOJNFZBkizGT9xs1hBBgBRoAR0BMCWcJZRk9YcTaMACPACDACmRABJsJM2KncJEaAEWAEGAHdEWAi1B0rTskIMAKMACOQCRFgIsyEncpNYgQYAUaAEdAdASZC3bHilIwAI8AIMAKZEAEmwkzYqdwkRoARYAQYAd0RYCLUHStOyQgwAowAI5AJEWAizISdyk1iBBgBRoAR0B0BJkLdse
KUjAAjwAgwApkQASbCTNip3CRGgBFgBBgB3RFgItQdK07JCDACjAAjkAkRYCLMhJ3KTWIEGAFGgBHQHQEmQt2x4pSMACPAC
|
|||
|
<p>Trees’ sizes:</p>
|
|||
|
<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" tabindex="-1"></a>d_train_test <span class="sc">%>%</span> nleaves</span></code></pre></div>
|
|||
|
<pre><code>## train test
|
|||
|
## 306 306</code></pre>
|
|||
|
<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" tabindex="-1"></a>d_train_test_common <span class="sc">%>%</span> nleaves</span></code></pre></div>
|
|||
|
<pre><code>## train test
|
|||
|
## 14 14</code></pre>
|
|||
|
</div>
|
|||
|
<div id="conclusion-1" class="section level3">
|
|||
|
<h3>Conclusion</h3>
|
|||
|
<p>To conclude: we see that the clustering algorithm resulted in trees
|
|||
|
which are significantly similar in both the training and the test data
|
|||
|
sets beyond chance, but that this similarity is restricted to only a
|
|||
|
very small proportion of genes.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="votes.repub---votes-for-republican-candidate-in-presidential-elections" class="section level2">
|
|||
|
<h2>votes.repub - Votes for Republican Candidate in Presidential
|
|||
|
Elections</h2>
|
|||
|
<div id="background-2" class="section level3">
|
|||
|
<h3>Background</h3>
|
|||
|
<blockquote>
|
|||
|
<p>This is a data frame with the percentage of votes given to the
|
|||
|
republican candidate in presidential elections from 1856 to 1976. Rows
|
|||
|
represent the 50 states, and columns the 31 elections.</p>
|
|||
|
</blockquote>
|
|||
|
<blockquote>
|
|||
|
<p>Source: S. Peterson (1973): A Statistical History of the American
|
|||
|
Presidential Elections. New York: Frederick Ungar Publishing Co. Data
|
|||
|
from 1964 to 1976 is from R. M. Scammon, American Votes 12,
|
|||
|
Congressional Quarterly.</p>
|
|||
|
</blockquote>
|
|||
|
<p>Define variables:</p>
|
|||
|
<div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" tabindex="-1"></a>votes.repub <span class="ot"><-</span> cluster<span class="sc">::</span>votes.repub</span></code></pre></div>
|
|||
|
<p>These data can be visualized using a (custom-made) parallel
|
|||
|
coordinates plot:</p>
|
|||
|
<div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" tabindex="-1"></a>years <span class="ot"><-</span> <span class="fu">as.numeric</span>(<span class="fu">gsub</span>(<span class="st">"X"</span>, <span class="st">""</span>, <span class="fu">colnames</span>(votes.repub)))</span>
|
|||
|
<span id="cb43-2"><a href="#cb43-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb43-3"><a href="#cb43-3" tabindex="-1"></a><span class="fu">par</span>(<span class="at">las =</span> <span class="dv">2</span>, <span class="at">mar =</span> <span class="fu">c</span>(<span class="fl">4.5</span>, <span class="dv">3</span>, <span class="dv">3</span>, <span class="dv">2</span>) <span class="sc">+</span> <span class="fl">0.1</span>, <span class="at">cex =</span> .<span class="dv">8</span>)</span>
|
|||
|
<span id="cb43-4"><a href="#cb43-4" tabindex="-1"></a><span class="co"># MASS::parcoord(votes.repub, var.label = FALSE, lwd = 1)</span></span>
|
|||
|
<span id="cb43-5"><a href="#cb43-5" tabindex="-1"></a><span class="fu">matplot</span>(<span class="dv">1</span><span class="dt">L</span><span class="sc">:</span><span class="fu">ncol</span>(votes.repub), <span class="fu">t</span>(votes.repub), <span class="at">type =</span> <span class="st">"l"</span>, <span class="at">col =</span> <span class="dv">1</span>, <span class="at">lty =</span> <span class="dv">1</span>,</span>
|
|||
|
<span id="cb43-6"><a href="#cb43-6" tabindex="-1"></a> <span class="at">axes =</span> F, <span class="at">xlab =</span> <span class="st">""</span>, <span class="at">ylab =</span> <span class="st">""</span>)</span>
|
|||
|
<span id="cb43-7"><a href="#cb43-7" tabindex="-1"></a><span class="fu">axis</span>(<span class="dv">1</span>, <span class="at">at =</span> <span class="fu">seq_along</span>(years), <span class="at">labels =</span> years)</span>
|
|||
|
<span id="cb43-8"><a href="#cb43-8" tabindex="-1"></a><span class="fu">axis</span>(<span class="dv">2</span>)</span>
|
|||
|
<span id="cb43-9"><a href="#cb43-9" tabindex="-1"></a><span class="co"># Add Title</span></span>
|
|||
|
<span id="cb43-10"><a href="#cb43-10" tabindex="-1"></a><span class="fu">title</span>(<span class="st">"Votes for Republican Candidate</span><span class="sc">\n</span><span class="st"> in Presidential Elections </span><span class="sc">\n</span><span class="st"> (each line is a country - over the years)"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAF3CAYAAAA2H0uDAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAF3AAAAADwmOncAAEAASURBVHgB7N0FuKRHsTfwF0hwPpzgLBYcggQnLG7BIYRAwuIBQnANssEhwd0XCXZxDXCBIAkQnCDBwuKu994En69/TWroffcdn3POzDldz3POzLzSUl1d/6pqa3qVKgcm4MClL33p3qlPfereH/7wh/5b//u//9s73elO17vc5S7Xvzboy29/+9veIYccMuj2VNeld8pTnrLXNE3vqKOOmiqNeOmqV71qTkda/s52trP1rnSlK/U+9rGPxSMr8nmhC12od81rXrMz7ac//em5LMcdd1zv+9//fv7+hCc8ofPZlbi4ffv23k1vetPc7niy22679V75ylfOPavrXe96vfOc5zw53bLO7YzIkHIcfPDB7Vs7/f7CF77Qe9WrXrXT9XqhcqDkwCmTQFWqHBibA3e6052av/
3tb8373ve+/jsf/vCHm5NOOqnZd999+9cGfbnrXe/avOxlLxt0e6rrr3nNa5oEws3xxx/fXOMa15gqjfKl0572tM2rX/3q/Pf4xz+++cUvftHsvffezf/93/+Vj63J93Oc4xzNc5/73CYB06rk/z//8z/Nta51rearX/1q87CHPSzz5PznP39z73vfu/nIRz6yYmW4wQ1ukOuZgHHqPP785z83ybBpvvjFL06dRn1xY3CgAuHGaOe51RIQore//e39NN/5znfm73HvJz/5SXOHO9yhOfvZz94kD7J5y1veku8DwE996lPNH//4x+ZSl7pUc+KJJ2aQufnNb978v//3/5o999yzOfbYY/vpPuMZz2iufOUrN5ShtH/+85/378WXm9zkJs2vfvWr5gc/+EFzt7vdrUneajMof+/c8IY3bB7wgAc0173udZuLX/ziuQyRVnzuuuuuzT3ucY/89+AHP7iRB6D/9a9/nR8B/OqVvMXmzne+c/OnP/0pXwfE6vWSl7yk2WuvvXL9H/rQhzb//Oc/m+9973v5XvKk8rMnnHBC/l0aBX/9619znmc5y1mazZs3Nz/60Y+iSP1Pyv3lL395X7nLe//998954dWLX/zi/rNf+9rXmlve8pbNWc961uaSl7xk8+xnPzvfSx5eP291k98+++yT69h/+eQvL3rRi5qf/exnzUtf+tLmaU97Wi7f2972tuapT31qAxDRtPn89Kc/ze0h/wMOOGCH/AGvev7+97/Pebz//e9vLnaxizXnPe95m1e84hX5WvwblD+jK1n9Wf7uc5/75McHtV2kVT83KAdK97B+rxwYhwN77LFH7zSnOU0vKeVe8g57SZH1rnKVq+RXk9fUu+xlL9tLYNJLINFLyjKHsZIC6r3nPe/JoS/vJsXX+8tf/tITDjvjGc/YS95GLynrHIoUav3sZz+b33vsYx/be8ELXtA7/elP30teyE7Fe8xjHpPzusAFLpBDZcPy97IQpDDqFa5whd5tb3vbndITGlW+F77whb3nP//5vcc97nG9M5zhDLlOHk4gm+8nwMtldi8p3JxOUt65zELH6o5PSa30PvShD/WENX0X8kPf/va38+8nP/nJ+bdyuZ8AsJdAuHeqU50q88bNMkzYDo3KW318Xv/61++d4hSn6H3+85/v/etf/+pd7WpX613wghfsJdDqXf7yl8/pJ6Oh953vfCd/V8+HP/zhOR95b9u2LZel/HeXu9wlP5uMkPJy//ss+dzsZjfLZd+yZUu/DF2hUWXW/skbzjwPmRIaHZb/Ix/5yFz23XffvXf44YcPbbt+heqXDckBFlOlyoGJOBCK+U1velMPwFGihx12WE6D0vc7FHyy+vNvY0zoRje6US95ivm7sSfP3u9+98u/Iy3pvutd78r3kifYe+9739v75S9/mZ/p+pe8yV4KXeZbo/IHOIA3hfy6kuq1xwiV7573vGfvxz/+cX7+Wc96Vi7Xxz/+8fz7Xve6V2+XXXbpJY+xF0CYPNx8L3kzGZgo7HGAEDAlLzm/CxDlLY3gd3uMMHmQGTCvc53r9PNLXl/vW9/6Vv4NvH74wx/2fDIipAckAwiBNTKO5t7WrVvz7/Kf9nKP0TOIpsmHwSLdKDujiAHRBYQA2rMMIpQ80vw7xggH5Z8iD/m5kK9hbTeobvX6xuBADY2mHlZpMg5ECFR4NAFWk7yQHFqTijAVEhpE5zvf+ZqLXOQiOTSYLxT/ErjkX8bjznSmMzXJQ8u/hTZvdatb5fG+t771rfm7sOmb3/zm4u3ur+PkrzwJDLsTSFfdE8pM3mB+Rlg0woBRZiFHZX7961/f/OMf/8gh3kgwecf5q5CkcN4nPvGJuJVDdX4Il7ZJ+DJNOsqXhTlRUvL5s+ufkKV0kreXb8tPKFZ4FiWAy+2iDbQTEn4NSt5i/irEi4z9til52vmS8HOQ5z796U/36zJNPlGvqGcyApoUSYgsdviMZ4Ov8U48NCr/eG6cto
tn6+fG4kAFwo3V3nOp7YUvfOE8CSF5X1nBmqASSvWiF71oziOF/vJnCnPmMbty0kOyMfM9ih+IGss55phjms985jN5Uoaxv
|
|||
|
</div>
|
|||
|
<div id="heatmap" class="section level3">
|
|||
|
<h3>Heatmap</h3>
|
|||
|
<p>This is a nice example of when the parallel coordinates plot has some
|
|||
|
serious limitations: it does not help us detect the states, we fail to
|
|||
|
see the missing value patterns, and it is tricky to see clusters in
|
|||
|
general (due to the large number of threads).</p>
|
|||
|
<p>For these data, it can be quite helpful to see a heatmap of the votes
|
|||
|
across the years. The ordering of the rows is tricky. First, the
|
|||
|
distance of the vectors (later used for the clustering) should be computed
|
|||
|
after transformation (since we are dealing with proportion of votes). In
|
|||
|
this case, I used the arcsin transformation (a logit transformation
|
|||
|
could also work, but the arcsin is safer for dealing with 0/1
|
|||
|
observations). But given the clusters, we wish to order the leaves (as
|
|||
|
much as possible), in order to take into account the missing value
|
|||
|
clusterings. So we, in fact, have two clusterings, one for the raw values,
|
|||
|
and another for the “shadow matrix” (i.e.: the matrix with 0/1,
|
|||
|
indicating if a value was missing or not).</p>
|
|||
|
<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" tabindex="-1"></a>arcsin_transformation <span class="ot"><-</span> <span class="cf">function</span>(x) <span class="fu">asin</span>(x<span class="sc">/</span><span class="dv">100</span>)</span>
|
|||
|
<span id="cb44-2"><a href="#cb44-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb44-3"><a href="#cb44-3" tabindex="-1"></a>dend_NA <span class="ot"><-</span> votes.repub <span class="sc">%>%</span> is.na <span class="sc">%>%</span></span>
|
|||
|
<span id="cb44-4"><a href="#cb44-4" tabindex="-1"></a> dist <span class="sc">%>%</span> hclust <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span> ladderize</span>
|
|||
|
<span id="cb44-5"><a href="#cb44-5" tabindex="-1"></a></span>
|
|||
|
<span id="cb44-6"><a href="#cb44-6" tabindex="-1"></a>dend <span class="ot"><-</span> votes.repub <span class="sc">%>%</span> arcsin_transformation <span class="sc">%>%</span></span>
|
|||
|
<span id="cb44-7"><a href="#cb44-7" tabindex="-1"></a> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"com"</span>) <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb44-8"><a href="#cb44-8" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="fu">labels</span>(dend_NA)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb44-9"><a href="#cb44-9" tabindex="-1"></a> <span class="fu">color_branches</span>(<span class="at">k=</span><span class="dv">3</span>)</span>
|
|||
|
<span id="cb44-10"><a href="#cb44-10" tabindex="-1"></a></span>
|
|||
|
<span id="cb44-11"><a href="#cb44-11" tabindex="-1"></a><span class="co"># some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))</span></span>
|
|||
|
<span id="cb44-12"><a href="#cb44-12" tabindex="-1"></a>some_col_func <span class="ot"><-</span> colorspace<span class="sc">::</span>diverge_hcl</span>
|
|||
|
<span id="cb44-13"><a href="#cb44-13" tabindex="-1"></a></span>
|
|||
|
<span id="cb44-14"><a href="#cb44-14" tabindex="-1"></a></span>
|
|||
|
<span id="cb44-15"><a href="#cb44-15" tabindex="-1"></a><span class="co"># par(mar = c(3,3,3,3))</span></span>
|
|||
|
<span id="cb44-16"><a href="#cb44-16" tabindex="-1"></a><span class="co"># library(gplots)</span></span>
|
|||
|
<span id="cb44-17"><a href="#cb44-17" tabindex="-1"></a>gplots<span class="sc">::</span><span class="fu">heatmap.2</span>(<span class="fu">as.matrix</span>(votes.repub), </span>
|
|||
|
<span id="cb44-18"><a href="#cb44-18" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Votes for</span><span class="sc">\n</span><span class="st"> Republican Presidential Candidate</span><span class="sc">\n</span><span class="st"> (clustered using complete)"</span>,</span>
|
|||
|
<span id="cb44-19"><a href="#cb44-19" tabindex="-1"></a> <span class="at">srtCol =</span> <span class="dv">60</span>,</span>
|
|||
|
<span id="cb44-20"><a href="#cb44-20" tabindex="-1"></a> <span class="at">dendrogram =</span> <span class="st">"row"</span>,</span>
|
|||
|
<span id="cb44-21"><a href="#cb44-21" tabindex="-1"></a> <span class="at">Rowv =</span> dend,</span>
|
|||
|
<span id="cb44-22"><a href="#cb44-22" tabindex="-1"></a> <span class="at">Colv =</span> <span class="st">"NA"</span>, <span class="co"># this to make sure the columns are not ordered</span></span>
|
|||
|
<span id="cb44-23"><a href="#cb44-23" tabindex="-1"></a> <span class="at">trace=</span><span class="st">"none"</span>, </span>
|
|||
|
<span id="cb44-24"><a href="#cb44-24" tabindex="-1"></a> <span class="at">margins =</span><span class="fu">c</span>(<span class="dv">3</span>,<span class="dv">6</span>), </span>
|
|||
|
<span id="cb44-25"><a href="#cb44-25" tabindex="-1"></a> <span class="at">key.xlab =</span> <span class="st">"% Votes for Republican</span><span class="sc">\n</span><span class="st"> Presidential Candidate"</span>,</span>
|
|||
|
<span id="cb44-26"><a href="#cb44-26" tabindex="-1"></a> <span class="at">labCol =</span> years,</span>
|
|||
|
<span id="cb44-27"><a href="#cb44-27" tabindex="-1"></a> <span class="at">denscol =</span> <span class="st">"grey"</span>,</span>
|
|||
|
<span id="cb44-28"><a href="#cb44-28" tabindex="-1"></a> <span class="at">density.info =</span> <span class="st">"density"</span>,</span>
|
|||
|
<span id="cb44-29"><a href="#cb44-29" tabindex="-1"></a> <span class="at">col =</span> some_col_func</span>
|
|||
|
<span id="cb44-30"><a href="#cb44-30" tabindex="-1"></a> )</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqMAAAKjCAYAAAApnwZoAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAKjoAMABAAAAAEAAAKjAAAAANgC0G8AAEAASURBVHgB7J0HeFTV1oZXeiD0Kr2pYMEuYsdeEHuv2Hvvvf1iQQWvBbleFRt2vdeKooANGzZsKIo0pbdQ0jP/fnfYk5PJZDJnMpNMMms9TzJn9tn1O2dmvrP2KmkBI6ISVwTKysrkr7/+ks6dO0vLli2j6nvt2rWSnp4uzZo1i6p+uErFxcVSVFQkLVq0kLS0NFulsLBQSkpKJDs7W3JycsI10zJFQBFQBBQBRUARUAQaDIH0Bhu5CQ4MCb399tstGdxoo42kVatWcsIJJ8i6detqXe1WW20lxx9/fK31IlW466677JgQYeSXX36Rbt26yYYbbmjJcaS2ek4RUAQUAUVAEVAEFIGGQCCzIQZtqmM+/PDDctNNN8kRRxwhw4cPl3fffVceeeQRGTBggNx44431uuy5c+
fKfvvtZzWkH3zwgZ1DvU5AB1MEFIGICPz666/y9ddf2zpbb721DBw4MGz9Z599VsrLy+1D7uGHHx62TjSF9MHuS0PItGnTZNKkSbJs2TLp37+/HHbYYdK2bduGmIqOqQgoAsmIANv0KvFBoGvXroGOHTsGzJe+7ZDXBx54IDB16lT7ftWqVYGTTz450KNHj8Amm2wSMEQ1OLDRXgYOPfTQWuvttNNOgYsuuiiw8cYbB84666xgew5uvfVWTC4CX375ZcB84Qdat24d+Oabb4J18vPzA6ecckrAmA8Ettxyy8A777xjz5144omB/fffP1jv8ssvD+y6664Bo+kNlumBIqAIxBcBPpt8Xvnzfv68o/BZdnX47MYixgQoYB6S7fdDLO3r2ubll18OGBIcXAfrMQ/Lde1W2ysCikATQqBhHpPNt1FTE0P05J9//pG99toraK+J3aYhjrLjjjva5aINePvtt+XSSy+VbbbZRs477zx5/vnnq0ERqd5vv/0maGCPOuooGTp0aLW2FBxzzDFCPUN6q2hbnnzySXnllVfk3nvvFUNGrQZ3wYIFdn4TJkyQn3/+WbA7/c9//mPbNZQWJeyitFARaGII8B3ArgnC7gVaw1AxRC5YZB4ag8fRHvB5ZozbbrtNCgoKom0W13rjxo2zml3s4TEl4vuH7yYVRUARUAQcAkpGHRJ1fGULDKnJYWnRokV2m+rUU0+1ZBRiaDSX8txzz1UZOZp6EN7/+7//k4MPPrhKW/dm6dKlcuGFF8pPP/0kd955pyuWl156yf4IQJx79eplf5z4ETzuuOOscxPneW80uHLSSScF2+mBIqAIJAYBRzBLS0vl1VdfrTYID4+I2XWRPffcs9r52grod968ebVVS+h5Z8M+aNAgufrqq8XsvCR0PO1cEVAEGh8CSkbjdM3atGljfzC++uqrYI9oIrbffnu55557BOcmBE93JDMzU3Jzc60m0has/xdNPbz0I8mLL74oxjzAjn3HHXdYjSf10YLSPz9OeNhfc8010qdPH2u7ZUwELFnlx8+YAMjgwYMjDaHnFAFFIA4IeJ0W+dx6BTvL2bNn26Jjjz22ir2n2Z2TF154Qc4++2zZfPPNZZdddrG7MOzOOPn999+Fdk5ee+01a6v53XffuSK7MzNs2DD7mcfGnO8qCKxX+K549NFH7a4PjploWvfdd195+umnvdWqHTMOuzxz5syx59it4f1DDz0UrPvHH3/IiBEjrH27MV2yuzWhD+hUvu6662xbdoXQFm+77bZ2Z8iYMQT70gNFQBFoxAg0IZODBl/Kgw8+aO2izJd6wDgdWBtQc2sEzNa8nRt2nob8BcyWeNC+81//+pc957UZjVSvffv2AaNdDbtWZzP6559/2vOff/55wJgKBHbYYQdr/2nMAgK0//777wOPPfZYYMiQIQHzQ2HrMifmaghywEQECNu/FioCikD8Edh5553tZw+7SvPAGBzgqquuCtpZfvvtt8FyDs4555zgOT637s84BQXef/99W/eLL74IlrvzvPJZR/g+8Ja7Y/MgGjAP0rYO/8wuSbBeRkZG8Jj648ePD9YLPXDfKa5f9+q+v3788cdAhw4dqvTn6hxyyCEBQ4KDXTqMtthii4AJUxdsM3369GAdPVAEFIHGi4A03qkn38xx+Ln77rut4xBfqpBKs50enCgk0TzRW4KYlZUVuPjii4NOQl4yGqmeHzLKwDg9MJf7778/YLQsAaOpte9xbrrhhhuCc2Pu3bt3t3OjnooioAjUDwI4MjoSxgOtk759+9ryTTfd1BXZ15tvvjlYf/fddw+Y3YyA2eUImN0ZW77BBhsEli9fHjDmOoHHH388WNdoQO1DstGeBoxne7DchJ8L/PDDD9ah0sQituV8XyCLFy8O1mNuJkydLcPZMi8vL3DAAQcEjF2qrRv6b+HChQGcl5gP6zM2sva9iSAQMOZIgU6dOtly5n3ttdfaczhOOiyMxjTYpSOjnD
NRBwJmxydwxhlnBM/rgSKgCDRuBJSMJuj68UNQk6xcuTJggtPXdDpYHm29YIMoD+jXbMVVqc2PFz8a/LipKAKKQP0hwHcFD
|
|||
|
<div class="sourceCode" id="cb45"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1" tabindex="-1"></a> <span class="co"># RowSideColors = rev(labels_colors(dend)), # to add nice colored strips </span></span></code></pre></div>
|
|||
|
<p>How much of a difference would we get if we used another clustering
|
|||
|
algorithm?</p>
|
|||
|
<p>We first calculate the clustering using 8 different methods:</p>
|
|||
|
<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" tabindex="-1"></a>hclust_methods <span class="ot"><-</span> <span class="fu">c</span>(<span class="st">"ward.D"</span>, <span class="st">"single"</span>, <span class="st">"complete"</span>, <span class="st">"average"</span>, <span class="st">"mcquitty"</span>, </span>
|
|||
|
<span id="cb46-2"><a href="#cb46-2" tabindex="-1"></a> <span class="st">"median"</span>, <span class="st">"centroid"</span>, <span class="st">"ward.D2"</span>)</span>
|
|||
|
<span id="cb46-3"><a href="#cb46-3" tabindex="-1"></a>votes.repub_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>()</span>
|
|||
|
<span id="cb46-4"><a href="#cb46-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb46-5"><a href="#cb46-5" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="fu">seq_along</span>(hclust_methods)) {</span>
|
|||
|
<span id="cb46-6"><a href="#cb46-6" tabindex="-1"></a> tmp_dend <span class="ot"><-</span> votes.repub <span class="sc">%>%</span> arcsin_transformation <span class="sc">%>%</span> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> hclust_methods[i]) <span class="sc">%>%</span> as.dendrogram </span>
|
|||
|
<span id="cb46-7"><a href="#cb46-7" tabindex="-1"></a> votes.repub_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>(votes.repub_dendlist, tmp_dend)</span>
|
|||
|
<span id="cb46-8"><a href="#cb46-8" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb46-9"><a href="#cb46-9" tabindex="-1"></a><span class="fu">names</span>(votes.repub_dendlist) <span class="ot"><-</span> hclust_methods</span>
|
|||
|
<span id="cb46-10"><a href="#cb46-10" tabindex="-1"></a><span class="co"># votes.repub_dendlist</span></span></code></pre></div>
|
|||
|
<p>Next, we can look at the cophenetic correlation between each
|
|||
|
clustering result using <code>cor.dendlist</code>. (This can be nicely
|
|||
|
plotted using the <code>corrplot</code> function from the
|
|||
|
<em>corrplot</em> package):</p>
|
|||
|
<div class="sourceCode" id="cb47"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb47-1"><a href="#cb47-1" tabindex="-1"></a>corrplot<span class="sc">::</span><span class="fu">corrplot</span>(<span class="fu">cor.dendlist</span>(votes.repub_dendlist), <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAlgAAAJYCAYAAAC+ZpjcAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAJYoAMABAAAAAEAAAJYAAAAAIxUoKIAAEAASURBVHgB7J0HfBTFF8d/u3u59IRAKAkt9N6rSpNeBAUUUZQiYi9YsCA2VKQJ/gUUBRFFFBALKqACioiAVOkC0jsJCentdvc/e5ESSC53ydXwm8/nvL3dmffefOfwXmbevJF0UcBCAiRAAiRAAiRAAiTgNAKy0yRREAmQAAmQAAmQAAmQgJUAHSx+EUiABEiABEiABEjAyQToYDkZKMWRAAmQAAmQAAmQAB0sfgdIgARIgARIgARIwMkE6GA5GSjFkQAJkAAJkAAJkAAdLH4HSIAESIAESIAESMDJBOhgORkoxZEACZAACZAACZAAHSx+B0iABEiABEiABEjAyQToYDkZKMWRAAmQAAmQAAmQAB0sfgdIgARIgARIgARIwMkE6GA5GSjFkQAJkAAJkAAJkAAdLH4HSI
AESIAESIAESMDJBOhgORkoxZEACZAACZAACZAAHSx+B0iABEiABEiABEjAyQToYDkZKMWRAAmQAAmQAAmQgIkIijcB/exZ6Hv2AknJkKrEAPXrQZLpVxfvUWfvSIAESIAEPE2ADpanR8BF+vWEBGiPPAF9wVeAvxkIDwfOxQLVq0Ge8S7krl1cpJliSYAESIAESIAEOJVRTL8D2oBB0I+fgLLpTyhJcTCdPS7eYyEPHwqt7wDo+w8U056zWyRAAiRAAiTgeQKSLornzaAFziSgHzwItVELKMcPQoqIuEa0OmQ4EFMZyuuvXPOMN0iABEiABEiABIpOgDNYRWfofRJOnrI6UHk5V4axUpPGwJGj3mc3LSIBEiABEiCBYkKADlYxGcgru6GnpwNhYVfeyn0dGgIYdVhIgARIgARIgARcQoBB7i7B6gVCMzLyj7M6e84LDKQJJEACJEACJFB8CdDBKq5ju2071FoN8u2ddEe/fJ/xAQmQAAmQAAmQQNEIMMi9aPy8srWuaUBmpm3bFAWSWaRvYCEBEiABEiABEnA6ATpYTkdKgSRAAiRAAiRAAtc7AQa5X4ffAH39Bmi/rb4Oe84ukwAJkAAJkIB7CNDBcg9nr9Ki/fQL9G+WeJVNNIYESIAESIAEihMBLhEWp9FkX0iABEiABEiABLyCAHcResUwONcIXVULznNlMkEKCHCuYkojARIgARIgARKwEuASYTH8IugrV0ENjbz2FV46515YJLThDxbDnrNLJEACJEACJOAdBOhgecc4ONUKqXMnKCnnrS957CuQ+t8GZf3vUNIuQFm3GlKnjpBu6+NUnRRGAiRAAiRAAiRwmQBjsC6zKJZXlqq1oaz9FVJ09KX+WXcRvvw6lJXLL93jBQmQAAmQAAmQgPMIcAbLeSy9U1J0FPR1G3LZpi3/GahWNdc9fiABEiABEiABEnAeAc5gOY+lV0rSfv4F2p33ANWrQapfD/qf6wGLBcrPP0KqWcMrbaZRJEACJEACJODrBOhg+foIFmC/vv8AdEkCRGJR/egxSHXrQOrRDVLJkgW05GMSIAESIAESIIHCEqCDVVhyPtLOUqMe5OlTIXfr6iMW00wSIAESIAES8H0CjMHy/TG02QOp7U3QV6+BXtDhzzal8CEJkAAJkAAJkIAjBOhgOULLF+ump0Of+A7UoBKwlKkAS9mK1pc64mFf7A1tJgESIAESIAGfIMAlQp8YpsIbqe/bn3dW9xLhkGJiCi+YLUmABEiABEiABPIlQAcrXzTF54Eudg0iORnQdUDTchyuc7GQmjUtPp1kT0iABEiABEjAiwhwidCLBsMVpmhfLoRqHJFTMgpqqWiopStAjakJbcEiV6ijTBIgARIgARIgAUGAhz0X86+B9vZEyJPHA2Yz9H/2QR4xHNqjT0Lu37eY95zdIwESIAESIAHPEeAMlufYu0fz2XOQ7hsK6YbW0DdtsSYXlSe9DW3qe+7RTy0kQAIkQAIkcB0SoINV3Ae9UkVg+w6gVk1gx07oGRmAogDxCcW95+wfCZAACZAACXiMAJcIPYbePYrl55+F2qELlH/3WDO4q207AnHnIT/xqHsMoBYSIAESIAESuA4JcBfhdTDo+sGDQKVK1jMI9VlzxHVFSL17QTJmslhIgARIgARIgAScToAOltORepdA9a57gWpVId/WBxBpGSTjXEIWEiABEiABEiABlxJgDJZL8XpeuPzUE4CqQh02Amql6lAfGwlt5Sro2dmeN44WkAAJkAAJkEAxJcAZrGI6sHl1Sz98GNobb0P/dB6kO++A8sVneVXjPRIgARIgARIggSISYJB7EQF6e3PjqBx9xUrov66GvmYtUKE8JDGrJd/R39tNp30kQAIkQAIk4LME6GD57NDZZ7h65yBg7z+Qhg2BsnUDJCPYnYUESIAESIAESMClBLhE6FK8nheuHzkC/Rcxg2W81gkHq24dSJ1uhtTnFkj16nreQFpAAiRAAiRAAs
WQAB2sYjio+XVJP3UK2oTJ0GfOgnRrbyiLvsivKu+TAAmQAAmQAAkUgQAdrCLA84Wm+p/roP2wFPrPK4Bjx63JRq2zV927Q
|
|||
|
<p>We see that the “complete” method is somewhat similar to the
|
|||
|
ward.D/ward.D2 methods, but there is less similarity with the other
|
|||
|
methods. We can see that the methods “average”, “mcquitty” and “median”,
|
|||
|
all give somewhat similar results. So by using “average”, we will see an
|
|||
|
alternative presentation that represents (in a sense) three other
|
|||
|
clustering solutions.</p>
|
|||
|
<p>We can look at the heatmap of the “average” method. However, as you
|
|||
|
can see, it is not very helpful in seeing the difference between the two
|
|||
|
clustering solutions.</p>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqMAAAKjCAYAAAApnwZoAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAKjoAMABAAAAAEAAAKjAAAAANgC0G8AAEAASURBVHgB7J0HeFTV1oZXeiD0Kr2p2LCL2PHaReyiWLH33nsFCyp6Lcj1qtiwXfW3oyhgw4YNFVEUaUpvSSA98+93hz2cTCaTmclMMplZ63mS03Y73z4z8521V0nzGRGVmCJQUVEhf/31l3Tu3FlatmwZVttr166V9PR0adasWVjlgxUqLS2VkpISadGihaSlpdkixcXFUlZWJtnZ2ZKTkxOsmp5TBBQBRUARUAQUAUWg0RBIb7Sek7BjSOjtt99uyeAmm2wirVq1khNOOEHWrVtX591uu+22cvzxx9dZLlSBu+66y/YJEUZmzpwp3bp1k4033tiS41B19ZoioAgoAoqAIqAIKAKNgUBmY3SarH0+8sgjctNNN8lRRx0lI0aMkPfee08effRR2WyzzeTGG29s0NueP3
++HHDAAVZD+uGHH9oxNOgAtDNFQBEIicCvv/4q33zzjS2z3XbbyYABA4KWf+6556SystK+5B555JFBy4RzkjZYfWkMmT59ukyePFlWrFgh/fv3lyOOOELatm3bGEPRPhUBRSAREWCZXiU2CHTt2tXXsWNHn/nStw2yffDBB33Tpk2zx2vWrPGdfPLJvh49evg233xznyGq/o6N9tJ3+OGH11lu11139V100UW+TTfd1HfWWWf567Nz6623YnLh++qrr3zmC9/XunVr37fffusvk5+f7zvllFN8xnzAt8022/jeffdde+3EE0/0HXjggf5yl19+uW+PPfbwGU2v/5zuKAKKQGwR4LPJ55U/7+fP2wufZVeGz240YkyAfOYl2X4/RFO/vnVeeeUVnyHB/vvgfszLcn2b1fqKgCKQRAg0zmuy+TZKNjFET/755x/ZZ599/Paa2G0a4ii77LKLvV20Ae+8845ceumlsv3228t5550nL7zwQg0oQpX77bffBA3sMcccI0OGDKlRlxPHHnusUM6Q3mralqeeekr+97//yb333iuGjFoN7qJFi+z4Jk6cKL/88otgd/rf//7X1mssLUrQm9KTikCSIcB3AKsmCKsXaA0DxRA5/ynz0ujfD3eHzzN93HbbbVJUVBRutZiWGz9+vNXsYg+PKRHfP3w3qSgCioAi4BBQMuqQqOeWJTCkNoelJUuW2GWqU0891ZJRiKHRXMrzzz9fredwykF477jjDjn00EOr1XUHy5cvlwsvvFB+/vlnufPOO91pefnll+2PAMS5V69e9seJH8Hhw4db5yauc2w0uHLSSSf56+mOIqAIxAcBRzDLy8vl1VdfrdEJL4+IWXWRf/3rXzWu13WCdhcsWFBXsbhedzbsAwcOlKuvvlrMyktc+9PGFQFFoOkhoGQ0RnPWpk0b+4Px9ddf+1tEE7HTTjvJPffcIzg3IXi6I5mZmZKbm2s1kfbE+n/hlMNLP5S89NJLYswDbN8jR460Gk/KowWlfX6c8LC/5pprpE+fPtZ2y5gIWLLKj58xAZBBgwaF6kKvKQKKQAwQ8Dot8rn1CnaWc+fOtaeOO+64avaeZnVOXnzxRTn77LNlq622kt13392uwrA64+T3338X6jl57bXXrK3m999/707ZlZmhQ4fazzw25nxXQWC9wnfFY489Zld9cMxE07r//vvLM8884y1WY59+WOWZN2+evcZqDccPP/ywv+wff/who0aNsvbtxnTJrtYEvqBT+LrrrrN1WRVCW7zDDjvYlSFjxuBvS3cUAUWgCSOQRCYHjX4rDz30kLWLMl/qPuN0YG1AzaPhM0vzdmzYeRry5zNL4n77zn//+9/2mtdmNFS59u3b+4x2Nei9OpvRP//8017/4osvfMZUwLfzzjtb+09jFuCj/g8//OB7/PHHfYMHD/aZHwpbljExVkOQfSYiQND29aQioAjEHoHddtvNfvawqzQvjP4OrrrqKr+d5Xfffec/z84555zjv8bn1v0ZpyDfBx98YMt++eWX/vPuOls+6wjfB97zbt+8iPrMi7Qtwz+zSuIvl5GR4d+n/IQJE/zlAnfcd4pr123d99dPP/3k69ChQ7X2XJnDDjvMZ0iwv0mH0dZbb+0zYer8dWbMmOEvozuKgCLQdBGQpjv0xBs5Dj933323dRziSxVSaZbT/QOFJJo3eksQs7KyfBdffLHfSchLRkOVi4SM0jFOD4zl/vvv9xkti89oau0xzk033HCDf2yMvXv37nZslFNRBBSBhkEAR0ZHwnihddK3b197fosttnCn7Pbmm2/2l99rr718ZjXDZ1Y5fGZ1xp7faKONfCtXrvQZcx3fE0884S9rNKD2JdloT33Gs91/3oSf8/3444/WodLEIrbn+b5Ali5d6i/H2EyYOnsOZ8u8vDzfQQcd5DN2qbZs4L/Fixf7cF5iPNyfsZG1xyaCgM+YI/k6depkzzPua6+91l7DcdJhYTSm/iYdGe
WaiTrgMys+vjPOOMN/XXcUAUWgaSOgZDRO88cPQW2yevVqnwlOX9tl//lwy/krhLlDu2Yprlppfrz40eDHTUURUAQaDgG+K
|
|||
|
<p>Let’s look at the tanglegram of the two methods to get a better
|
|||
|
insight into the differences between the two:</p>
|
|||
|
<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" tabindex="-1"></a>dend_com <span class="ot"><-</span> votes.repub <span class="sc">%>%</span> arcsin_transformation <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-2"><a href="#cb48-2" tabindex="-1"></a> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"com"</span>) <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-3"><a href="#cb48-3" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="fu">labels</span>(dend_NA)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-4"><a href="#cb48-4" tabindex="-1"></a> <span class="fu">color_branches</span>(<span class="at">k=</span><span class="dv">3</span>) <span class="co"># %>% ladderize</span></span>
|
|||
|
<span id="cb48-5"><a href="#cb48-5" tabindex="-1"></a>dend_ave <span class="ot"><-</span> votes.repub <span class="sc">%>%</span> arcsin_transformation <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-6"><a href="#cb48-6" tabindex="-1"></a> dist <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"ave"</span>) <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-7"><a href="#cb48-7" tabindex="-1"></a> <span class="fu">rotate</span>(<span class="fu">labels</span>(dend_NA)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb48-8"><a href="#cb48-8" tabindex="-1"></a> <span class="fu">color_branches</span>(<span class="at">k=</span><span class="dv">3</span>) <span class="co"># %>% ladderize</span></span>
|
|||
|
<span id="cb48-9"><a href="#cb48-9" tabindex="-1"></a></span>
|
|||
|
<span id="cb48-10"><a href="#cb48-10" tabindex="-1"></a><span class="co"># The orders were predefined after using untangle("step2side")</span></span>
|
|||
|
<span id="cb48-11"><a href="#cb48-11" tabindex="-1"></a><span class="co"># They are omitted here to save running time.</span></span>
|
|||
|
<span id="cb48-12"><a href="#cb48-12" tabindex="-1"></a>dend_com <span class="ot"><-</span> <span class="fu">rotate</span>(dend_com, ord1)</span>
|
|||
|
<span id="cb48-13"><a href="#cb48-13" tabindex="-1"></a>dend_ave <span class="ot"><-</span> <span class="fu">rotate</span>(dend_ave, ord2)</span>
|
|||
|
<span id="cb48-14"><a href="#cb48-14" tabindex="-1"></a></span>
|
|||
|
<span id="cb48-15"><a href="#cb48-15" tabindex="-1"></a>dends <span class="ot"><-</span> <span class="fu">dendlist</span>(<span class="at">complete =</span> dend_com, <span class="at">average =</span> dend_ave) <span class="co"># %>% untangle("step2side")</span></span>
|
|||
|
<span id="cb48-16"><a href="#cb48-16" tabindex="-1"></a>dends <span class="sc">%>%</span> <span class="fu">tanglegram</span>(<span class="at">margin_inner =</span> <span class="dv">7</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAYAAAB8GMlFAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAHCAAAAAOQBvRMAAEAASURBVHgB7J0HeBzF2cdfW726yb03cMMVYwwBTA+EbjoBTA2BmITy0TGGQOgdEgwECCUJJqG3UI3p2MbGvVvu3bJl9eL75jenOa327qQ76U53suZ9npP2dmdnd9/bmf+8vYVHkViyHLAcsBywHLAcaKYcaNlMn9s+tuWA5YDlgOWA5YDmgAVC+yJYDlgOWA5YDjRrDlggbNY/v314ywHLAcsBywELhPYdsBywHLAcsBxo1hywQNisf3778JYDlgOWA5YDFgjtO2A5YDlgOWA50Kw5YIGwWf/89uEtBywHLAcsBywQ2nfAcsBywHLAcqBZc6BZA+FZZ50lI0aMkFdeeSXsl2DBggWydu3asM+zJ1gOWA5YDlgOxBcHmjUQLlmyRObMmSNbt24N+VeprKyUq6++WoYPH2
6BMGSu2YaWA5YDlgPxy4HE+L21+LyzkpISefLJJ+Pz5uxdWQ5YDlgOWA6EzYGIA+GGDRvkr3/9q6A67NGjhxx99NFyxBFHSHp6uu/mdu7cKe+995589dVXsmvXLhk2bJhcccUVkpOT42vz6quvys8//yynnnqqIIW99tprUlBQIKeddpqcccYZ8uOPP8qLL74o9HXMMcfIxRdf7Dv3z3/+s+Tl5WnJ7YcffpD3339fkpKS5Nxzz9X342sYZOOf//yn/O9//5PCwkIt+f3hD3+Q1q1b6+833XST76wnnnhCZs6cqa/Dzvz8fGHfrFmzpG3btnL44YfLb3/7W197u2E5YDlgORAJDjA3/v3vf5fc3Fw9Nw0YMEDOO+886dOnj3zxxRd6zsvOzpbJkyf7LsecyNwIXXvttdKtWze9HWy+4+DKlSvlqaee0nPzCSecIPfee6906dJFbrjhBunYsaOeo4Pdh+5c/eG6jzzyiMydO1e4z6uuukreffdd3Tfz48iRI3XTmM6fJN2OFH3//feeNm3akMS7xuekk07yXWL79u0eBXw1jtO+Q4cOHs43dM455+g2BxxwgCchIaFG+wkTJniSk5Nr7HvwwQfNqR4FwPqYAuAabVq0aOFRIO1rZ+7j4Ycf9u1TP0yNc7i3nj17etQL4dmyZYvfsbFjx+pz169f7+ndu7ff8fHjx/v6thuWA5YDlgMN5YACP09mZqbfXNO+fXsPxxYvXuw7Nnv2bN/lmPvMfKaEC72/tvmOBkpY0efQtwJAva2EGs/u3bv1tWq7D85XZifffGxwgX569eql+1ICD808sZ4/Rd9FBP7A2KFDh+qHU5KQR0lFnmnTpvl+sNdff11fxYBTv379PDDhgw8+8IwePVqfx76ioiLdzgAhIPjWW295lOToOfTQQ3U7GKpWJp7Nmzd7lISo9+2///6+pzBAyI/0zDPPeH755RePWs3odmqVpH8cGruBUK1SdBtAlmuuXr3ad94FF1zgKSsr83z00Ue+e1ArJY9amenrnn/++b77mD9/vufrr7/2qBWT3qekX9+92Q3LAcsBy4GGcOCuu+7ytGvXznPZZZfpeXHevHkewIl58fnnn9ddjxs3Tn+/5pprfJdCqKAN50N1zXe0MUDIecqx0PPyyy97pkyZwiHdT1338cc//lFfU2n79Jy6bNkyj9Ly6X30aYAw1vNnxIAQiYkH4/Phhx9qRvHnu+++00AEiOzYscPTsmVL3UZ5avraKDWn79yPP/5Y7zdAeNhhh/naTZo0SbdD6jTED8M1kSgNGSA8/fTTzS7PokWLfNfgBYDcQIikSV/HHXecB2DnM336dL0PcNyzZ49HqWd9/Xz77be+/s3KSKkJfOdOnDhRt+VZLFkOWA5YDkSSA8xPq1at0mBipLWHHnpIX+Jf//qXnntYjJeXl3sWLlyovyNYrFu3TrcJZb5zAqFSuQa8/druY/Dgwfq6ynTlOxfgNlhhgDDW82fEbITLly9Xz+YlpS40m+LcnjFjhigw0ccU2PjaKIlQ1MpClNpUFGDJscce6zvWtWtX33ZKSoredu5LS0vT+9SP7WtnNg455BCzqXXTnTp1kk2bNolSH/j2OzdWrFihvyqpT9QL4zwkCshFqUZF/WA19vOFPrFfQpdccon+6C9Vf5TY7/xqty0HLAcsB+rNAXwXbrvtNnnnnXdEAaEok49vvlJoo/vFl0KpM0VpzbS/wzfffKP3H3/88WLmz1DmO+dNDhkyxPlV+0zUdR8mxOzAAw/0nUs/rVq10v4h7IyH+TNiQIhziCF+KJxLIH6oxMRE6d69u/Tt29c0ESUia/BjB+ELGFQhDLBOMkDn3JeRkeH8GnRbrTxqHDNgCegGItPvr371K7n00kv9mqSmpvrtY4c5j22M0EpFzKaPDC98O+yG5YDlgOVAPTlw5plnitK6aSeTu+++WzsLnnjiiYJjoFnAKw2WKIlPlO+ETJ06VZRmS1/t8ssv91
3VzFuhznemvekglPtgLgSECVMzhAMOTpKGnP3Gav6MWBzhvvvuKwYoXnjhBVSu+jlPPvlk7T2q9NIaCFmlQEqXLQAT7Z577
|
|||
|
<p>We see that the two clusterings give similar results for: “Alabama”,
|
|||
|
“Georgia”, “Louisiana”, “Arkansas”, “Florida”, “Texas”, “South
|
|||
|
Carolina”, “Mississippi”.</p>
|
|||
|
<p>There are also several other sub-trees which are identical between
|
|||
|
the two methods. The biggest difference lies in several “rogue” states
|
|||
|
that are placed differently in the two clustering algorithms. They are:
|
|||
|
Vermont, Michigan, Maine, Hawaii, New Jersey, West Virginia, and
|
|||
|
Oklahoma.</p>
|
|||
|
<p>A better understanding of the data requires a much more in-depth
|
|||
|
historical perspective than is within the scope of the current
|
|||
|
analysis.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="animals---attributes-of-animals" class="section level2">
|
|||
|
<h2>animals - Attributes of Animals</h2>
|
|||
|
<div id="background-3" class="section level3">
|
|||
|
<h3>Background</h3>
|
|||
|
<blockquote>
|
|||
|
<p>This data set considers 6 binary attributes for 20 animals.</p>
|
|||
|
</blockquote>
|
|||
|
<blockquote>
|
|||
|
<p>see Struyf, Hubert & Rousseeuw (1996), in agnes.</p>
|
|||
|
</blockquote>
|
|||
|
<p>Define variables:</p>
|
|||
|
<div class="sourceCode" id="cb49"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" tabindex="-1"></a>animals <span class="ot"><-</span> cluster<span class="sc">::</span>animals</span>
|
|||
|
<span id="cb49-2"><a href="#cb49-2" tabindex="-1"></a></span>
|
|||
|
<span id="cb49-3"><a href="#cb49-3" tabindex="-1"></a><span class="fu">colnames</span>(animals) <span class="ot"><-</span> <span class="fu">c</span>(<span class="st">"warm-blooded"</span>, </span>
|
|||
|
<span id="cb49-4"><a href="#cb49-4" tabindex="-1"></a> <span class="st">"can fly"</span>,</span>
|
|||
|
<span id="cb49-5"><a href="#cb49-5" tabindex="-1"></a> <span class="st">"vertebrate"</span>,</span>
|
|||
|
<span id="cb49-6"><a href="#cb49-6" tabindex="-1"></a> <span class="st">"endangered"</span>,</span>
|
|||
|
<span id="cb49-7"><a href="#cb49-7" tabindex="-1"></a> <span class="st">"live in groups"</span>,</span>
|
|||
|
<span id="cb49-8"><a href="#cb49-8" tabindex="-1"></a> <span class="st">"have hair"</span>)</span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="heatmap-1" class="section level3">
|
|||
|
<h3>Heatmap</h3>
|
|||
|
<p>This is a good example of using a heatmap with colored branches.</p>
|
|||
|
<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" tabindex="-1"></a>dend_r <span class="ot"><-</span> animals <span class="sc">%>%</span> <span class="fu">dist</span>(<span class="at">method =</span> <span class="st">"man"</span>) <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"ward.D"</span>) <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span> ladderize <span class="sc">%>%</span></span>
|
|||
|
<span id="cb50-2"><a href="#cb50-2" tabindex="-1"></a> <span class="fu">color_branches</span>(<span class="at">k=</span><span class="dv">4</span>)</span>
|
|||
|
<span id="cb50-3"><a href="#cb50-3" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-4"><a href="#cb50-4" tabindex="-1"></a>dend_c <span class="ot"><-</span> <span class="fu">t</span>(animals) <span class="sc">%>%</span> <span class="fu">dist</span>(<span class="at">method =</span> <span class="st">"man"</span>) <span class="sc">%>%</span> <span class="fu">hclust</span>(<span class="at">method =</span> <span class="st">"com"</span>) <span class="sc">%>%</span> as.dendrogram <span class="sc">%>%</span> ladderize<span class="sc">%>%</span></span>
|
|||
|
<span id="cb50-5"><a href="#cb50-5" tabindex="-1"></a> <span class="fu">color_branches</span>(<span class="at">k=</span><span class="dv">3</span>)</span>
|
|||
|
<span id="cb50-6"><a href="#cb50-6" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-7"><a href="#cb50-7" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-8"><a href="#cb50-8" tabindex="-1"></a><span class="co"># some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5)))</span></span>
|
|||
|
<span id="cb50-9"><a href="#cb50-9" tabindex="-1"></a><span class="co"># some_col_func <- colorspace::diverge_hcl</span></span>
|
|||
|
<span id="cb50-10"><a href="#cb50-10" tabindex="-1"></a><span class="co"># some_col_func <- colorspace::sequential_hcl</span></span>
|
|||
|
<span id="cb50-11"><a href="#cb50-11" tabindex="-1"></a>some_col_func <span class="ot"><-</span> <span class="cf">function</span>(n) (colorspace<span class="sc">::</span><span class="fu">diverge_hcl</span>(n, <span class="at">h =</span> <span class="fu">c</span>(<span class="dv">246</span>, <span class="dv">40</span>), <span class="at">c =</span> <span class="dv">96</span>, <span class="at">l =</span> <span class="fu">c</span>(<span class="dv">65</span>, <span class="dv">90</span>)))</span>
|
|||
|
<span id="cb50-12"><a href="#cb50-12" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-13"><a href="#cb50-13" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-14"><a href="#cb50-14" tabindex="-1"></a></span>
|
|||
|
<span id="cb50-15"><a href="#cb50-15" tabindex="-1"></a><span class="co"># par(mar = c(3,3,3,3))</span></span>
|
|||
|
<span id="cb50-16"><a href="#cb50-16" tabindex="-1"></a><span class="co"># library(gplots)</span></span>
|
|||
|
<span id="cb50-17"><a href="#cb50-17" tabindex="-1"></a>gplots<span class="sc">::</span><span class="fu">heatmap.2</span>(<span class="fu">as.matrix</span>(animals<span class="dv">-1</span>), </span>
|
|||
|
<span id="cb50-18"><a href="#cb50-18" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Attributes of Animals"</span>,</span>
|
|||
|
<span id="cb50-19"><a href="#cb50-19" tabindex="-1"></a> <span class="at">srtCol =</span> <span class="dv">35</span>,</span>
|
|||
|
<span id="cb50-20"><a href="#cb50-20" tabindex="-1"></a> <span class="at">Rowv =</span> dend_r,</span>
|
|||
|
<span id="cb50-21"><a href="#cb50-21" tabindex="-1"></a> <span class="at">Colv =</span> dend_c,</span>
|
|||
|
<span id="cb50-22"><a href="#cb50-22" tabindex="-1"></a> <span class="at">trace=</span><span class="st">"row"</span>, <span class="at">hline =</span> <span class="cn">NA</span>, <span class="at">tracecol =</span> <span class="st">"darkgrey"</span>, </span>
|
|||
|
<span id="cb50-23"><a href="#cb50-23" tabindex="-1"></a> <span class="at">margins =</span><span class="fu">c</span>(<span class="dv">6</span>,<span class="dv">3</span>), </span>
|
|||
|
<span id="cb50-24"><a href="#cb50-24" tabindex="-1"></a> <span class="at">key.xlab =</span> <span class="st">"no / yes"</span>,</span>
|
|||
|
<span id="cb50-25"><a href="#cb50-25" tabindex="-1"></a> <span class="at">denscol =</span> <span class="st">"grey"</span>,</span>
|
|||
|
<span id="cb50-26"><a href="#cb50-26" tabindex="-1"></a> <span class="at">density.info =</span> <span class="st">"density"</span>,</span>
|
|||
|
<span id="cb50-27"><a href="#cb50-27" tabindex="-1"></a> <span class="at">col =</span> some_col_func</span>
|
|||
|
<span id="cb50-28"><a href="#cb50-28" tabindex="-1"></a> )</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqMAAAKjCAYAAAApnwZoAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAKjoAMABAAAAAEAAAKjAAAAANgC0G8AAEAASURBVHgB7N0HgJNF+vjxJ5utLLtLrypVsIAI6NkARVRUigVFwYYNT89esPz1BD31LKeeePKznojoWa8gCoqKCMIBeioqYgPpUha2t2TzzzP4xuxuspvsJtk3yXe8Jcn7zjvvzOfNwbPzvjPj8HiTkCIq4Ha7Ze3atdKxY0fJyckJqeySkhJJSUmRrKyskPIHylRZWSkVFRXSsmVLcTgcJkt5eblUVVVJenq6ZGRkBDqMbQgggAACCCCAQLMJpDTbmRPwxBqE3n333SYY3HfffSU3N1fOOeccKS0tbbC1Bx98sEycOLHBfPVl+POf/2zOqYGwpm+++Ua6du0qvXv3NsFxfceyDwEEEEAAAQQQaA4BgtEIqv/tb3+TP/7xjzJq1CiZM2eOXHHFFfLSSy/JX/7ylwieJb
Si1q9fLyNHjjQ9pAsWLJD99tsvtAPJhQACCCCAAAIIxFCAYDSC2Pfff7+0b99eXnvtNRk9erQ8/vjj8te//lWOO+44c5bCwkK54IILZJ999pEDDjhAZsyYEfDs9eU76qij5JprrpG+ffvKZZddFvD4HTt2yAknnCBFRUXy7rvvSv/+/U0+/Txp0iTp1KmTaE/sO++8Y7afd955ctJJJ/nKuvHGG2XYsGFSXV3t28YbBBBAAAEEEEAgGgIEoxFS1QBy8+bNMmLECN/zmvrc5tVXXy1HHHGEOctpp50mc+fOleuuu04GDRpkek5ffvnlOjWoL9+aNWtEe2DPPPNM0wNb52DvhrPOOks039577+0LRDXf3//+d3n99dfloYcekgEDBsi4ceNky5Ytpn7z5s2Tr7/+WvS502eeecYcp8+wkhBAAAEEEEAAgWgKEG1ESNfqRQw2YOmXX36RDz74QC688EITjGpgmJeXJ7Nnz65Rg1DyacD7pz/9ScaOHVvjWOuD9oxeddVV8tVXX8l9991nbZZXX33VBKgaOHfr1k3KyspEb+FPmDDBDG7S/fq5oKBAtLeUhAACCCCAAAIIRFuAYDRCwq1atZIuXbrI8uXLfSVqsHfooYfKAw88IDq4SZOOdNeUmpoqmZmZpifSbPj1j1Dy6Sj9+tIrr7xiHg/Qc99zzz2mx1Pzay+olr9hwwYzwv6WW26RHj16SOvWreXUU081war2nPbp00cOP/zw+k7BPgQQQAABBBBAICICBKMRYdxTyK233ipffPGFnHjiiabHU0fHr1y5Uvr162cCVQ3yZs6cKfPnzzej7rUXdMyYMTVqoAFtQ/k0kK0v6WAlfUTgscceM0HnxRdfbJ7/1Hrl5+fL2WefLb169ZJly5aZ51e1LO2x/fbbb0UfG6BXtD5d9iGAAAIIIIBAJAXqj2oieaYkKEtHz+s0Tvfee68JODWo1NvpJ598smm9DhgaP368GSykAaUORPrDH/5QRybUfHUOrLVBezfPP/98EwDrQKopU6bIihUrzOAlfURAb+XrYCpNxx9/vOy1116yadMmgtFajnxEAAEEEEAAgegJOJj0Pjq4O3fulLZt2wYsXJ/J1MntdSL6+lKo+eorI9A+LVcfF3A6nb7du3btMiP8dZT+woULfdt5gwACCCCAAAIIRFOA2/RR0g0WiOrptFeyoUA0nHzhNkHP7x+IPvzww2Zy/G3btsntt98ebnHkRwABBBBAAAEEGi1Az2ij6RLnwHXr1pmBVwcddBCT4yfOZaUlCCCAAAIIxIUAwWhcXCYqiQACCCCAAAIIJKYAt+kT87rSKgQQQAABBBBAIC4EGE0focukc4rqxPfZ2dk1SqyoqPDNJaoj6HXgUjRTSUmJ6MpJjT2PrsCkddakz5XqXKjWSkzaPi1fy25oeqlotpGyEUAAAQQQQCBxBOgZjdC11CU8A00Uf9ddd0lubq75adGihXnV6ZyKi4sjdOaaxeia8zq/qSZdgemOO+6omaGBT3fffbevvhpYZ2RkyCWXXGKmrNI5VLUtr732WgOliLz00ktm6dEGM5IBAQQQQAABBJJagJ7RGF3+Z599VjQY/e9//yuPPvqoOauuMR/pdP/995sVlbRcnexeg8nGJK2vTv/0n//8R/T9kCFDzHr2oZS1fv16Oeecc8yk+6HkJw8CCCCAAAIIJK8APaPea69ruetk9Drpu65e9Mc//tF8I77++ms58MADZfr06TJw4ECz3wok9Xa19iL27NlTTjnlFLOyUX1fo5EjR5qVjx555BGT/8knn/Tdvp8xY4b07t3brBf///7f//MtHarH6ET148aNE10C9IwzzjA9lC6Xy6xvr3Xdf//95eqrrzaPCOj5n376afnnP/8pWuaqVavk008/ldNPP11efPFF05bVq1ebar733nvms//ypf71P+GEE4yJLmWqSYPo2knXuL/gggvMxPkHHHCAOafm+f3vf2+y6uT/2k4SAg
gggAACCCAQTIBg1Csze/Zs+eGHH+SZZ56Rww47zLdUpz4H+s0335jtupJS9+7d5aabbhLd/o9//MMErZdffrkceeSRZmWjY
|
|||
|
<p>We see that we have several groups of variables: the “can fly” and
|
|||
|
“endangered” (which usually are both “no”), the “have hair”, and the
|
|||
|
“warm-blooded”, “vertebrate”, and “live in groups”.</p>
|
|||
|
<p>We see that within the animals there are (roughly!) the following 4
|
|||
|
groups:</p>
|
|||
|
<ol style="list-style-type: decimal">
|
|||
|
<li>The cold-blooded non-vertebrates, which are mostly not
|
|||
|
endangered.</li>
|
|||
|
<li>The warm-blooded vertebrates, which live in groups, have hair,
|
|||
|
cannot fly, and mostly are not endangered.</li>
|
|||
|
<li>The cold-blooded vertebrates, without hair, cannot fly, and are not
|
|||
|
endangered.</li>
|
|||
|
<li>The (mostly) warm-blooded vertebrates, without hair, some can fly,
|
|||
|
and some are endangered.</li>
|
|||
|
</ol>
|
|||
|
<p>How much of a difference would we get if we used another clustering
|
|||
|
algorithm?</p>
|
|||
|
<p>We first calculate the clustering using 8 different methods:</p>
|
|||
|
<div class="sourceCode" id="cb51"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" tabindex="-1"></a>hclust_methods <span class="ot"><-</span> <span class="fu">c</span>(<span class="st">"ward.D"</span>, <span class="st">"single"</span>, <span class="st">"complete"</span>, <span class="st">"average"</span>, <span class="st">"mcquitty"</span>, </span>
|
|||
|
<span id="cb51-2"><a href="#cb51-2" tabindex="-1"></a> <span class="st">"median"</span>, <span class="st">"centroid"</span>, <span class="st">"ward.D2"</span>)</span>
|
|||
|
<span id="cb51-3"><a href="#cb51-3" tabindex="-1"></a>animals_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>()</span>
|
|||
|
<span id="cb51-4"><a href="#cb51-4" tabindex="-1"></a></span>
|
|||
|
<span id="cb51-5"><a href="#cb51-5" tabindex="-1"></a><span class="cf">for</span>(i <span class="cf">in</span> <span class="fu">seq_along</span>(hclust_methods)) {</span>
|
|||
|
<span id="cb51-6"><a href="#cb51-6" tabindex="-1"></a> tmp_dend <span class="ot"><-</span> animals <span class="sc">%>%</span> <span class="fu">dist</span>(<span class="at">method =</span> <span class="st">"man"</span>) <span class="sc">%>%</span> </span>
|
|||
|
<span id="cb51-7"><a href="#cb51-7" tabindex="-1"></a> <span class="fu">hclust</span>(<span class="at">method =</span> hclust_methods[i]) <span class="sc">%>%</span> as.dendrogram </span>
|
|||
|
<span id="cb51-8"><a href="#cb51-8" tabindex="-1"></a> animals_dendlist <span class="ot"><-</span> <span class="fu">dendlist</span>(animals_dendlist, tmp_dend)</span>
|
|||
|
<span id="cb51-9"><a href="#cb51-9" tabindex="-1"></a>}</span>
|
|||
|
<span id="cb51-10"><a href="#cb51-10" tabindex="-1"></a><span class="fu">names</span>(animals_dendlist) <span class="ot"><-</span> hclust_methods</span>
|
|||
|
<span id="cb51-11"><a href="#cb51-11" tabindex="-1"></a><span class="co"># votes.repub_dendlist</span></span></code></pre></div>
|
|||
|
<p>Next, we can look at the cophenetic correlation between each
|
|||
|
clustering result using <code>cor.dendlist</code> (this can be nicely
|
|||
|
plotted using the <code>corrplot</code> function from the
|
|||
|
<em>corrplot</em> package):</p>
|
|||
|
<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" tabindex="-1"></a>cophenetic_cors <span class="ot"><-</span> <span class="fu">cor.dendlist</span>(animals_dendlist)</span>
|
|||
|
<span id="cb52-2"><a href="#cb52-2" tabindex="-1"></a>corrplot<span class="sc">::</span><span class="fu">corrplot</span>(cophenetic_cors, <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAlgAAAJYCAYAAAC+ZpjcAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAJYoAMABAAAAAEAAAJYAAAAAIxUoKIAAEAASURBVHgB7J0HfBTFF8d/u3fpjTRCQu+9S5UmvYiFItgoYq/Y/vZeUEBRAf8WQEQRUCzYQAGlKUjvVTqEkpBGem53/7OXf0IC4XKX3N3ehd98Puft7c68efOdw3uZefOepIkCFhIgARIgARIgARIgAacRkJ0miYJIgARIgARIgARIgASsBGhg8YtAAiRAAiRAAiRAAk4mQAPLyUApjgRIgARIgARIgARoYPE7QAIkQAIkQAIkQAJOJkADy8lAKY4ESIAESIAESIAEaGDxO0ACJEACJEACJEACTiZAA8vJQCmOBEiABEiABEiABGhg8TtAAiRAAiRAAiRAAk4mQAPLyUApjgRIgARIgARIgARoYPE7QAIkQAIkQAIkQAJOJkADy8lAKY4ESIAESIAESIAEaGDxO0ACJE
ACJEACJEACTiZAA8vJQCmOBEiABEiABEiABGhg8TtAAiRAAiRAAiRAAk4mQAPLyUApjgRIgARIgARIgATMRFCxCWhnzkDbvQdIOw+pdi2gWVNIMu3qij3rHB0JkAAJkIDRBGhgGT0DLupfS06Gev/D0OZ/A/j5AmFhwNkEoF5dyNPfg9y3j4t6plgSIAESIAESIAEuZVTQ74B6063Qjp+AacNfMKUlwnzmuHhPgDxuDNQbb4K2/0AFHTmHRQIkQAIkQALGE5A0UYxXgxo4k4B28CCUlu1gOn4QUnj4JaKV0eOAWjVheuXFS57xBgmQAAmQAAmQQPkJcAWr/Aw9T8LJeKsBVZJxpSsrtW4FHDnqeXpTIxIgARIgARKoIARoYFWQiSw6DC0rCwgNLXqr+HVIMKDXYSEBEiABEiABEnAJATq5uwSrBwjNzr68n9WZsx6gIFUgARIgARIggYpLgAZWRZ3bLdugNGx+2dFJw4dc9hkfkAAJkAAJkAAJlI8AndzLx88jW2uqCuTk2NbNZILkK8I3sJAACZAACZAACTidAA0spyOlQBIgARIgARIggSudAJ3cr8BvgLZ2HdQ/V1yBI+eQSYAESIAESMA9BGhguYezR/WiLvkd2neLPEonKkMCJEACJEACFYkAtwgr0mxyLCRAAiRAAiRAAh5BgKcIPWIanKuEpiilx7kymyH5+zu3Y0ojARIgARIgARKwEuAWYQX8ImjLlkMJibr0FRadfy80Cuq4eyrgyDkkEiABEiABEvAMAjSwPGMenKqF1LsXTOnnrC/51RchDb0BprUrYcpMgenvFZB69YR0w3VO7ZPCSIAESIAESIAELhCgD9YFFhXyylKnEUxr/oAUF1c4PuspwhdegWnZ4sJ7vCABEiABEiABEnAeAa5gOY+lZ0qKi4X297piuqmLfwPq1il2jx9IgARIgARIgAScR4ArWM5j6ZGS1N9+hzriNqBeXUjNmkL7ay1gscD028+QGtT3SJ2pFAmQAAmQAAl4OwEaWN4+g6Xor+0/AE2SABFYVDt6DFKTxpAG9IMUEVFKSz4mARIgARIgARIoKwEaWGUl5yXtLPWbQp42BXK/vl6iMdUkARIgARIgAe8nQB8s759DmyOQul4NbcUqaKUlf7YphQ9JgARIgARIgAQcIUADyxFa3lg3KwvaxHegBFaCpXI1WGKqW1/KXfd542ioMwmQAAmQAAl4BQFuEXrFNJVdSW3f/pKjulcKg1SrVtkFsyUJkAAJkAAJkMBlCdDAuiyaivNAE6cGcf48oGmAquYbXGcTILVtU3EGyZGQAAmQAAmQgAcR4BahB02GK1RR5y2AoqfIiYiFEhkHJboalFoNoM7/2hXdUSYJkAAJkAAJkIAgwGTPFfxroE6YCHnyW4CvL7S9+yDfNQ7qA49AHnpjBR85h0cCJEACJEACxhHgCpZx7N3T85mzkO4YA6lTR2gbNlmDi8qTJkCd8oF7+mcvJEACJEACJHAFEqCBVdEnvUZ1YNt2oGEDYPsOaNnZgMkEJCVX9JFzfCRAAiRAAiRgGAFuERqG3j0dy089AaVHH5j+3W2N4K507QkknoP88APuUYC9kAAJkAAJkMAVSICnCK+ASdcOHgRq1LDmINQ+nSWuq0MaPAiSvpLFQgIkQAIkQAIk4HQCNLCcjtSzBCo33w7UrQP5husAEZZB0vMSspAACZAACZAACbiUAH2wXIrXeOHyow8DigJl7F1QatSD8uB4qMuWQ8vLM145akACJEACJEACFZQAV7Aq6MSWNCzt8GGor02A9vkXkEYMh+mrOSVV4z0SIAESIAESIIFyEqCTezkBenpzPVWOtnQZtD9WQFu1BqhWFZJY1ZKHD/V01akfCZAACZAACXgtARpYXjt19imujLgV2LMX0tjRMG1eB0l3dmchARIgARIgARJwKQFuEboUr/HCtSNHoP0uVrD019/CwGrSGFKvayBddy2kpk2MV5AakAAJkAAJkE
AFJEADqwJO6uWGpMXHQ317MrSPPoV0/WCYvv7qclV5nwRIgARIgARIoBwEaGCVA543NNX++hvqT79A+20pcOy4NdiodfWqf
|
|||
|
<p>We see that the different methods (other than ward.D and ward.D2)
|
|||
|
all give quite different results. So would the above analysis be
|
|||
|
different if we had used another clustering algorithm?</p>
|
|||
|
<p>For this purpose, we compare the clustering solution of each
|
|||
|
algorithm with one another, when cut to k=4 clusters, using the
|
|||
|
Fowlkes-Mallows Index. This measure is similar to the Rand (or adjusted
|
|||
|
Rand) index, and gives a value of 1 when the two clusters conform,
|
|||
|
and 0 when they do not:</p>
|
|||
|
<div class="sourceCode" id="cb53"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" tabindex="-1"></a>remove_median <span class="ot"><-</span> <span class="fu">dendlist</span>(animals_dendlist, <span class="at">which =</span> <span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">8</span>)[<span class="sc">-</span><span class="dv">6</span>] )</span>
|
|||
|
<span id="cb53-2"><a href="#cb53-2" tabindex="-1"></a>FM_cors <span class="ot"><-</span> <span class="fu">cor.dendlist</span>(remove_median, <span class="at">method =</span> <span class="st">"FM_index"</span>, <span class="at">k =</span> <span class="dv">4</span>)</span>
|
|||
|
<span id="cb53-3"><a href="#cb53-3" tabindex="-1"></a>corrplot<span class="sc">::</span><span class="fu">corrplot</span>(FM_cors, <span class="st">"pie"</span>, <span class="st">"lower"</span>)</span></code></pre></div>
|
|||
|
<p><img role="img" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAYAAAB8GMlFAAAEDmlDQ1BrQ0dDb2xvclNwYWNlR2VuZXJpY1JHQgAAOI2NVV1oHFUUPpu5syskzoPUpqaSDv41lLRsUtGE2uj+ZbNt3CyTbLRBkMns3Z1pJjPj/KRpKT4UQRDBqOCT4P9bwSchaqvtiy2itFCiBIMo+ND6R6HSFwnruTOzu5O4a73L3PnmnO9+595z7t4LkLgsW5beJQIsGq4t5dPis8fmxMQ6dMF90A190C0rjpUqlSYBG+PCv9rt7yDG3tf2t/f/Z+uuUEcBiN2F2Kw4yiLiZQD+FcWyXYAEQfvICddi+AnEO2ycIOISw7UAVxieD/Cyz5mRMohfRSwoqoz+xNuIB+cj9loEB3Pw2448NaitKSLLRck2q5pOI9O9g/t/tkXda8Tbg0+PszB9FN8DuPaXKnKW4YcQn1Xk3HSIry5ps8UQ/2W5aQnxIwBdu7yFcgrxPsRjVXu8HOh0qao30cArp9SZZxDfg3h1wTzKxu5E/LUxX5wKdX5SnAzmDx4A4OIqLbB69yMesE1pKojLjVdoNsfyiPi45hZmAn3uLWdpOtfQOaVmikEs7ovj8hFWpz7EV6mel0L9Xy23FMYlPYZenAx0yDB1/PX6dledmQjikjkXCxqMJS9WtfFCyH9XtSekEF+2dH+P4tzITduTygGfv58a5VCTH5PtXD7EFZiNyUDBhHnsFTBgE0SQIA9pfFtgo6cKGuhooeilaKH41eDs38Ip+f4At1Rq/sjr6NEwQqb/I/DQqsLvaFUjvAx+eWirddAJZnAj1DFJL0mSg/gcIpPkMBkhoyCSJ8lTZIxk0TpKDjXHliJzZPO50dR5ASNSnzeLvIvod0HG/mdkmOC0z8VKnzcQ2M/Yz2vKldduXjp9bleLu0ZWn7vWc+l0JGcaai10yNrUnXLP/8Jf59ewX+c3Wgz+B34Df+vbVrc16zTMVgp9um9bxEfzPU5kPqUtVWxhs6OiWTVW+gIfywB9uXi7CGcGW/zk98k/kmvJ95IfJn/j3uQ+4c5zn3Kfcd+AyF3gLnJfcl9xH3OfR2rUee80a+6vo7EK5mmXUdyfQlrYLTwoZIU9wsPCZEtP6BWGhAlhL3p2N6sTjRdduwbHsG9kq32sgBepc+xurLPW4T9URpYGJ3ym4+8zA05u44QjST8ZIoVtu3qE7fWmdn5LPdqvgcZz8Ww8BWJ8X3w0PhQ/wnCDGd+LvlHs8dRy6bLLDuKMaZ20tZrqisPJ5ONiCq8yKhYM5cCgKOu66Lsc0aYOtZdo5QCwezI4wm9J/v0X23mlZXOfBjj8Jzv3WrY5D+CsA9D7aMs2gGfjve8ArD6mePZSeCfEYt8CONWDw8FXTxrPqx/r9Vt4biXeANh8vV7/+/16ffMD1N8AuKD/A/8leAvFY9bLAAAAOGVYSWZNTQAqAAAACAABh2kABAAAAAEAAAAaAAAAAAACoAIABAAAAAEAAAHCoAMABAAAAAEAAAHCAAAAAOQBvRMAAEAASURBVHgB7J0HYBTV1sf/M7vpvRNCIEDovYj03lEEkSJgQQREsfvsvZdPfbaHvaEigiCIovQuSq+hhwAhgYQQ0svOzHfuxECAlN3N7GZDztXJzs7cNr+77Nl77ymSRgmcmAATYAJMgAnUUAJyDX1ufmwmwASYABNgAjoBFoT8QWACTIAJMIEaTYAFYY0efn54JsAEmAATYEHInwEmwASYABOo0QRYENbo4eeHZwJMgAkwARaE/BlgAkyACTCBGk2ABWGNHn5+eCbABJgAE2BByJ8BJsAEmAATqNEEWBDW6OHnh2cCTIAJMAEWhPwZYAJMgAkwgRpNgAVhjR5+fngmwASYABNgQcifASbABJgAE6jRBFgQ1ujh54dnAkyACTABFoT8GWACTIAJMIEaTcBco5++Gjy8pq
rAkaPQ4vYDXp6QmjSGVLduNeg5d5EJMAEmUD0IsCB04XHSNm+Bcssk4MAhIDgIyM8HsnMgDR0E+YtPINWq5cK9564xASbABKoHAV4addFx0pKSoPTsB2nkDTAd2Qfz2SSYs9JgOrQHMJmg3HCTi/acu8UEmAATqF4EJI5Q75oDpr7+FrS/NsG08OcrOqjl5kKp0wCmNcshtWxxxX2+wASYABNgAtYT4Bmh9aycmlNLPAWpfbtS25S8vADaK9SOJZR6ny8yASbABJiA9QRYEFrPyrk5c3IAf/8y25TEPZoZcmICTIAJMIHKEWBlmcrxc2hpLSUF2kFSlCkladnZkEq5zpeYABNgAkzANgIsCG3j5dTc2mtvQaGjzHTfPWXe4htMgAkwASZgHQFWlrGOk9NzaYWFgMVSfrvu7pBIg5QTE2ACTIAJ2E+A9wjtZ+fQkpKbG4RSTFmHtmAhcOKEQ/vAlTMBJsAEagIBFoTVdJTVd98nbzMHqmnvudtMgAkwAdchwEujrjMW3BMmwASYABOoAgKsLFMF0K1pUhPu1MQ+YXnJk3yPmnkIy0PE95gAE2ACFRHgpdGKCFXRffXu+6D4hV55BIQVXQsMh7ZocRX1jptlAkyACVw9BFgQuuhYyjM/gCnrLEzpp4GO7SG//Ybuc9R07jTk+XOAZk0hde7kor3nbjEBJsAEqg8BFoQuOlaSMI3w8QGEQT0tgcoP3Q+pQQMIjzIyOeKW+veFtvh3F+09d4sJMAEmUH0IsCB09bGKqg3s3Qft/PkLPdXIvlBbvhJo2ODCNT5hAkyACTAB+wiwpoV93JxWSqpdG9JtE/VoE1KPboC3N7QVq/QZodSrp9P6wQ0xASbABK5WAmw+UU1GVlu7Dtqmf4qC817TAdKggZAk9jZaTYaPu8kEmIALE2BB6MKDU9w1bV8cpObNit/yKxNgAkyACRhIgPcIDYTpiKq0HTuhdOsNTVEcUT3XyQSYABOo8QRYELr6R6BOFBAWClC0ek3TXL233D8mwASYQLUjwILQ1YdMBOhVNSg9+kHxDoQlIvrCof76m6v3nvvHBJgAE3B5ArxH6OJDpLtai9tfei9j6kEKDCz9Hl9lAkyACTABqwiwILQKU9Vn0nJzAXGIJPYLU1IBXx9IdesWXeO/TIAJMAEmYBcBXhq1C5tzCykTb6dl0SAoIbWLjvBoKJ17QDty1Lkd4daYABNgAlchARaELj6o2v4D0Jb8CdPurZCuHwrTmmWQl5Kz7VYtIXXt4uK95+4xASbABFyfAAtCFx8jLSkJUs/ukFq2gNShPbTdeyAP6A+pd09orCzj4qPH3WMCTKA6EGBB6OKjJNWPoUj0+6Hl5QGtW0Fbu76oxyYTkJbm4r3n7jEBJsAEXJ8AK8u4/hhBufkW4EwK5AU/QYltXjQzXLcepv27INWpUw2egLvIBJgAE3BdAiwIXXdsLumZduAgpCaNoVFYJm3+L5CuG6ovl16Sid8wASbABJiAzQRYENqMzLkFtKNHod51L6QR10O6gY4o8jTDiQkwASbABAwjwHuEhqF0UEW09ClNnQztny1QOnSB5druUF97U983dFCLXC0TYAJMoEYR4BlhNRpuTVWhrVwF9fGnga3bIc+fo0err0aPwF1lAkyACbgcAQ7M63JDcmmHRNQJjRRjtJWr9QN79kK6thOkN1+F1OXaSzPzOybABJgAE7CZAAtCm5E5ucDReKh9BgG1IyE//gik5UsgeXo6uRPcHBNgAkzg6iXAS6MuPrZiORT/bIa2dDnUZSuAYwmQenSD1K8PpJE3QAoOdvEn4O4xASbABFybAAtC1x6fK3qnbdgI9ZkXoK1aA/mn7yGPHnVFHr7ABJgAE2AC1hNgQWg9qyrJKcIwCbtB7c9l+qwQ/n6Qhl8HmQ6Qr1FJZsXfKhkYbpQJMIGrhgALQhcfSu3IESi3T9EFnxCAwqieExNgAkyACRhHgAWhcSy5JibABJgAE6iGBHhdrR
oOGneZCTABJsAEjCPAgtA4llwTE2ACTIAJVEMCbEdo0KClUUiko+QX1M3NzaAanVuNxWJB3bp1ERYW5tyGuTUmwASYQBUTY
|
|||
|
<p>We removed the “median” method since it could not be cut into k=4 clusters. In
|
|||
|
general, the results seem sensitive to the algorithm used, and the
|
|||
|
different algorithm methods do not seem to agree with one another (with
|
|||
|
regard to k=4), so further analyses may be needed in order to decide
|
|||
|
on which algorithm and interpretation are most appropriate for these
|
|||
|
data.</p>
|
|||
|
<p>(Other possible data sets for the future: chorSub, flower,
|
|||
|
plantTraits, pluton, ruspini, agriculture)</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<!-- code folding -->
|
|||
|
|
|||
|
|
|||
|
<!-- dynamically load mathjax for compatibility with self-contained -->
|
|||
|
<script>
  // Load MathJax at runtime by appending a <script> element to <head>.
  // The body must contain only JavaScript: any stray non-JS line inside this
  // element is a syntax error and prevents the loader from running at all.
  (function () {
    var mathjaxLoader = document.createElement("script");
    mathjaxLoader.type = "text/javascript";
    mathjaxLoader.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    // document.head is the standard accessor for the single <head> element.
    document.head.appendChild(mathjaxLoader);
  })();
</script>
|
|||
|
|
|||
|
</body>
|
|||
|
</html>
|