155 lines
402 KiB
HTML
155 lines
402 KiB
HTML
|
<!DOCTYPE html>
|
|||
|
|
|||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|||
|
|
|||
|
<head>
|
|||
|
|
|||
|
<meta charset="utf-8" />
|
|||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|||
|
<meta name="generator" content="pandoc" />
|
|||
|
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|||
|
|
|||
|
<meta name="author" content="Catherine Hurley" />
|
|||
|
|
|||
|
<meta name="date" content="2019-01-07" />
|
|||
|
|
|||
|
<title>Clustering Graphics</title>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">code{white-space: pre;}</style>
|
|||
|
<style type="text/css">
|
|||
|
div.sourceCode { overflow-x: auto; }
|
|||
|
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
|
|||
|
margin: 0; padding: 0; vertical-align: baseline; border: none; }
|
|||
|
table.sourceCode { width: 100%; line-height: 100%; }
|
|||
|
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
|
|||
|
td.sourceCode { padding-left: 5px; }
|
|||
|
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
|||
|
code > span.dt { color: #902000; } /* DataType */
|
|||
|
code > span.dv { color: #40a070; } /* DecVal */
|
|||
|
code > span.bn { color: #40a070; } /* BaseN */
|
|||
|
code > span.fl { color: #40a070; } /* Float */
|
|||
|
code > span.ch { color: #4070a0; } /* Char */
|
|||
|
code > span.st { color: #4070a0; } /* String */
|
|||
|
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
|||
|
code > span.ot { color: #007020; } /* Other */
|
|||
|
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
|||
|
code > span.fu { color: #06287e; } /* Function */
|
|||
|
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
|
|||
|
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
|||
|
code > span.cn { color: #880000; } /* Constant */
|
|||
|
code > span.sc { color: #4070a0; } /* SpecialChar */
|
|||
|
code > span.vs { color: #4070a0; } /* VerbatimString */
|
|||
|
code > span.ss { color: #bb6688; } /* SpecialString */
|
|||
|
code > span.im { } /* Import */
|
|||
|
code > span.va { color: #19177c; } /* Variable */
|
|||
|
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
|||
|
code > span.op { color: #666666; } /* Operator */
|
|||
|
code > span.bu { } /* BuiltIn */
|
|||
|
code > span.ex { } /* Extension */
|
|||
|
code > span.pp { color: #bc7a00; } /* Preprocessor */
|
|||
|
code > span.at { color: #7d9029; } /* Attribute */
|
|||
|
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
|||
|
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
|||
|
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
|||
|
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
|||
|
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Aborde
r%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%238000
80%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http
|
|||
|
|
|||
|
</head>
|
|||
|
|
|||
|
<body>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<h1 class="title toc-ignore">Clustering Graphics</h1>
|
|||
|
<h4 class="author"><em>Catherine Hurley</em></h4>
|
|||
|
<h4 class="date"><em>2019-01-07</em></h4>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<p>This package will order panels in scatterplot matrices and parallel coordinate displays by some merit index. The package contains various indices of merit, ordering functions, and enhanced versions of pairs and parcoord which color panels according to their merit level. For details on the methods used, consult “Clustering Visualisations of Multidimensional Data”, Journal of Computational and Graphical Statistics, vol. 13, (4), pp 788-806, 2004.</p>
|
|||
|
<div id="displaying-a-correlation-matrix" class="section level2">
|
|||
|
<h2>Displaying a correlation matrix</h2>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(gclus)
|
|||
|
<span class="co">#> Loading required package: cluster</span>
|
|||
|
<span class="kw">data</span>(longley)
|
|||
|
longley.cor <-<span class="st"> </span><span class="kw">cor</span>(longley)
|
|||
|
longley.color <-<span class="st"> </span><span class="kw">dmat.color</span>(longley.cor)</code></pre></div>
|
|||
|
<p><code>dmat.color</code> assigns three colours to the correlations according to the correlation magnitude. High correlations are in pink, the middle third are in blue, and the bottom third are in yellow.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
<span class="kw">plotcolors</span>(longley.color,<span class="dt">dlabels=</span><span class="kw">rownames</span>(longley.color))</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
<p>If you want to change the colour scheme:</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">longley.color <-<span class="st"> </span><span class="kw">dmat.color</span>(longley.cor, <span class="dt">byrank=</span><span class="ot">FALSE</span>)
|
|||
|
longley.color <-<span class="st"> </span><span class="kw">dmat.color</span>(longley.cor, <span class="dt">breaks=</span><span class="kw">c</span>(<span class="op">-</span><span class="dv">1</span>,<span class="dv">0</span>,.<span class="dv">5</span>,.<span class="dv">8</span>,<span class="dv">1</span>),
|
|||
|
<span class="kw">cm.colors</span>(<span class="dv">4</span>))</code></pre></div>
|
|||
|
<p>The plot is easier to interpret if variables are reordered prior to plotting.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
longley.o <-<span class="st"> </span><span class="kw">order.hclust</span>(longley.cor)
|
|||
|
longley.color1 <-<span class="st"> </span>longley.color[longley.o,longley.o]
|
|||
|
<span class="kw">plotcolors</span>(longley.color1,<span class="dt">dlabels=</span><span class="kw">rownames</span>(longley.color1))</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
</div>
|
|||
|
<div id="displaying-a-pairs-plot-with-coloured-panels" class="section level2">
|
|||
|
<h2>Displaying a pairs plot with coloured panels</h2>
|
|||
|
<p><code>cpairs</code> is a version of <code>pairs</code> where panels can be coloured. All the high-correlation panels appear together in a block.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
<span class="kw">cpairs</span>(longley, <span class="dt">order=</span> longley.o,<span class="dt">panel.color=</span> longley.color)</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
<p>If the <code>order</code> is not supplied, then the variables are plotted in default dataset order.</p>
|
|||
|
</div>
|
|||
|
<div id="displaying-a-pcp-plot-with-coloured-panels" class="section level2">
|
|||
|
<h2>Displaying a PCP plot with coloured panels</h2>
|
|||
|
<p><code>cparcoord</code> is a version of <code>parcoord</code> where panels can be coloured. Again, the pink panels have high correlation, blue panels have middling correlation, and yellow panels have low correlation.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">cparcoord</span>(longley, <span class="dt">order=</span> longley.o,<span class="dt">panel.color=</span> longley.color,
|
|||
|
<span class="dt">horizontal=</span><span class="ot">TRUE</span>, <span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">2</span>,<span class="dv">4</span>,<span class="dv">1</span>,<span class="dv">1</span>))</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
</div>
|
|||
|
<div id="plotting-re-ordered-dendrograms." class="section level2">
|
|||
|
<h2>Plotting re-ordered dendrograms.</h2>
|
|||
|
<p><code>eurodist</code> is a built-in distance matrix giving the distance between European cities.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
<span class="kw">data</span>(eurodist)
|
|||
|
dis <-<span class="st"> </span><span class="kw">as.dist</span>(eurodist)
|
|||
|
hc <-<span class="st"> </span><span class="kw">hclust</span>(dis, <span class="st">"ave"</span>)
|
|||
|
<span class="kw">plot</span>(hc)</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
<p><code>reorder.hclust</code> re-orders a dendrogram to improve the similarity between nearby leaves. Applying it to the <code>hc</code> object:</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
hc1 <-<span class="st"> </span><span class="kw">reorder.hclust</span>(hc, dis)
|
|||
|
<span class="kw">plot</span>(hc1)</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
<p>Both dendrograms correspond to the same tree structure, but the second one shows that Paris is closer to Cherbourg than to Munich, and Rome is closer to Gibraltar than to Barcelona.</p>
|
|||
|
<p>We can also compare both orderings with an image plot of the colors. The second ordering seems to place nearby cities closer to each other.</p>
|
|||
|
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">
|
|||
|
<span class="kw">layout</span>(<span class="kw">matrix</span>(<span class="dv">1</span><span class="op">:</span><span class="dv">2</span>,<span class="dt">nrow=</span><span class="dv">1</span>,<span class="dt">ncol=</span><span class="dv">2</span>))
|
|||
|
<span class="kw">par</span>(<span class="dt">mar=</span><span class="kw">c</span>(<span class="dv">1</span>,<span class="dv">6</span>,<span class="dv">1</span>,<span class="dv">1</span>))
|
|||
|
cmat <-<span class="st"> </span><span class="kw">dmat.color</span>(eurodist, <span class="kw">rev</span>(<span class="kw">cm.colors</span>(<span class="dv">5</span>)))
|
|||
|
<span class="kw">plotcolors</span>(cmat[hc<span class="op">$</span>order,hc<span class="op">$</span>order], <span class="dt">rlabels=</span><span class="kw">labels</span>(eurodist)[hc<span class="op">$</span>order])
|
|||
|
|
|||
|
<span class="kw">plotcolors</span>(cmat[hc1<span class="op">$</span>order,hc1<span class="op">$</span>order], <span class="dt">rlabels=</span><span class="kw">labels</span>(eurodist)[hc1<span class="op">$</span>order])</code></pre></div>
|
|||
|
<p><img src="
|
|||
|
</div>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<!-- dynamically load mathjax for compatibility with self-contained -->
|
|||
|
<script>
|
|||
|
(function () {
|
|||
|
var script = document.createElement("script");
|
|||
|
script.type = "text/javascript";
|
|||
|
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
|
|||
|
document.getElementsByTagName("head")[0].appendChild(script);
|
|||
|
})();
|
|||
|
</script>
|
|||
|
|
|||
|
</body>
|
|||
|
</html>
|