522 lines
244 KiB
HTML
Raw Permalink Normal View History

2025-01-12 00:52:51 +08:00
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Introduction to ggplot2</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function () {
  // Candidates: the first child of every div.section whose class attribute
  // contains "level".
  var candidates = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(candidates, function (node) {
    // Anything that is not an h1-h6 heading is left untouched.
    if (!/^h[1-6]$/i.test(node.tagName)) return;
    // `attributes` is a live collection: removing entry 0 shifts the rest
    // down, so draining from the front empties it completely.
    var attrs = node.attributes;
    while (attrs.length > 0) node.removeAttribute(attrs[0].name);
  });
});
</script>
<style type="text/css">
/* Baseline helper rules: inline code wrapping, small-caps and underline
   spans, two-column layout, hanging indents and task lists. */
code {
  white-space: pre-wrap;
}

span.smallcaps {
  font-variant: small-caps;
}

span.underline {
  text-decoration: underline;
}

/* Each column takes half the width and is aligned to the top. */
div.column {
  display: inline-block;
  vertical-align: top;
  width: 50%;
}

/* First line starts 1.5em to the left of the following lines. */
div.hanging-indent {
  margin-left: 1.5em;
  text-indent: -1.5em;
}

/* Task lists are rendered without list markers. */
ul.task-list {
  list-style: none;
}
</style>
<style type="text/css">
/* Keep whitespace inside code exactly as written, without wrapping. */
code { white-space: pre; }

/* Highlighted code is allowed to extend past its box rather than be clipped. */
.sourceCode { overflow: visible; }
</style>
<style type="text/css" data-origin="pandoc">
/* ---- Layout of highlighted code blocks ---- */
pre > code.sourceCode {
  white-space: pre;
  position: relative;
}

/* One span per source line. */
pre > code.sourceCode > span {
  display: inline-block;
  line-height: 1.25;
}

/* Empty source lines still get a height. */
pre > code.sourceCode > span:empty {
  height: 1.2em;
}

.sourceCode {
  overflow: visible;
}

code.sourceCode > span {
  color: inherit;
  text-decoration: inherit;
}

div.sourceCode {
  margin: 1em 0;
}

pre.sourceCode {
  margin: 0;
}

/* On screen, wide code scrolls inside its wrapper. */
@media screen {
  div.sourceCode {
    overflow: auto;
  }
}

/* In print, long lines wrap; continuation lines are indented by 5em. */
@media print {
  pre > code.sourceCode {
    white-space: pre-wrap;
  }
  pre > code.sourceCode > span {
    text-indent: -5em;
    padding-left: 5em;
  }
}

/* ---- Optional line numbering (pre.numberSource) ---- */
/* The counter starts at 0 and is incremented once per line span. */
pre.numberSource code {
  counter-reset: source-line 0;
}

pre.numberSource code > span {
  position: relative;
  left: -4em;
  counter-increment: source-line;
}

/* The number is generated before the first anchor of each line and is
   excluded from text selection. */
pre.numberSource code > span > a:first-child::before {
  content: counter(source-line);
  position: relative;
  left: -1em;
  text-align: right;
  vertical-align: baseline;
  border: none;
  display: inline-block;
  -webkit-touch-callout: none;
  -webkit-user-select: none;
  -khtml-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
  padding: 0 4px;
  width: 4em;
  color: #aaaaaa;
}

pre.numberSource {
  margin-left: 3em;
  border-left: 1px solid #aaaaaa;
  padding-left: 4px;
}

/* Intentionally empty: the script that follows this stylesheet looks for a
   top-level rule with exactly this selector. */
div.sourceCode {
}

@media screen {
  pre > code.sourceCode > span > a:first-child::before {
    text-decoration: underline;
  }
}

/* ---- Token colours ----
   Class names are pandoc's two-letter token-type abbreviations. */
code span.al { color: #ff0000; font-weight: bold; }                     /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; }                                        /* Attribute */
code span.bn { color: #40a070; }                                        /* BaseN */
code span.bu { color: #008000; }                                        /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; }                     /* ControlFlow */
code span.ch { color: #4070a0; }                                        /* Char */
code span.cn { color: #880000; }                                        /* Constant */
code span.co { color: #60a0b0; font-style: italic; }                    /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; }                    /* Documentation */
code span.dt { color: #902000; }                                        /* DataType */
code span.dv { color: #40a070; }                                        /* DecVal */
code span.er { color: #ff0000; font-weight: bold; }                     /* Error */
code span.ex { }                                                        /* Extension */
code span.fl { color: #40a070; }                                        /* Float */
code span.fu { color: #06287e; }                                        /* Function */
code span.im { color: #008000; font-weight: bold; }                     /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; }                     /* Keyword */
code span.op { color: #666666; }                                        /* Operator */
code span.ot { color: #007020; }                                        /* Other */
code span.pp { color: #bc7a00; }                                        /* Preprocessor */
code span.sc { color: #4070a0; }                                        /* SpecialChar */
code span.ss { color: #bb6688; }                                        /* SpecialString */
code span.st { color: #4070a0; }                                        /* String */
code span.va { color: #19177c; }                                        /* Variable */
code span.vs { color: #4070a0; }                                        /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function () {
  // Walk every stylesheet attached to the document, but only act on the
  // ones whose owner element carries data-origin="pandoc".
  var allSheets = document.styleSheets;
  for (var s = 0; s < allSheets.length; s++) {
    var sheet = allSheets[s];
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;

    // Reading cssRules may throw; such sheets are skipped.
    var ruleList;
    try {
      ruleList = sheet.cssRules;
    } catch (err) {
      continue;
    }

    var pos = 0;
    while (pos < ruleList.length) {
      var current = ruleList[pos];

      // Only a plain style rule whose selector is exactly "div.sourceCode"
      // is a candidate.
      var isTarget = current.type === current.STYLE_RULE &&
        current.selectorText === "div.sourceCode";

      // The rule is moved only when it sets a text or background colour.
      var setsColour = isTarget &&
        !(current.style.color === '' && current.style.backgroundColor === '');

      if (!setsColour) {
        pos++;
        continue;
      }

      // Swap the rule in place for an equivalent pre.sourceCode rule.
      // `pos` is deliberately not advanced: the next pass sees the new
      // rule, finds a different selector, and steps over it.
      var declarations = current.style.cssText;
      sheet.deleteRule(pos);
      sheet.insertRule('pre.sourceCode{' + declarations + '}', pos);
    }
  }
})();
</script>
<style type="text/css">
/* Vignette theme: page frame, table-of-contents box, tables, text blocks,
   code, images, headings, links and token colours. */

/* Centred reading column with a 700px maximum width. */
body {
  background-color: #fff;
  margin: 1em auto;
  max-width: 700px;
  overflow: visible;
  padding-left: 2em;
  padding-right: 2em;
  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.35;
}

/* ---- Table-of-contents box ---- */
#TOC {
  clear: both;
  margin: 0 0 10px 10px;
  padding: 4px;
  width: 400px;
  border: 1px solid #CCCCCC;
  border-radius: 5px;
  background-color: #f6f6f6;
  font-size: 13px;
  line-height: 1.3;
}
#TOC .toctitle {
  font-weight: bold;
  font-size: 15px;
  margin-left: 5px;
}
#TOC ul {
  padding-left: 40px;
  margin-left: -1.5em;
  margin-top: 5px;
  margin-bottom: 5px;
}
#TOC ul ul {
  margin-left: -2em;
}
#TOC li {
  line-height: 16px;
}

/* ---- Tables ---- */
table {
  margin: 1em auto;
  border-width: 1px;
  border-color: #DDDDDD;
  border-style: outset;
  border-collapse: collapse;
}
table th {
  border-width: 2px;
  padding: 5px;
  border-style: inset;
}
table td {
  border-width: 1px;
  border-style: inset;
  line-height: 18px;
  padding: 5px 5px;
}
/* No vertical rules: left and right borders are switched off everywhere. */
table, table th, table td {
  border-left-style: none;
  border-right-style: none;
}
table thead, table tr.even {
  background-color: #f7f7f7;
}

/* ---- Text blocks ---- */
p {
  margin: 0.5em 0;
}
blockquote {
  background-color: #f6f6f6;
  padding: 0.25em 0.75em;
}
/* `border: none` clears all four sides; only the top edge is then drawn.
   (A `border-style: solid` that used to precede the shorthand was wiped by
   it and has been dropped.) */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
  margin-left: 0;
}
dl dd {
  margin-bottom: 13px;
  margin-left: 13px;
}
dl dt {
  font-weight: bold;
}
ul {
  margin-top: 0;
}
ul li {
  list-style: circle outside;
}
ul ul {
  margin-bottom: 0;
}

/* ---- Code ---- */
pre, code {
  background-color: #f7f7f7;
  border-radius: 3px;
  color: #333;
  white-space: pre-wrap;
}
pre {
  border-radius: 3px;
  margin: 5px 0px 10px 0px;
  padding: 10px;
}
pre:not([class]) {
  background-color: #f7f7f7;
}
code {
  font-family: Consolas, Monaco, 'Courier New', monospace;
  font-size: 85%;
}
p > code, li > code {
  padding: 2px 0px;
}

/* ---- Figures and images ---- */
div.figure {
  text-align: center;
}
/* A single border declaration: an earlier #DDDDDD border in this rule was
   overwritten by the #CCCCCC one below and has been dropped. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border-radius: 3px;
  border: 1px solid #CCCCCC;
  margin: 0 5px;
}

/* ---- Headings ---- */
h1 {
  margin-top: 0;
  font-size: 35px;
  line-height: 40px;
}
h2 {
  border-bottom: 4px solid #f7f7f7;
  padding-top: 10px;
  padding-bottom: 2px;
  font-size: 145%;
}
h3 {
  border-bottom: 2px solid #f7f7f7;
  padding-top: 10px;
  font-size: 120%;
}
h4 {
  border-bottom: 1px solid #f7f7f7;
  margin-left: 8px;
  font-size: 105%;
}
h5, h6 {
  border-bottom: 1px solid #ccc;
  font-size: 105%;
}

/* ---- Links ---- */
a {
  color: #0033dd;
  text-decoration: none;
}
a:hover {
  color: #6666ff;
}
a:visited {
  color: #800080;
}
a:visited:hover {
  color: #BB00BB;
}
/* Absolute http/https links are underlined. */
a[href^="http:"] {
  text-decoration: underline;
}
a[href^="https:"] {
  text-decoration: underline;
}

/* ---- Token colours ----
   These selectors use the child combinator, so they only match token spans
   that are direct children of <code>. In this document's highlighted blocks
   the token spans sit inside per-line spans (id="cbN-M"), so these rules do
   not reach them. */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Introduction to ggplot2</h1>
<p>ggplot2 is an R package for producing visualizations of data. Unlike
many graphics packages, ggplot2 uses a conceptual framework based on the
grammar of graphics. This allows you to "speak" a graph from composable
elements, instead of being limited to a predefined set of charts.</p>
<p>More complete information about how to use ggplot2 can be found in
the <a href="https://ggplot2-book.org/">book</a>, but here you'll find a
brief overview of the plot components and some terse examples to build a
plot like this:</p>
<p><img src="
<p>For structure, we go over the 7 composable parts that come together
as a set of instructions on how to draw a chart.</p>
<p><img src="
<p>Out of these components, ggplot2 needs at least the following three
to produce a chart: data, a mapping, and a layer. The scales, facets,
coordinates, and themes have sensible defaults that take away a lot of
finicky work.</p>
<div id="data" class="section level2">
<h2>Data</h2>
<p>As the foundation of every graphic, ggplot2 uses <a href="https://ggplot2-book.org/getting-started.html#fuel-economy-data">data</a>
to construct a plot. The system works best if the data is provided in a
<a href="https://tidyr.tidyverse.org/articles/tidy-data.html">tidy</a>
format, which briefly means a rectangular data frame structure where
rows are observations and columns are variables.</p>
<p>As the first step in many plots, you would pass the data to the
<code>ggplot()</code> function, which stores the data to be used later
by other parts of the plotting system. For example, if we intend to make
a graphic about the <code>mpg</code> dataset, we would start as
follows:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">ggplot</span>(<span class="at">data =</span> mpg)</span></code></pre></div>
</div>
<div id="mapping" class="section level2">
<h2>Mapping</h2>
<p>The <a href="https://ggplot2-book.org/getting-started.html#aesthetics">mapping</a>
of a plot is a set of instructions on how parts of the data are mapped
onto aesthetic attributes of geometric objects. It is the dictionary
to translate tidy data to the graphics system.</p>
<p>A mapping can be made by using the <code>aes()</code> function to
make pairs of graphical attributes and parts of the data. If we want the
<code>cty</code> and <code>hwy</code> columns to map to the x- and
y-coordinates in the plot, we can do that as follows:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">x =</span> cty, <span class="at">y =</span> hwy))</span></code></pre></div>
</div>
<div id="layers" class="section level2">
<h2>Layers</h2>
<p>The heart of any graphic is the <a href="https://ggplot2-book.org/toolbox.html">layers</a>. They take the
mapped data and display it in something humans can understand as a
representation of the data. Every layer consists of three important
parts:</p>
<ol style="list-style-type: decimal">
<li>The <a href="https://ggplot2-book.org/individual-geoms.html"><strong>geometry</strong></a>
that determines <em>how</em> data are displayed, such as points, lines,
or rectangles.</li>
<li>The <a href="https://ggplot2-book.org/statistical-summaries.html"><strong>statistical
transformation</strong></a> that may compute new variables from the data
and affect <em>what</em> of the data is displayed.</li>
<li>The <a href="https://ggplot2-book.org/layers.html#position"><strong>position
adjustment</strong></a> that primarily determines <em>where</em> a piece
of data is being displayed.</li>
</ol>
<p>A layer can be constructed using the <code>geom_*()</code> and
<code>stat_*()</code> functions. These functions often determine one of
the three parts of a layer, while the other two can still be specified.
Here is how we can use two layers to display the <code>cty</code> and
<code>hwy</code> columns of the <code>mpg</code> dataset as points and
stack a trend line on top.</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy)) <span class="sc">+</span></span>
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a> <span class="co"># to create a scatterplot</span></span>
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a> <span class="co"># to fit and overlay a linear trendline</span></span>
<span id="cb3-5"><a href="#cb3-5" tabindex="-1"></a> <span class="fu">geom_smooth</span>(<span class="at">formula =</span> y <span class="sc">~</span> x, <span class="at">method =</span> <span class="st">&quot;lm&quot;</span>)</span></code></pre></div>
<p><img src="
</div>
<div id="scales" class="section level2">
<h2>Scales</h2>
<p><a href="https://ggplot2-book.org/scales-guides.html">Scales</a> are
important for translating what is shown on the graph back to an
understanding of the data. The scales typically form pairs with
aesthetic attributes of the plots, and are represented in plots by
guides, like axes or legends. Scales are responsible for updating the
limits of a plot, setting the breaks, formatting the labels, and
possibly applying a transformation.</p>
<p>To use scales, one can use one of the scale functions that are
patterned as <code>scale_{aesthetic}_{type}()</code> functions, where
<code>{aesthetic}</code> is one of the pairings made in the mapping part
of a plot. To map the <code>class</code> column in the <code>mpg</code>
dataset to the viridis colour palette, we can write the following:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy, <span class="at">colour =</span> class)) <span class="sc">+</span></span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a> <span class="fu">scale_colour_viridis_d</span>()</span></code></pre></div>
<p><img src="
</div>
<div id="facets" class="section level2">
<h2>Facets</h2>
<p><a href="https://ggplot2-book.org/facet.html">Facets</a> can be used
to separate small multiples, or different subsets of the data. It is a
powerful tool to quickly split up the data into smaller panels, based on
one or more variables, to display patterns or trends (or the lack
thereof) within the subsets.</p>
<p>The facets have their own mapping that can be given as a formula. To
plot subsets of the <code>mpg</code> dataset based on levels of the
<code>drv</code> and <code>year</code> variables, we can use
<code>facet_grid()</code> as follows:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy)) <span class="sc">+</span></span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a> <span class="fu">facet_grid</span>(year <span class="sc">~</span> drv)</span></code></pre></div>
<p><img src="
</div>
<div id="coordinates" class="section level2">
<h2>Coordinates</h2>
<p>You can view the <a href="https://ggplot2-book.org/coord.html">coordinates</a> part of the
plot as an interpreter of position aesthetics. While typically Cartesian
coordinates are used, the coordinate system powers the display of <a href="https://ggplot2-book.org/maps.html">map</a> projections and <a href="https://ggplot2-book.org/coord.html#polar-coordinates-with-coord_polar">polar</a>
plots.</p>
<p>We can also use coordinates to display a plot with a fixed aspect
ratio so that one unit has the same length in both the x and y
directions. The <code>coord_fixed()</code> function sets this ratio
automatically.</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy)) <span class="sc">+</span></span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a> <span class="fu">coord_fixed</span>()</span></code></pre></div>
<p><img src="
</div>
<div id="theme" class="section level2">
<h2>Theme</h2>
<p>The <a href="https://ggplot2-book.org/themes">theme</a> system
controls almost any visuals of the plot that are not controlled by the
data and is therefore important for the look and feel of the plot. You
can use the theme for customizations ranging from changing the location
of the legends to setting the background color of the plot. Many
elements in the theme are hierarchical in that setting the look of the
general axis line affects those of the x and y axes simultaneously.</p>
<p>To tweak the look of the plot, one can use many of the built-in
<code>theme_*()</code> functions and/or detail specific aspects with the
<code>theme()</code> function. The <code>element_*()</code> functions
control the graphical attributes of theme components.</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy, <span class="at">colour =</span> class)) <span class="sc">+</span></span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a> <span class="fu">geom_point</span>() <span class="sc">+</span></span>
<span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
<span id="cb7-4"><a href="#cb7-4" tabindex="-1"></a> <span class="fu">theme</span>(</span>
<span id="cb7-5"><a href="#cb7-5" tabindex="-1"></a> <span class="at">legend.position =</span> <span class="st">&quot;top&quot;</span>,</span>
<span id="cb7-6"><a href="#cb7-6" tabindex="-1"></a> <span class="at">axis.line =</span> <span class="fu">element_line</span>(<span class="at">linewidth =</span> <span class="fl">0.75</span>),</span>
<span id="cb7-7"><a href="#cb7-7" tabindex="-1"></a> <span class="at">axis.line.x.bottom =</span> <span class="fu">element_line</span>(<span class="at">colour =</span> <span class="st">&quot;blue&quot;</span>)</span>
<span id="cb7-8"><a href="#cb7-8" tabindex="-1"></a> )</span></code></pre></div>
<p><img src="
</div>
<div id="combining" class="section level2">
<h2>Combining</h2>
<p>As mentioned at the start, you can layer all of the pieces to build a
customized plot of your data, like the one shown at the beginning of
this vignette:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a><span class="fu">ggplot</span>(mpg, <span class="fu">aes</span>(cty, hwy)) <span class="sc">+</span></span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a> <span class="fu">geom_point</span>(<span class="at">mapping =</span> <span class="fu">aes</span>(<span class="at">colour =</span> displ)) <span class="sc">+</span></span>
<span id="cb8-3"><a href="#cb8-3" tabindex="-1"></a> <span class="fu">geom_smooth</span>(<span class="at">formula =</span> y <span class="sc">~</span> x, <span class="at">method =</span> <span class="st">&quot;lm&quot;</span>) <span class="sc">+</span></span>
<span id="cb8-4"><a href="#cb8-4" tabindex="-1"></a> <span class="fu">scale_colour_viridis_c</span>() <span class="sc">+</span></span>
<span id="cb8-5"><a href="#cb8-5" tabindex="-1"></a> <span class="fu">facet_grid</span>(year <span class="sc">~</span> drv) <span class="sc">+</span></span>
<span id="cb8-6"><a href="#cb8-6" tabindex="-1"></a> <span class="fu">coord_fixed</span>() <span class="sc">+</span></span>
<span id="cb8-7"><a href="#cb8-7" tabindex="-1"></a> <span class="fu">theme_minimal</span>() <span class="sc">+</span></span>
<span id="cb8-8"><a href="#cb8-8" tabindex="-1"></a> <span class="fu">theme</span>(<span class="at">panel.grid.minor =</span> <span class="fu">element_blank</span>())</span></code></pre></div>
<p><img src="
<p>If you want to learn more, be sure to take a look at the <a href="https://ggplot2-book.org/">ggplot2 book</a>.</p>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Build a <script> element pointing at the hosted MathJax bundle.
  var mathjaxTag = document.createElement("script");
  mathjaxTag.type = "text/javascript";
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

  // Attach it to the first <head> element of the document.
  var headEl = document.getElementsByTagName("head")[0];
  headEl.appendChild(mathjaxTag);
})();
</script>
</body>
</html>