1234 lines
1.6 MiB
HTML
1234 lines
1.6 MiB
HTML
|
<!DOCTYPE html>
|
|||
|
|
|||
|
<html lang="en">
|
|||
|
|
|||
|
<head>
|
|||
|
|
|||
|
<meta charset="utf-8" />
|
|||
|
<meta name="generator" content="pandoc" />
|
|||
|
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
|
|||
|
|
|||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|||
|
|
|||
|
<meta name="author" content="Joe Cheng (joe@rstudio.com)" />
|
|||
|
|
|||
|
|
|||
|
<title>Case study: converting a Shiny app to async</title>
|
|||
|
|
|||
|
<script>// Pandoc 2.9 puts the section attributes on both the wrapping div and its
// header. Strip them from the header so the DOM matches what Pandoc < 2.8 emitted.
document.addEventListener('DOMContentLoaded', function() {
  var headerTag = /^h[1-6]$/i;
  var candidates = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(candidates, function(node) {
    // Only section headers (h1-h6) are cleaned; any other first child is left alone.
    if (!headerTag.test(node.tagName)) return;
    // `attributes` shrinks as entries are removed, so keep dropping its first
    // entry until nothing is left.
    var attrs = node.attributes;
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
</script>
|
|||
|
|
|||
|
<style type="text/css">
|
|||
|
code{white-space: pre-wrap;}
|
|||
|
span.smallcaps{font-variant: small-caps;}
|
|||
|
span.underline{text-decoration: underline;}
|
|||
|
div.column{display: inline-block; vertical-align: top; width: 50%;}
|
|||
|
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
|||
|
ul.task-list{list-style: none;}
|
|||
|
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">
|
|||
|
code {
|
|||
|
white-space: pre;
|
|||
|
}
|
|||
|
.sourceCode {
|
|||
|
overflow: visible;
|
|||
|
}
|
|||
|
</style>
|
|||
|
<style type="text/css" data-origin="pandoc">
|
|||
|
pre > code.sourceCode { white-space: pre; position: relative; }
|
|||
|
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
|||
|
pre > code.sourceCode > span:empty { height: 1.2em; }
|
|||
|
.sourceCode { overflow: visible; }
|
|||
|
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
|||
|
div.sourceCode { margin: 1em 0; }
|
|||
|
pre.sourceCode { margin: 0; }
|
|||
|
@media screen {
|
|||
|
div.sourceCode { overflow: auto; }
|
|||
|
}
|
|||
|
@media print {
|
|||
|
pre > code.sourceCode { white-space: pre-wrap; }
|
|||
|
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
|||
|
}
|
|||
|
pre.numberSource code
|
|||
|
{ counter-reset: source-line 0; }
|
|||
|
pre.numberSource code > span
|
|||
|
{ position: relative; left: -4em; counter-increment: source-line; }
|
|||
|
pre.numberSource code > span > a:first-child::before
|
|||
|
{ content: counter(source-line);
|
|||
|
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
|||
|
border: none; display: inline-block;
|
|||
|
-webkit-touch-callout: none; -webkit-user-select: none;
|
|||
|
-khtml-user-select: none; -moz-user-select: none;
|
|||
|
-ms-user-select: none; user-select: none;
|
|||
|
padding: 0 4px; width: 4em;
|
|||
|
color: #aaaaaa;
|
|||
|
}
|
|||
|
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
|||
|
div.sourceCode
|
|||
|
{ }
|
|||
|
@media screen {
|
|||
|
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
|||
|
}
|
|||
|
code span.al { color: #ff0000; font-weight: bold; }
|
|||
|
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.at { color: #7d9029; }
|
|||
|
code span.bn { color: #40a070; }
|
|||
|
code span.bu { color: #008000; }
|
|||
|
code span.cf { color: #007020; font-weight: bold; }
|
|||
|
code span.ch { color: #4070a0; }
|
|||
|
code span.cn { color: #880000; }
|
|||
|
code span.co { color: #60a0b0; font-style: italic; }
|
|||
|
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.do { color: #ba2121; font-style: italic; }
|
|||
|
code span.dt { color: #902000; }
|
|||
|
code span.dv { color: #40a070; }
|
|||
|
code span.er { color: #ff0000; font-weight: bold; }
|
|||
|
code span.ex { }
|
|||
|
code span.fl { color: #40a070; }
|
|||
|
code span.fu { color: #06287e; }
|
|||
|
code span.im { color: #008000; font-weight: bold; }
|
|||
|
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
code span.kw { color: #007020; font-weight: bold; }
|
|||
|
code span.op { color: #666666; }
|
|||
|
code span.ot { color: #007020; }
|
|||
|
code span.pp { color: #bc7a00; }
|
|||
|
code span.sc { color: #4070a0; }
|
|||
|
code span.ss { color: #bb6688; }
|
|||
|
code span.st { color: #4070a0; }
|
|||
|
code span.va { color: #19177c; }
|
|||
|
code span.vs { color: #4070a0; }
|
|||
|
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
|
|||
|
</style>
|
|||
|
<script>
// Apply pandoc's div.sourceCode style to pre.sourceCode instead.
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // Only the stylesheet tagged data-origin="pandoc" is rewritten. A sheet may
    // have no owner node (ownerNode === null) or an owner without `dataset`;
    // skip those instead of throwing and aborting the whole pass.
    var owner = sheets[i].ownerNode;
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules can throw; such sheets are skipped.
    var rules;
    try { rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule; j is not advanced here
      // because the replacement no longer matches "div.sourceCode" and is
      // stepped over by the first check on the next iteration.
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<style type="text/css">body {
|
|||
|
background-color: #fff;
|
|||
|
margin: 1em auto;
|
|||
|
max-width: 700px;
|
|||
|
overflow: visible;
|
|||
|
padding-left: 2em;
|
|||
|
padding-right: 2em;
|
|||
|
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
|||
|
font-size: 14px;
|
|||
|
line-height: 1.35;
|
|||
|
}
|
|||
|
#TOC {
|
|||
|
clear: both;
|
|||
|
margin: 0 0 10px 10px;
|
|||
|
padding: 4px;
|
|||
|
width: 400px;
|
|||
|
border: 1px solid #CCCCCC;
|
|||
|
border-radius: 5px;
|
|||
|
background-color: #f6f6f6;
|
|||
|
font-size: 13px;
|
|||
|
line-height: 1.3;
|
|||
|
}
|
|||
|
#TOC .toctitle {
|
|||
|
font-weight: bold;
|
|||
|
font-size: 15px;
|
|||
|
margin-left: 5px;
|
|||
|
}
|
|||
|
#TOC ul {
|
|||
|
padding-left: 40px;
|
|||
|
margin-left: -1.5em;
|
|||
|
margin-top: 5px;
|
|||
|
margin-bottom: 5px;
|
|||
|
}
|
|||
|
#TOC ul ul {
|
|||
|
margin-left: -2em;
|
|||
|
}
|
|||
|
#TOC li {
|
|||
|
line-height: 16px;
|
|||
|
}
|
|||
|
table {
|
|||
|
margin: 1em auto;
|
|||
|
border-width: 1px;
|
|||
|
border-color: #DDDDDD;
|
|||
|
border-style: outset;
|
|||
|
border-collapse: collapse;
|
|||
|
}
|
|||
|
table th {
|
|||
|
border-width: 2px;
|
|||
|
padding: 5px;
|
|||
|
border-style: inset;
|
|||
|
}
|
|||
|
table td {
|
|||
|
border-width: 1px;
|
|||
|
border-style: inset;
|
|||
|
line-height: 18px;
|
|||
|
padding: 5px 5px;
|
|||
|
}
|
|||
|
table, table th, table td {
|
|||
|
border-left-style: none;
|
|||
|
border-right-style: none;
|
|||
|
}
|
|||
|
table thead, table tr.even {
|
|||
|
background-color: #f7f7f7;
|
|||
|
}
|
|||
|
p {
|
|||
|
margin: 0.5em 0;
|
|||
|
}
|
|||
|
blockquote {
|
|||
|
background-color: #f6f6f6;
|
|||
|
padding: 0.25em 0.75em;
|
|||
|
}
|
|||
|
hr {
  /* `border: none` clears every side; only the top edge is then drawn as the rule line. */
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
|
|||
|
dl {
|
|||
|
margin-left: 0;
|
|||
|
}
|
|||
|
dl dd {
|
|||
|
margin-bottom: 13px;
|
|||
|
margin-left: 13px;
|
|||
|
}
|
|||
|
dl dt {
|
|||
|
font-weight: bold;
|
|||
|
}
|
|||
|
ul {
|
|||
|
margin-top: 0;
|
|||
|
}
|
|||
|
ul li {
|
|||
|
list-style: circle outside;
|
|||
|
}
|
|||
|
ul ul {
|
|||
|
margin-bottom: 0;
|
|||
|
}
|
|||
|
pre, code {
|
|||
|
background-color: #f7f7f7;
|
|||
|
border-radius: 3px;
|
|||
|
color: #333;
|
|||
|
white-space: pre-wrap;
|
|||
|
}
|
|||
|
pre {
|
|||
|
border-radius: 3px;
|
|||
|
margin: 5px 0px 10px 0px;
|
|||
|
padding: 10px;
|
|||
|
}
|
|||
|
pre:not([class]) {
|
|||
|
background-color: #f7f7f7;
|
|||
|
}
|
|||
|
code {
|
|||
|
font-family: Consolas, Monaco, 'Courier New', monospace;
|
|||
|
font-size: 85%;
|
|||
|
}
|
|||
|
p > code, li > code {
|
|||
|
padding: 2px 0px;
|
|||
|
}
|
|||
|
div.figure {
|
|||
|
text-align: center;
|
|||
|
}
|
|||
|
img {
  background-color: #FFFFFF;
  padding: 2px;
  /* Thin rounded frame around every image. */
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
|
|||
|
h1 {
|
|||
|
margin-top: 0;
|
|||
|
font-size: 35px;
|
|||
|
line-height: 40px;
|
|||
|
}
|
|||
|
h2 {
|
|||
|
border-bottom: 4px solid #f7f7f7;
|
|||
|
padding-top: 10px;
|
|||
|
padding-bottom: 2px;
|
|||
|
font-size: 145%;
|
|||
|
}
|
|||
|
h3 {
|
|||
|
border-bottom: 2px solid #f7f7f7;
|
|||
|
padding-top: 10px;
|
|||
|
font-size: 120%;
|
|||
|
}
|
|||
|
h4 {
|
|||
|
border-bottom: 1px solid #f7f7f7;
|
|||
|
margin-left: 8px;
|
|||
|
font-size: 105%;
|
|||
|
}
|
|||
|
h5, h6 {
|
|||
|
border-bottom: 1px solid #ccc;
|
|||
|
font-size: 105%;
|
|||
|
}
|
|||
|
a {
|
|||
|
color: #0033dd;
|
|||
|
text-decoration: none;
|
|||
|
}
|
|||
|
a:hover {
|
|||
|
color: #6666ff; }
|
|||
|
a:visited {
|
|||
|
color: #800080; }
|
|||
|
a:visited:hover {
|
|||
|
color: #BB00BB; }
|
|||
|
a[href^="http:"] {
|
|||
|
text-decoration: underline; }
|
|||
|
a[href^="https:"] {
|
|||
|
text-decoration: underline; }
|
|||
|
|
|||
|
code > span.kw { color: #555; font-weight: bold; }
|
|||
|
code > span.dt { color: #902000; }
|
|||
|
code > span.dv { color: #40a070; }
|
|||
|
code > span.bn { color: #d14; }
|
|||
|
code > span.fl { color: #d14; }
|
|||
|
code > span.ch { color: #d14; }
|
|||
|
code > span.st { color: #d14; }
|
|||
|
code > span.co { color: #888888; font-style: italic; }
|
|||
|
code > span.ot { color: #007020; }
|
|||
|
code > span.al { color: #ff0000; font-weight: bold; }
|
|||
|
code > span.fu { color: #900; font-weight: bold; }
|
|||
|
code > span.er { color: #a61717; background-color: #e3d2d2; }
|
|||
|
</style>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
</head>
|
|||
|
|
|||
|
<body>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<h1 class="title toc-ignore">Case study: converting a Shiny app to
|
|||
|
async</h1>
|
|||
|
<h4 class="author">Joe Cheng (<a href="mailto:joe@rstudio.com" class="email">joe@rstudio.com</a>)</h4>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<p>In this case study, we’ll work through an application of reasonable
|
|||
|
complexity, turning its slowest operations into futures/promises and
|
|||
|
modifying all the downstream reactive expressions and outputs to deal
|
|||
|
with promises.</p>
|
|||
|
<div id="motivation" class="section level2">
|
|||
|
<h2>Motivation</h2>
|
|||
|
<blockquote>
|
|||
|
<p>As a web service increases in popularity, so does the number of rogue
|
|||
|
scripts that abuse it for no apparent reason.</p>
|
|||
|
<p><em>—Cheng’s Law of Why We Can’t Have Nice Things</em></p>
|
|||
|
</blockquote>
|
|||
|
<p>I first noticed this in 2011, when the then-new RStudio IDE was
|
|||
|
starting to gather steam. We had a dashboard that tracked how often
|
|||
|
RStudio was being downloaded, and the numbers were generally tracking
|
|||
|
smoothly upward. But once every few months, we’d have huge spikes in the
|
|||
|
download counts, ten times greater than normal—and invariably, we’d find
|
|||
|
that all of the unexpected increase could be tracked to one or two IP
|
|||
|
addresses.</p>
|
|||
|
<p>For hours or days we’d be inundated with thousands of downloads per
|
|||
|
hour, then just as suddenly, they’d cease. I didn’t know what was
|
|||
|
happening then, and I still don’t know today. Was it the world’s least
|
|||
|
competent denial-of-service attempt? Did someone write a download script
|
|||
|
with an accidental <code>while (TRUE)</code> around it?</p>
|
|||
|
<p>Our application will let us examine downloads from CRAN for this kind
|
|||
|
of behavior. For any given day on CRAN, we’ll see what the top
|
|||
|
downloaders are and how they’re behaving.</p>
|
|||
|
</div>
|
|||
|
<div id="our-source-data" class="section level2">
|
|||
|
<h2>Our source data</h2>
|
|||
|
<p>RStudio maintains the popular <code>0-Cloud</code> CRAN mirror, and
|
|||
|
the log files it generates are freely available at <a href="http://cran-logs.rstudio.com/" class="uri">http://cran-logs.rstudio.com/</a>. Each day is a separate
|
|||
|
gzipped CSV file, and each row is a single package download. For
|
|||
|
privacy, IP addresses are anonymized by substituting each day’s IP
|
|||
|
addresses with unique integer IDs.</p>
|
|||
|
<p>Here are the first few lines of <a href="http://cran-logs.rstudio.com/2018/2018-05-26.csv.gz" class="uri">http://cran-logs.rstudio.com/2018/2018-05-26.csv.gz</a>
|
|||
|
:</p>
|
|||
|
<pre><code>"date","time","size","r_version","r_arch","r_os","package","version","country","ip_id"
|
|||
|
"2018-05-26","20:42:23",450377,"3.4.4","x86_64","linux-gnu","lubridate","1.7.4","NL",1
|
|||
|
"2018-05-26","20:42:30",484348,NA,NA,NA,"homals","0.9-7","GB",2
|
|||
|
"2018-05-26","20:42:21",98484,"3.3.1","x86_64","darwin13.4.0","miniUI","0.1.1.1","NL",1
|
|||
|
"2018-05-26","20:42:27",518,"3.4.4","x86_64","linux-gnu","RCurl","1.95-4.10","US",3</code></pre>
|
|||
|
<p>Fortunately for our purposes, there’s no need to analyze these logs
|
|||
|
at a high level to figure out which days are affected by badly behaved
|
|||
|
download scripts. This CRAN mirror is popular enough that, according
|
|||
|
to Cheng’s Law, there should be plenty of rogue scripts hitting it every
|
|||
|
day of the year.</p>
|
|||
|
</div>
|
|||
|
<div id="a-tour-of-the-app" class="section level2">
|
|||
|
<h2>A tour of the app</h2>
|
|||
|
<p>The app I built to explore this data, <strong>cranwhales</strong>,
|
|||
|
lets us examine the behavior of the top downloaders (“whales”) for any
|
|||
|
given day, at varying levels of detail. You can view this app live at <a href="https://gallery.shinyapps.io/cranwhales/" class="uri">https://gallery.shinyapps.io/cranwhales/</a>, or download
|
|||
|
and run the code yourself at <a href="https://github.com/rstudio/cranwhales" class="uri">https://github.com/rstudio/cranwhales</a>.</p>
|
|||
|
<p>When the app starts, the “All traffic” tab shows you the number of
|
|||
|
package downloads per hour for all users vs. whales. In this screenshot,
|
|||
|
you can see the proportion of files downloaded by the top six
|
|||
|
downloaders on May 28, 2018. It may not look like a huge fraction at
|
|||
|
first, but keep in mind, we are only talking about six downloaders out
|
|||
|
of 52,815 total!</p>
|
|||
|
<p><img src="
|
|||
|
<p>The “Biggest whales” tab simply shows the most prolific downloaders,
|
|||
|
with their number of downloads performed. Each anonymized IP address has
|
|||
|
been assigned an easier-to-remember name, and you can also see the
|
|||
|
country code of the original IP address.</p>
|
|||
|
<p><img src="
|
|||
|
<p>The “Whales by hour” tab shows the hourly download counts for each
|
|||
|
whale individually. In this screenshot, you can see that the
|
|||
|
Netherlands’ <code>relieved_snake</code> downloaded at an extremely
|
|||
|
consistent rate during the whole day, while the American
|
|||
|
<code>curly_capabara</code> was active only during business hours in
|
|||
|
Eastern Standard Time. Still others, like <code>colossal_chicken</code>
|
|||
|
out of Hong Kong, were busy all day but at varying rates.</p>
|
|||
|
<p><img src="
|
|||
|
<p>The “Detail view” tab has perhaps the most illuminating information. It
|
|||
|
lets you view every download made by a given whale on the day in
|
|||
|
question. The x dimension is time and the y dimension is what package
|
|||
|
they downloaded, so you can see at a glance exactly how many packages
|
|||
|
were downloaded, and how their various package downloads relate to each
|
|||
|
other. In this case, <code>relieved_snake</code> downloaded 104
|
|||
|
different packages, in the same order, continuously, for the entire
|
|||
|
day.</p>
|
|||
|
<p><img src="
|
|||
|
<p>Others behave very differently, like <code>freezing_tapir</code>, who
|
|||
|
downloaded <code>devtools</code>–and <em>only</em>
|
|||
|
<code>devtools</code>–for the whole day, racking up 19,180 downloads
|
|||
|
totalling 7.9 gigabytes for that one package alone!</p>
|
|||
|
<p><img src="
|
|||
|
<p>Sadly, the app can’t tell us any more than that–it can’t explain
|
|||
|
<em>why</em> these downloaders are behaving this way, nor can it tell us
|
|||
|
their street addresses so that we can send ninjas in black RStudio
|
|||
|
helicopters to make them stop.</p>
|
|||
|
</div>
|
|||
|
<div id="the-implementation" class="section level2">
|
|||
|
<h2>The implementation</h2>
|
|||
|
<p>Now that you’ve seen what the app does, let’s talk about how it was
|
|||
|
implemented, then convert it from sync to async.</p>
|
|||
|
<div id="user-interface" class="section level3">
|
|||
|
<h3>User interface</h3>
|
|||
|
<p>The user interface is a pretty typical shinydashboard. It’s important
|
|||
|
to note that the UI part of the app is entirely agnostic to whether the
|
|||
|
server is written in the sync or async style; when we port the app to
|
|||
|
async, we won’t touch the UI at all.</p>
|
|||
|
<p>There are two major pieces of input we need from users: what
|
|||
|
<strong>date</strong> to examine (this app only lets us look at one day
|
|||
|
at a time) and <strong>how many</strong> of the most prolific
|
|||
|
downloaders to look at. We’ll put these two controls in the dashboard
|
|||
|
sidebar.</p>
|
|||
|
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">dashboardSidebar</span>(</span>
|
|||
|
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">dateInput</span>(<span class="st">"date"</span>, <span class="st">"Date"</span>, <span class="at">value =</span> <span class="fu">Sys.Date</span>() <span class="sc">-</span> <span class="dv">2</span>),</span>
|
|||
|
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">numericInput</span>(<span class="st">"count"</span>, <span class="st">"Show top N downloaders:"</span>, <span class="dv">6</span>)</span>
|
|||
|
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
|
|||
|
<p>(We set <code>date</code> to two days ago by default, because there’s
|
|||
|
some lag between when a day ends and when its logs are published.)</p>
|
|||
|
<p>The rest of the UI code is just typical shinydashboard scaffolding,
|
|||
|
plus some <code>shinydashboard::valueBoxOutput</code>s and
|
|||
|
<code>plotOutput</code>s. These are so trivial that they’re hardly worth
|
|||
|
talking about, but I’ll include the code here for completeness. Finally,
|
|||
|
there’s <code>detailViewUI</code>, a <a href="https://shiny.posit.co/r/articles/improve/modules/">Shiny
|
|||
|
module</a> that just contains more of the same (value boxes and
|
|||
|
plots).</p>
|
|||
|
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a> <span class="fu">dashboardBody</span>(</span>
|
|||
|
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="fu">fluidRow</span>(</span>
|
|||
|
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">tabBox</span>(<span class="at">width =</span> <span class="dv">12</span>,</span>
|
|||
|
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">tabPanel</span>(<span class="st">"All traffic"</span>,</span>
|
|||
|
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">fluidRow</span>(</span>
|
|||
|
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBoxOutput</span>(<span class="st">"total_size"</span>, <span class="at">width =</span> <span class="dv">4</span>),</span>
|
|||
|
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBoxOutput</span>(<span class="st">"total_count"</span>, <span class="at">width =</span> <span class="dv">4</span>),</span>
|
|||
|
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBoxOutput</span>(<span class="st">"total_downloaders"</span>, <span class="at">width =</span> <span class="dv">4</span>)</span>
|
|||
|
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> ),</span>
|
|||
|
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">plotOutput</span>(<span class="st">"all_hour"</span>)</span>
|
|||
|
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> ),</span>
|
|||
|
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">tabPanel</span>(<span class="st">"Biggest whales"</span>,</span>
|
|||
|
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">plotOutput</span>(<span class="st">"downloaders"</span>, <span class="at">height =</span> <span class="dv">500</span>)</span>
|
|||
|
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> ),</span>
|
|||
|
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="fu">tabPanel</span>(<span class="st">"Whales by hour"</span>,</span>
|
|||
|
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> <span class="fu">plotOutput</span>(<span class="st">"downloaders_hour"</span>, <span class="at">height =</span> <span class="dv">500</span>)</span>
|
|||
|
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> ),</span>
|
|||
|
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">tabPanel</span>(<span class="st">"Detail view"</span>,</span>
|
|||
|
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">detailViewUI</span>(<span class="st">"details"</span>)</span>
|
|||
|
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> )</span>
|
|||
|
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> )</span>
|
|||
|
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> )</span>
|
|||
|
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> )</span></code></pre></div>
|
|||
|
</div>
|
|||
|
<div id="server-logic" class="section level3">
|
|||
|
<h3>Server logic</h3>
|
|||
|
<p>Based on these inputs and outputs, we’ll write a variety of reactive
|
|||
|
expressions and output renderers to download, manipulate, and visualize
|
|||
|
the relevant log data.</p>
|
|||
|
<p>The reactive expressions:</p>
|
|||
|
<ul>
|
|||
|
<li><code>data</code> (<code>eventReactive</code>): Whenever
|
|||
|
<code>input$date</code> changes, the <code>data</code> reactive
|
|||
|
downloads the full log for that day from <a href="http://cran-logs.rstudio.com" class="uri">http://cran-logs.rstudio.com</a>, and parses it.</li>
|
|||
|
<li><code>whales</code> (<code>reactive</code>): Reads from
|
|||
|
<code>data()</code>, tallies the number of downloads performed by each
|
|||
|
unique IP, and returns a data frame of the top <code>input$count</code>
|
|||
|
most prolific downloaders, along with their download counts.</li>
|
|||
|
<li><code>whale_downloads</code> (<code>reactive</code>): Joins the
|
|||
|
<code>data()</code> and <code>whales()</code> data frames, to return all
|
|||
|
of the details of the cetacean downloads.</li>
|
|||
|
</ul>
|
|||
|
<p>The <code>whales</code> reactive expression depends on
|
|||
|
<code>data</code>, and <code>whale_downloads</code> depends on
|
|||
|
<code>data</code> and <code>whales</code>.</p>
|
|||
|
<p><img src="
|
|||
|
<p>The outputs in this app are mostly either <code>renderPlot</code>s
|
|||
|
that we populate with <code>ggplot2</code>, or
|
|||
|
<code>shinydashboard::renderValueBox</code>es. They all rely on one or
|
|||
|
more of the reactive expressions we just described. We won’t catalog
|
|||
|
them all here, as they’re not individually interesting, but we will look
|
|||
|
at some archetypes below.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="improving-performance-and-scalability" class="section level2">
|
|||
|
<h2>Improving performance and scalability</h2>
|
|||
|
<p>While this article is specifically about async, this is a good time
|
|||
|
to remind you that there are lots of ways to improve the performance of
|
|||
|
a Shiny app. Async is just one tool in the toolbox, and before reaching
|
|||
|
for that hammer, take a moment to consider your other options:</p>
|
|||
|
<ol style="list-style-type: decimal">
|
|||
|
<li>Have I used <a href="https://profvis.r-lib.org/">profvis</a> to
|
|||
|
<strong>profile my code</strong> and determine what’s actually taking so
|
|||
|
long? (Human intuition is a notoriously bad profiler!)</li>
|
|||
|
<li>Can I perform any <strong>calculations, summarizations, and
|
|||
|
aggregations offline</strong>, when my Shiny app isn’t even running, and
|
|||
|
save the results to .rds files to be read by the app?</li>
|
|||
|
<li>Are there any opportunities to <strong>cache</strong>–that is, save
|
|||
|
the results of my calculations and use them if I get the same request
|
|||
|
later? (See <a href="https://cran.r-project.org/package=memoise">memoise</a>, or roll
|
|||
|
your own.)</li>
|
|||
|
<li>Am I effectively leveraging <a href="https://posit.co/resources/videos/reactivity-pt-1-joe-cheng/">reactive
|
|||
|
programming</a> to make sure my reactives are doing as little work as
|
|||
|
possible?</li>
|
|||
|
<li>When deploying my app, am I load balancing across multiple R
|
|||
|
processes and/or multiple servers? (<a href="https://docs.posit.co/shiny-server/">Shiny Server Pro</a>, <a href="https://docs.posit.co/connect/admin/appendix/configuration/">RStudio
|
|||
|
Connect</a>, <a href="https://shiny.posit.co/r/articles/improve/scaling-and-tuning/">ShinyApps.io</a>)</li>
|
|||
|
</ol>
|
|||
|
<p>These options are more generally useful than using async techniques
|
|||
|
because they can dramatically speed up the performance of an app even if
|
|||
|
only a single user is using it. While it obviously depends on the
|
|||
|
particulars of the app itself, a few lines of precomputation or caching
|
|||
|
logic can often lead to 10X-100X better performance. Async, on the other
|
|||
|
hand, generally doesn’t help make a single session faster. Instead, it
|
|||
|
helps a single Shiny process support more concurrent sessions without
|
|||
|
getting bogged down.</p>
|
|||
|
<p>Async can be an essential tool when there is no way around performing
|
|||
|
expensive tasks (i.e. taking multiple seconds) while the user waits. For
|
|||
|
example, an app that analyzes any user-specified Twitter profile may get
|
|||
|
too many unique queries (assuming most people specify their own Twitter
|
|||
|
handle) for caching to be much help. And applications that invite users
|
|||
|
to upload their own datasets won’t have an opportunity to do any offline
|
|||
|
summarizing in advance. If you need to run apps like that and support
|
|||
|
lots of concurrent users, async can be a huge help.</p>
|
|||
|
<p>In that sense, the cranwhales app isn’t a perfect example, because it
|
|||
|
has lots of opportunities for precomputation and caching that we’ll
|
|||
|
willfully ignore today so that I can better illustrate the points I want
|
|||
|
to make about async. When you’re working on your own app, though, please
|
|||
|
think carefully about <em>all</em> of the different techniques you have
|
|||
|
for improving performance.</p>
|
|||
|
</div>
|
|||
|
<div id="converting-to-async" class="section level2">
|
|||
|
<h2>Converting to async</h2>
|
|||
|
<p>To quote the article <a href="https://rstudio.github.io/promises/articles/shiny.html"><em>Using
|
|||
|
promises with Shiny</em></a>, async programming with Shiny boils down to
|
|||
|
following a few steps:</p>
|
|||
|
<ol style="list-style-type: decimal">
|
|||
|
<li>Identify slow operations in your app.</li>
|
|||
|
<li>Convert the slow operations into futures.</li>
|
|||
|
<li>Any code that relies on the result of those operations (if any),
|
|||
|
whether directly or indirectly, now must be converted to promise
|
|||
|
handlers that operate on the future object.</li>
|
|||
|
</ol>
|
|||
|
<p>In this case, the slow operations are easy to identify: the
|
|||
|
downloading and parsing that takes place in the <code>data</code>
|
|||
|
reactive expression can each take several long seconds.</p>
|
|||
|
<p>Converting the download and parsing operations into futures turns out
|
|||
|
to be the most complicated part of the process, for reasons we’ll get
|
|||
|
into later.</p>
|
|||
|
<p>Assuming we do that successfully, the <code>data</code> reactive
|
|||
|
expression will no longer return a data frame, but a
|
|||
|
<code>promise</code> object (that resolves to a data frame). Since the
|
|||
|
<code>whales</code> and <code>whale_downloads</code> reactive
|
|||
|
expressions both rely on <code>data</code>, those will both also need to
|
|||
|
be converted to read and return <code>promise</code> objects. And
|
|||
|
therefore, because the outputs all rely on one or more reactive
|
|||
|
expressions, they will all need to know how to deal with
|
|||
|
<code>promise</code> objects.</p>
|
|||
|
<p>Async code is infectious like that; once you turn the heart of your
|
|||
|
app into a promise, everything downstream must become promise-aware as
|
|||
|
well, all the way through to the observers and outputs.</p>
|
|||
|
<p>With that overview out of the way, let’s dive into the code.</p>
|
|||
|
<p>In the sections below, we’ll take a look at the code behind some
|
|||
|
outputs and reactive expressions. For each element, we’ll look first at
|
|||
|
the sync version, then the async version.</p>
|
|||
|
<p>In some cases, these code snippets may be slightly abridged. See the
|
|||
|
<a href="https://github.com/rstudio/cranwhales">GitHub repository</a>
|
|||
|
for the full code.</p>
|
|||
|
<p>Until you’ve received an introduction to the <code>%...>%</code>
|
|||
|
operator, the async code below will make no sense, so if you haven’t
|
|||
|
read <a href="https://rstudio.github.io/promises/articles/intro.html"><em>An
|
|||
|
informal intro to async programming</em></a> and/or <a href="https://rstudio.github.io/promises/articles/overview.html"><em>Working
|
|||
|
with promises in R</em></a>, I highly recommend doing so before
|
|||
|
continuing!</p>
|
|||
|
<div id="loading-promises-and-future" class="section level3">
|
|||
|
<h3>Loading <code>promises</code> and <code>future</code></h3>
|
|||
|
<p>The first thing we’ll do is load the basic libraries of async
|
|||
|
programming.</p>
|
|||
|
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(promises)</span>
|
|||
|
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(future)</span>
|
|||
|
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="fu">plan</span>(multisession)</span></code></pre></div>
|
|||
|
<p>I originally used <code>multiprocess</code> but file downloading
|
|||
|
inside a future seemed to fail on Mac. (I’ve found that it’s usually not
|
|||
|
worth spending a lot of time trying to figure out why
|
|||
|
<code>multiprocess</code> doesn’t work for some specific code; instead,
|
|||
|
just use <code>multisession</code>, since that’s probably going to be
|
|||
|
the solution anyway.)</p>
|
|||
|
</div>
|
|||
|
<div id="the-data-reactive-future_promise-all-the-things" class="section level3">
|
|||
|
<h3>The <code>data</code> reactive: future_promise() all the things</h3>
|
|||
|
<p>The next thing we’ll do is convert the <code>data</code> event
|
|||
|
reactive to use <code>future</code> for the expensive bits. The original
|
|||
|
code looks like this:</p>
|
|||
|
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a>data <span class="ot"><-</span> <span class="fu">eventReactive</span>(input<span class="sc">$</span>date, {</span>
|
|||
|
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a> date <span class="ot"><-</span> input<span class="sc">$</span>date <span class="co"># Example: 2018-05-28</span></span>
|
|||
|
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a> year <span class="ot"><-</span> lubridate<span class="sc">::</span><span class="fu">year</span>(date) <span class="co"># Example: "2018"</span></span>
|
|||
|
<span id="cb5-6"><a href="#cb5-6" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-7"><a href="#cb5-7" aria-hidden="true" tabindex="-1"></a> url <span class="ot"><-</span> <span class="fu">glue</span>(<span class="st">"http://cran-logs.rstudio.com/{year}/{date}.csv.gz"</span>)</span>
|
|||
|
<span id="cb5-8"><a href="#cb5-8" aria-hidden="true" tabindex="-1"></a> path <span class="ot"><-</span> <span class="fu">file.path</span>(<span class="st">"data_cache"</span>, <span class="fu">paste0</span>(date, <span class="st">".csv.gz"</span>))</span>
|
|||
|
<span id="cb5-9"><a href="#cb5-9" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-10"><a href="#cb5-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">withProgress</span>(<span class="at">value =</span> <span class="cn">NULL</span>, {</span>
|
|||
|
<span id="cb5-11"><a href="#cb5-11" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-12"><a href="#cb5-12" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">file.exists</span>(path)) {</span>
|
|||
|
<span id="cb5-13"><a href="#cb5-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">setProgress</span>(<span class="at">message =</span> <span class="st">"Downloading data..."</span>)</span>
|
|||
|
<span id="cb5-14"><a href="#cb5-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">download.file</span>(url, path)</span>
|
|||
|
<span id="cb5-15"><a href="#cb5-15" aria-hidden="true" tabindex="-1"></a> }</span>
|
|||
|
<span id="cb5-16"><a href="#cb5-16" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-17"><a href="#cb5-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">setProgress</span>(<span class="at">message =</span> <span class="st">"Parsing data..."</span>)</span>
|
|||
|
<span id="cb5-18"><a href="#cb5-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">read_csv</span>(path, <span class="at">col_types =</span> <span class="st">"Dti---c-ci"</span>, <span class="at">progress =</span> <span class="cn">FALSE</span>)</span>
|
|||
|
<span id="cb5-19"><a href="#cb5-19" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb5-20"><a href="#cb5-20" aria-hidden="true" tabindex="-1"></a> })</span>
|
|||
|
<span id="cb5-21"><a href="#cb5-21" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>(Earlier, I said we wouldn’t take advantage of precomputation or
|
|||
|
caching. That wasn’t entirely true; in the code above, we cache the log
|
|||
|
files we download in a <code>data_cache</code> directory. I couldn’t
|
|||
|
bring myself to put my internet connection through that level of abuse,
|
|||
|
as I knew I’d be running this code thousands of times as I load tested
|
|||
|
it.)</p>
|
|||
|
<p>For now, we’ll lose the
|
|||
|
<code>withProgress</code>/<code>setProgress</code> reporting, since
|
|||
|
doing that correctly requires some more advanced techniques that we’ll
|
|||
|
talk about later. We’ll come back and fix this code later, but for
|
|||
|
now:</p>
|
|||
|
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a>data <span class="ot"><-</span> <span class="fu">eventReactive</span>(input<span class="sc">$</span>date, {</span>
|
|||
|
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a> date <span class="ot"><-</span> input<span class="sc">$</span>date</span>
|
|||
|
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a> year <span class="ot"><-</span> lubridate<span class="sc">::</span><span class="fu">year</span>(date)</span>
|
|||
|
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a> url <span class="ot"><-</span> <span class="fu">glue</span>(<span class="st">"http://cran-logs.rstudio.com/{year}/{date}.csv.gz"</span>)</span>
|
|||
|
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> path <span class="ot"><-</span> <span class="fu">file.path</span>(<span class="st">"data_cache"</span>, <span class="fu">paste0</span>(date, <span class="st">".csv.gz"</span>))</span>
|
|||
|
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">future_promise</span>({</span>
|
|||
|
<span id="cb6-11"><a href="#cb6-11" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">file.exists</span>(path)) {</span>
|
|||
|
<span id="cb6-12"><a href="#cb6-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">download.file</span>(url, path)</span>
|
|||
|
<span id="cb6-13"><a href="#cb6-13" aria-hidden="true" tabindex="-1"></a> }</span>
|
|||
|
<span id="cb6-14"><a href="#cb6-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">read_csv</span>(path, <span class="at">col_types =</span> <span class="st">"Dti---c-ci"</span>, <span class="at">progress =</span> <span class="cn">FALSE</span>)</span>
|
|||
|
<span id="cb6-15"><a href="#cb6-15" aria-hidden="true" tabindex="-1"></a> })</span>
|
|||
|
<span id="cb6-16"><a href="#cb6-16" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Pretty straightforward. This reactive now returns a future (which
|
|||
|
counts as a promise), not a data frame.</p>
|
|||
|
<p>Remember that we <strong>must</strong> read any reactive values
|
|||
|
(including <code>input</code>) and reactive expressions <a href="https://rstudio.github.io/promises/articles/shiny.html#shiny-specific-caveats-and-limitations">from
|
|||
|
<strong>outside</strong> the future</a>. (You will get an error if you
|
|||
|
attempt to read one from inside the future.)</p>
|
|||
|
<p>At this point, since there are no other long-running operations we
|
|||
|
want to make asynchronous, we’re actually done interacting directly with
|
|||
|
the <code>future</code> package. The rest of the reactive expressions
|
|||
|
will deal with the future returned by <code>data</code> using general
|
|||
|
async functions and operators from <code>promises</code>.</p>
|
|||
|
</div>
|
|||
|
<div id="the-whales-reactive-simple-pipelines-are-simple" class="section level3">
|
|||
|
<h3>The <code>whales</code> reactive: simple pipelines are simple</h3>
|
|||
|
<p>The <code>whales</code> reactive takes the data frame from
|
|||
|
<code>data</code>, and uses dplyr to find the top
|
|||
|
<code>input$count</code> most prolific downloaders.</p>
|
|||
|
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>whales <span class="ot"><-</span> <span class="fu">reactive</span>({</span>
|
|||
|
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">count</span>(ip_id) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">arrange</span>(<span class="fu">desc</span>(n)) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">head</span>(input<span class="sc">$</span>count)</span>
|
|||
|
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Since <code>data()</code> now returns a promise, the whole function
|
|||
|
needs to be modified to deal with promises.</p>
|
|||
|
<p>This is basically a best-case scenario for working with
|
|||
|
<code>promises</code>. The whole expression consists of a single
|
|||
|
magrittr pipeline. There’s only one object (<code>data()</code>) that’s
|
|||
|
been converted to a promise. The promise object only appears once, at
|
|||
|
the head of the pipeline.</p>
|
|||
|
<p>When the stars align like this, converting this code to async is
|
|||
|
literally as easy as replacing each <code>%>%</code> with
|
|||
|
<code>%...>%</code>:</p>
|
|||
|
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>whales <span class="ot"><-</span> <span class="fu">reactive</span>({</span>
|
|||
|
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb8-5"><a href="#cb8-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">count</span>(ip_id) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb8-6"><a href="#cb8-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">arrange</span>(<span class="fu">desc</span>(n)) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb8-7"><a href="#cb8-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">head</span>(input<span class="sc">$</span>count)</span>
|
|||
|
<span id="cb8-8"><a href="#cb8-8" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>The input (<code>data()</code>) is a promise, the resulting output
|
|||
|
object is a promise, each stage of the pipeline returns a promise; but
|
|||
|
we can read and write this code almost as easily as the synchronous
|
|||
|
version!</p>
|
|||
|
<p>An example this simple may seem reductive, but this best-case
|
|||
|
scenario happens surprisingly often, if your coding style is influenced
|
|||
|
by the tidyverse. In this example app, <strong>59%</strong> of the
|
|||
|
reactives, observers, and outputs were converted using nothing more than
|
|||
|
replacing <code>%>%</code> with <code>%...>%</code>.</p>
|
|||
|
<p>One last thing before we move on. In the last section, I emphasized
|
|||
|
that reactive values cannot be read from inside a future. Here, we’re
|
|||
|
using <code>head(input$count)</code> inside a promise-pipeline; since
|
|||
|
<code>data()</code> is written using a future, doesn’t that mean… well…
|
|||
|
isn’t this wrong?</p>
|
|||
|
<p>Nope—this code is just fine. The prohibition is against reading
|
|||
|
reactive values/expressions from <em>inside</em> a future, because code
|
|||
|
inside a future is executed in a totally different R process. The steps
|
|||
|
in a promise-pipeline aren’t futures, but promise handlers. These aren’t
|
|||
|
executed in a different process; rather, they’re executed back in the
|
|||
|
original R process after a promise is resolved. We’re allowed and
|
|||
|
expected to access reactive values and expressions from these
|
|||
|
handlers.</p>
|
|||
|
</div>
|
|||
|
<div id="the-whale_downloads-reactive-reading-from-multiple-promises" class="section level3">
|
|||
|
<h3>The <code>whale_downloads</code> reactive: reading from multiple
|
|||
|
promises</h3>
|
|||
|
<p>The <code>whale_downloads</code> reactive is a slightly more complicated
|
|||
|
case.</p>
|
|||
|
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>whale_downloads <span class="ot"><-</span> <span class="fu">reactive</span>({</span>
|
|||
|
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">inner_join</span>(<span class="fu">whales</span>(), <span class="st">"ip_id"</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>n)</span>
|
|||
|
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Looks simple, but we can’t just do a simple replacement this time.
|
|||
|
Can you see why?</p>
|
|||
|
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># BAD VERSION DOESN'T WORK</span></span>
|
|||
|
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>whale_downloads <span class="ot"><-</span> <span class="fu">reactive</span>({</span>
|
|||
|
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">inner_join</span>(<span class="fu">whales</span>(), <span class="st">"ip_id"</span>) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>n)</span>
|
|||
|
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Remember, both <code>data()</code> and <code>whales()</code> now
|
|||
|
return a promise object, not a data frame. None of the dplyr verbs know
|
|||
|
how to deal with promises natively (and the same is true for almost
|
|||
|
every other R function, anywhere in the R universe).</p>
|
|||
|
<p>We’re able to use <code>%...>%</code> with promises on the
|
|||
|
left-hand side and regular dplyr calls on the right-hand side, only
|
|||
|
because the <code>%...>%</code> operator “unwraps” the promise object
|
|||
|
for us, yielding a regular object (data frame or whatever) to be passed
|
|||
|
to dplyr. But in this case, we’re passing <code>whales()</code>, which is a
|
|||
|
promise object, directly to <code>inner_join</code>, and
|
|||
|
<code>inner_join</code> has no idea what to do with it.</p>
|
|||
|
<p>The fundamental thing to pattern-match on here is that <strong>we
|
|||
|
have a block of code that relies on more than one promise
|
|||
|
object</strong>, and that means <code>%...>%</code> won’t be enough.
|
|||
|
This is a pretty common situation as well, and occurs in
|
|||
|
<strong>12%</strong> of reactives and outputs in this example app.</p>
|
|||
|
<p>Here’s what the real solution looks like:</p>
|
|||
|
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb11-3"><a href="#cb11-3" aria-hidden="true" tabindex="-1"></a>whale_downloads <span class="ot"><-</span> <span class="fu">reactive</span>({</span>
|
|||
|
<span id="cb11-4"><a href="#cb11-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">promise_all</span>(<span class="at">data_df =</span> <span class="fu">data</span>(), <span class="at">whales_df =</span> <span class="fu">whales</span>()) <span class="sc">%...>%</span> <span class="fu">with</span>({</span>
|
|||
|
<span id="cb11-5"><a href="#cb11-5" aria-hidden="true" tabindex="-1"></a> data_df <span class="sc">%>%</span></span>
|
|||
|
<span id="cb11-6"><a href="#cb11-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">inner_join</span>(whales_df, <span class="st">"ip_id"</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb11-7"><a href="#cb11-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">select</span>(<span class="sc">-</span>n)</span>
|
|||
|
<span id="cb11-8"><a href="#cb11-8" aria-hidden="true" tabindex="-1"></a> })</span>
|
|||
|
<span id="cb11-9"><a href="#cb11-9" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<div id="promises-the-gathering" class="section level4">
|
|||
|
<h4>Promises: the Gathering</h4>
|
|||
|
<p>This solution uses the <a href="https://rstudio.github.io/promises/articles/combining.html#gathering">promise
|
|||
|
gathering</a> pattern, which combines <code>promise_all</code>,
|
|||
|
<code>%...>%</code>, and <code>with</code>.</p>
|
|||
|
<ul>
|
|||
|
<li>The <code>promise_all</code> function gathers multiple promise
|
|||
|
objects together, and returns a single promise object. This new promise
|
|||
|
object doesn’t resolve until all the input promise objects are resolved,
|
|||
|
and it yields a list of those results.</li>
|
|||
|
</ul>
|
|||
|
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="sc">></span> <span class="fu">promise_all</span>(<span class="at">a =</span> <span class="fu">future_promise</span>(<span class="st">"Hello"</span>), <span class="at">b =</span> <span class="fu">future_promise</span>(<span class="st">"World"</span>)) <span class="sc">%...>%</span> <span class="fu">print</span>()</span>
|
|||
|
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="sc">$</span>a</span>
|
|||
|
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>[<span class="dv">1</span>] <span class="st">"Hello"</span></span>
|
|||
|
<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a><span class="sc">$</span>b</span>
|
|||
|
<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a>[<span class="dv">1</span>] <span class="st">"World"</span></span></code></pre></div>
|
|||
|
<ul>
|
|||
|
<li>The <code>%...>%</code>, as before, “unwraps” the promise object
|
|||
|
and passes the result to its right hand side.</li>
|
|||
|
<li>The <code>with</code> function (from base R) takes a named list, and
|
|||
|
makes it into a sort of virtual parent environment while evaluating a
|
|||
|
code block you pass it.</li>
|
|||
|
</ul>
|
|||
|
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="sc">></span> x <span class="sc">+</span> y</span>
|
|||
|
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>Error<span class="sc">:</span> object <span class="st">'x'</span> not found</span>
|
|||
|
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a><span class="sc">></span> <span class="fu">with</span>(<span class="fu">list</span>(<span class="at">x =</span> <span class="dv">1</span>, <span class="at">y =</span> <span class="dv">2</span>), {</span>
|
|||
|
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a><span class="sc">+</span> x <span class="sc">+</span> y</span>
|
|||
|
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a><span class="sc">+</span> })</span>
|
|||
|
<span id="cb13-7"><a href="#cb13-7" aria-hidden="true" tabindex="-1"></a>[<span class="dv">1</span>] <span class="dv">3</span></span></code></pre></div>
|
|||
|
<p>Let’s once again combine the three, with the simplest possible
|
|||
|
example of the gathering pattern:</p>
|
|||
|
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="sc">></span> <span class="fu">promise_all</span>(<span class="at">x =</span> <span class="fu">future_promise</span>(<span class="st">"Hello"</span>), <span class="at">y =</span> <span class="fu">future_promise</span>(<span class="st">"World"</span>)) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="sc">+</span> <span class="fu">with</span>({ <span class="fu">paste</span>(x, y) }) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a><span class="sc">+</span> <span class="fu">print</span>()</span>
|
|||
|
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>[<span class="dv">1</span>] <span class="st">"Hello World"</span></span></code></pre></div>
|
|||
|
<p>You can make use of this pattern without remembering exactly how
|
|||
|
these pieces combine. Just remember that the arguments to
|
|||
|
<code>promise_all</code> provide the promise objects
|
|||
|
(<code>future_promise("Hello")</code> and <code>future_promise("World")</code>),
|
|||
|
along with the names you want to use to refer to their yielded values
|
|||
|
(<code>x</code> and <code>y</code>); and the code block you put in
|
|||
|
<code>with()</code> can refer to those names without worrying about the
|
|||
|
fact that they were ever promises to begin with.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="the-total_downloaders-value-box-simple-pipelines-are-for-output-too" class="section level3">
|
|||
|
<h3>The <code>total_downloaders</code> value box: simple pipelines are
|
|||
|
for output, too</h3>
|
|||
|
<p><img src="
|
|||
|
<p>All of the value boxes in this app ended up looking a lot like
|
|||
|
this:</p>
|
|||
|
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a>output<span class="sc">$</span>total_downloaders <span class="ot"><-</span> <span class="fu">renderValueBox</span>({</span>
|
|||
|
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">pull</span>(ip_id) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">unique</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">length</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">format</span>(<span class="at">big.mark =</span> <span class="st">","</span>) <span class="sc">%>%</span></span>
|
|||
|
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBox</span>(<span class="st">"unique downloaders"</span>)</span>
|
|||
|
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>This is structurally no different than the <code>whales</code>
|
|||
|
best-case scenario reactive. One thing worth pointing out is that an
|
|||
|
async <code>renderValueBox</code> means you return a promise that
|
|||
|
returns a <code>valueBox</code>; you <em>don’t</em> return a
|
|||
|
<code>valueBox</code> to which you have passed a promise.</p>
|
|||
|
<p>Meaning, you <em>don’t</em> do this:</p>
|
|||
|
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># BAD VERSION DOESN'T WORK</span></span>
|
|||
|
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>output<span class="sc">$</span>total_downloaders <span class="ot"><-</span> <span class="fu">renderValueBox</span>({</span>
|
|||
|
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBox</span>(</span>
|
|||
|
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">pull</span>(ip_id) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">unique</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">length</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">format</span>(<span class="at">big.mark =</span> <span class="st">","</span>),</span>
|
|||
|
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> <span class="st">"unique downloaders"</span></span>
|
|||
|
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> )</span>
|
|||
|
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Instead, you do this:</p>
|
|||
|
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a>output<span class="sc">$</span>total_downloaders <span class="ot"><-</span> <span class="fu">renderValueBox</span>({</span>
|
|||
|
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">data</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">pull</span>(ip_id) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb17-6"><a href="#cb17-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">unique</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb17-7"><a href="#cb17-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">length</span>() <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb17-8"><a href="#cb17-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">format</span>(<span class="at">big.mark =</span> <span class="st">","</span>) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb17-9"><a href="#cb17-9" aria-hidden="true" tabindex="-1"></a> <span class="fu">valueBox</span>(<span class="st">"unique downloaders"</span>)</span>
|
|||
|
<span id="cb17-10"><a href="#cb17-10" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>The other trick worth noting is the <code>pull</code> verb, which is
|
|||
|
used to retrieve a specific column of a data frame as a vector (similar
|
|||
|
to <code>$</code> or <code>[[</code>). In this case,
|
|||
|
<code>pull(data, ip_id)</code> is equivalent to
|
|||
|
<code>data[["ip_id"]]</code>. Note that <code>pull</code> is part of
|
|||
|
dplyr and isn’t specific to promises.</p>
|
|||
|
</div>
|
|||
|
<div id="the-biggest_whales-plot-getting-untidy" class="section level3">
|
|||
|
<h3>The <code>biggest_whales</code> plot: getting untidy</h3>
|
|||
|
<p>In a cruel twist of API design fate, one of the cornerstone packages
|
|||
|
of the tidyverse lacks a tidy API. I’m referring, of course, to
|
|||
|
<code>ggplot2</code>:</p>
|
|||
|
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>output<span class="sc">$</span>downloaders <span class="ot"><-</span> <span class="fu">renderPlot</span>({</span>
|
|||
|
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">whales</span>() <span class="sc">%>%</span></span>
|
|||
|
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(<span class="fu">aes</span>(ip_name, n)) <span class="sc">+</span></span>
|
|||
|
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>(<span class="at">stat =</span> <span class="st">"identity"</span>) <span class="sc">+</span></span>
|
|||
|
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylab</span>(<span class="st">"Downloads on this day"</span>)</span>
|
|||
|
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>While <code>dplyr</code> and other tidyverse packages are designed to
|
|||
|
link calls together with <code>%>%</code>, the older
|
|||
|
<code>ggplot2</code> package uses the <code>+</code> operator. This is
|
|||
|
mostly a small aesthetic wart in synchronous code, but it’s a real
|
|||
|
problem with async, because the <code>promises</code> package doesn’t
|
|||
|
currently have a promise-aware replacement for <code>+</code> like it
|
|||
|
does for <code>%>%</code>.</p>
|
|||
|
<p>Fortunately, there’s a pretty good escape hatch for
|
|||
|
<code>%>%</code>, and <code>%...>%</code> inherited it too.
|
|||
|
Instead of a pipeline stage being a simple function call, you can put a
|
|||
|
<code>{</code> and <code>}</code> delimited code block, and inside of
|
|||
|
that code block, you can access the “it” value using a period
|
|||
|
(<code>.</code>).</p>
|
|||
|
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a>output<span class="sc">$</span>downloaders <span class="ot"><-</span> <span class="fu">renderPlot</span>({</span>
|
|||
|
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">whales</span>() <span class="sc">%...>%</span> {</span>
|
|||
|
<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a> whale_df <span class="ot"><-</span> .</span>
|
|||
|
<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">ggplot</span>(whale_df, <span class="fu">aes</span>(ip_name, n)) <span class="sc">+</span></span>
|
|||
|
<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">geom_bar</span>(<span class="at">stat =</span> <span class="st">"identity"</span>) <span class="sc">+</span></span>
|
|||
|
<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a> <span class="fu">ylab</span>(<span class="st">"Downloads on this day"</span>)</span>
|
|||
|
<span id="cb19-9"><a href="#cb19-9" aria-hidden="true" tabindex="-1"></a> }</span>
|
|||
|
<span id="cb19-10"><a href="#cb19-10" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p><strong>The importance of this pattern cannot be overstated!</strong>
|
|||
|
Using <code>%...>%</code> and simple calls alone, you’re restricted
|
|||
|
to doing pipeline-compatible operations. But <code>%...>%</code>
|
|||
|
together with a curly-brace code block means your handler code can be
|
|||
|
any shape or size. Once inside that code block, you have a regular,
|
|||
|
non-promise value in <code>.</code> (if you even want to use
|
|||
|
it—sometimes you don’t, as we’ll see later). You can have zero, one, or
|
|||
|
more statements. You can use the <code>.</code> multiple times, in
|
|||
|
nested expressions, whatever.</p>
|
|||
|
<p>Tip: if you have extensive or complex code to put in a code block,
|
|||
|
start the block by creating a properly named variable to store the value
|
|||
|
of <code>.</code>. The reason for this is that <code>.</code> may
|
|||
|
acquire a different meaning than you intend as you add code to the code
|
|||
|
block. For example, if a magrittr pipeline starts with <code>.</code>,
|
|||
|
instead of evaluating the pipeline and returning a value, it creates a
|
|||
|
function that takes a single argument. So the following code wouldn’t
|
|||
|
filter the resolved value of <code>whales()</code>, but instead, create
|
|||
|
an anonymous function that calls <code>filter(n > 1000)</code> on
|
|||
|
whatever you pass it.</p>
|
|||
|
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="fu">whales</span>() <span class="sc">%...>%</span> {</span>
|
|||
|
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a> . <span class="sc">%>%</span> <span class="fu">filter</span>(n <span class="sc">></span> <span class="dv">1000</span>)</span>
|
|||
|
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div>
|
|||
|
<p>This fixes it:</p>
|
|||
|
<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">whales</span>() <span class="sc">%...>%</span> {</span>
|
|||
|
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a> whales_df <span class="ot"><-</span> .</span>
|
|||
|
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a> whales_df <span class="sc">%>%</span> <span class="fu">filter</span>(n <span class="sc">></span> <span class="dv">1000</span>)</span>
|
|||
|
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div>
|
|||
|
<p>There are other ways to work around the above problem as well, but I
|
|||
|
like this fix because it doesn’t require any thought or care. Just give
|
|||
|
the <code>.</code> value a new name, and forget the <code>.</code>
|
|||
|
exists.</p>
|
|||
|
<p>For untidy code with a single promise object, just remember: pair a
|
|||
|
single <code>%...>%</code> with a code block and you should be able
|
|||
|
to do almost anything.</p>
|
|||
|
</div>
|
|||
|
<div id="revisiting-the-data-reactive-progress-support" class="section level3">
|
|||
|
<h3>Revisiting the <code>data</code> reactive: progress support</h3>
|
|||
|
<p>Now that we have discussed a few techniques for writing async code,
|
|||
|
let’s come back to our original <code>data</code> event reactive, and
|
|||
|
this time do a more faithful async conversion that preserves the
|
|||
|
progress reporting functionality of the original.</p>
|
|||
|
<p>Again, here’s the original sync code:</p>
|
|||
|
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># SYNCHRONOUS version</span></span>
|
|||
|
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>data <span class="ot"><-</span> <span class="fu">eventReactive</span>(input<span class="sc">$</span>date, {</span>
|
|||
|
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a> date <span class="ot"><-</span> input<span class="sc">$</span>date <span class="co"># Example: 2018-05-28</span></span>
|
|||
|
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a> year <span class="ot"><-</span> lubridate<span class="sc">::</span><span class="fu">year</span>(date) <span class="co"># Example: "2018"</span></span>
|
|||
|
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-7"><a href="#cb22-7" aria-hidden="true" tabindex="-1"></a> url <span class="ot"><-</span> <span class="fu">glue</span>(<span class="st">"http://cran-logs.rstudio.com/{year}/{date}.csv.gz"</span>)</span>
|
|||
|
<span id="cb22-8"><a href="#cb22-8" aria-hidden="true" tabindex="-1"></a> path <span class="ot"><-</span> <span class="fu">file.path</span>(<span class="st">"data_cache"</span>, <span class="fu">paste0</span>(date, <span class="st">".csv.gz"</span>))</span>
|
|||
|
<span id="cb22-9"><a href="#cb22-9" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-10"><a href="#cb22-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">withProgress</span>(<span class="at">value =</span> <span class="cn">NULL</span>, {</span>
|
|||
|
<span id="cb22-11"><a href="#cb22-11" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-12"><a href="#cb22-12" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">file.exists</span>(path)) {</span>
|
|||
|
<span id="cb22-13"><a href="#cb22-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">setProgress</span>(<span class="at">message =</span> <span class="st">"Downloading data..."</span>)</span>
|
|||
|
<span id="cb22-14"><a href="#cb22-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">download.file</span>(url, path)</span>
|
|||
|
<span id="cb22-15"><a href="#cb22-15" aria-hidden="true" tabindex="-1"></a> }</span>
|
|||
|
<span id="cb22-16"><a href="#cb22-16" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-17"><a href="#cb22-17" aria-hidden="true" tabindex="-1"></a> <span class="fu">setProgress</span>(<span class="at">message =</span> <span class="st">"Parsing data..."</span>)</span>
|
|||
|
<span id="cb22-18"><a href="#cb22-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">read_csv</span>(path, <span class="at">col_types =</span> <span class="st">"Dti---c-ci"</span>, <span class="at">progress =</span> <span class="cn">FALSE</span>)</span>
|
|||
|
<span id="cb22-19"><a href="#cb22-19" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb22-20"><a href="#cb22-20" aria-hidden="true" tabindex="-1"></a> })</span>
|
|||
|
<span id="cb22-21"><a href="#cb22-21" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>Progress reporting currently presents two challenges for future.</p>
|
|||
|
<p>First, the <code>withProgress({...})</code> function cannot be used
|
|||
|
with async. <code>withProgress</code> is designed to wrap a slow
|
|||
|
synchronous action, and dismisses its progress dialog when the block of
|
|||
|
code it wraps is done executing. Since the call to
|
|||
|
<code>future_promise()</code> will return immediately even though the
|
|||
|
actual task is far from done, using <code>withProgress</code> won’t
|
|||
|
work; the progress dialog would be dismissed before the download even
|
|||
|
got going.</p>
|
|||
|
<p>It’s conceivable that <code>withProgress</code> could gain promise
|
|||
|
compatibility someday, but it’s not in Shiny v1.1.0. In the meantime, we
|
|||
|
can work around this by using the alternative, <a href="https://shiny.posit.co/r/reference/shiny/latest/progress.html">object-oriented
|
|||
|
progress API</a> that Shiny offers. It’s a bit more verbose and fiddly
|
|||
|
than <code>withProgress</code>/<code>setProgress</code>, but it is
|
|||
|
flexible enough to work with futures/promises.</p>
|
|||
|
<p>Second, progress messages can’t be sent from futures. This is simply
|
|||
|
because futures are executed in child processes, which don’t have direct
|
|||
|
access to the browser like the main Shiny process does.</p>
|
|||
|
<p>It’s conceivable that <code>future</code> could gain the ability for
|
|||
|
child processes to communicate back to their parents, but no good
|
|||
|
solution exists at the time of this writing. In the meantime, we can
|
|||
|
work around this by taking the one future that does both downloading and
|
|||
|
parsing, and splitting it into two separate futures. After the download
|
|||
|
future has completed, we can send a progress message that parsing is
|
|||
|
beginning, and then start the parsing future.</p>
|
|||
|
<p>The regrettably complicated solution is below.</p>
|
|||
|
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ASYNCHRONOUS version</span></span>
|
|||
|
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a>data <span class="ot"><-</span> <span class="fu">eventReactive</span>(input<span class="sc">$</span>date, {</span>
|
|||
|
<span id="cb23-4"><a href="#cb23-4" aria-hidden="true" tabindex="-1"></a> date <span class="ot"><-</span> input<span class="sc">$</span>date</span>
|
|||
|
<span id="cb23-5"><a href="#cb23-5" aria-hidden="true" tabindex="-1"></a> year <span class="ot"><-</span> lubridate<span class="sc">::</span><span class="fu">year</span>(date)</span>
|
|||
|
<span id="cb23-6"><a href="#cb23-6" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb23-7"><a href="#cb23-7" aria-hidden="true" tabindex="-1"></a> url <span class="ot"><-</span> <span class="fu">glue</span>(<span class="st">"http://cran-logs.rstudio.com/{year}/{date}.csv.gz"</span>)</span>
|
|||
|
<span id="cb23-8"><a href="#cb23-8" aria-hidden="true" tabindex="-1"></a> path <span class="ot"><-</span> <span class="fu">file.path</span>(<span class="st">"data_cache"</span>, <span class="fu">paste0</span>(date, <span class="st">".csv.gz"</span>))</span>
|
|||
|
<span id="cb23-9"><a href="#cb23-9" aria-hidden="true" tabindex="-1"></a></span>
|
|||
|
<span id="cb23-10"><a href="#cb23-10" aria-hidden="true" tabindex="-1"></a> p <span class="ot"><-</span> Progress<span class="sc">$</span><span class="fu">new</span>()</span>
|
|||
|
<span id="cb23-11"><a href="#cb23-11" aria-hidden="true" tabindex="-1"></a> p<span class="sc">$</span><span class="fu">set</span>(<span class="at">value =</span> <span class="cn">NULL</span>, <span class="at">message =</span> <span class="st">"Downloading data..."</span>)</span>
|
|||
|
<span id="cb23-12"><a href="#cb23-12" aria-hidden="true" tabindex="-1"></a> <span class="fu">future_promise</span>({</span>
|
|||
|
<span id="cb23-13"><a href="#cb23-13" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">file.exists</span>(path)) {</span>
|
|||
|
<span id="cb23-14"><a href="#cb23-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">download.file</span>(url, path)</span>
|
|||
|
<span id="cb23-15"><a href="#cb23-15" aria-hidden="true" tabindex="-1"></a> }</span>
|
|||
|
<span id="cb23-16"><a href="#cb23-16" aria-hidden="true" tabindex="-1"></a> }) <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb23-17"><a href="#cb23-17" aria-hidden="true" tabindex="-1"></a> { p<span class="sc">$</span><span class="fu">set</span>(<span class="at">message =</span> <span class="st">"Parsing data..."</span>) } <span class="sc">%...>%</span></span>
|
|||
|
<span id="cb23-18"><a href="#cb23-18" aria-hidden="true" tabindex="-1"></a> { <span class="fu">future_promise</span>(<span class="fu">read_csv</span>(path, <span class="at">col_types =</span> <span class="st">"Dti---c-ci"</span>, <span class="at">progress =</span> <span class="cn">FALSE</span>)) } <span class="sc">%>%</span></span>
|
|||
|
<span id="cb23-19"><a href="#cb23-19" aria-hidden="true" tabindex="-1"></a> <span class="fu">finally</span>(<span class="sc">~</span>p<span class="sc">$</span><span class="fu">close</span>())</span>
|
|||
|
<span id="cb23-20"><a href="#cb23-20" aria-hidden="true" tabindex="-1"></a>})</span></code></pre></div>
|
|||
|
<p>The single future we wrote earlier has now become a pipeline of
|
|||
|
promises:</p>
|
|||
|
<ol style="list-style-type: decimal">
|
|||
|
<li>future (download)</li>
|
|||
|
<li>send progress message</li>
|
|||
|
<li>future (parse)</li>
|
|||
|
<li>dismiss progress dialog</li>
|
|||
|
</ol>
|
|||
|
<p>Note that neither the R6 call <code>p$set(message = ...)</code> nor
|
|||
|
the second <code>future_promise()</code> call is tidy, so they use
|
|||
|
curly-brace blocks, as discussed in the above section about
|
|||
|
<code>biggest_whales</code>.</p>
|
|||
|
<p>The final step of dismissing the progress dialog doesn’t use
|
|||
|
<code>%...>%</code> at all; because we want the progress dialog to
|
|||
|
dismiss whether the download and parse operations succeed or fail, we
|
|||
|
use the regular pipe <code>%>%</code> and <code>finally()</code>
|
|||
|
function instead. See the relevant section in <a href="https://rstudio.github.io/promises/articles/overview.html#cleaning-up-with-finally"><em>Working
|
|||
|
with promises in R</em></a> to learn more.</p>
|
|||
|
<p>With these changes in place, we’ve now covered all of the changes to
|
|||
|
the application. You can see the full changes side-by-side via <a href="https://github.com/rstudio/cranwhales/compare/sync...async?diff=split">this
|
|||
|
GitHub diff</a>.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="measuring-scalability" class="section level2">
|
|||
|
<h2>Measuring scalability</h2>
|
|||
|
<p>It was a fair amount of work to do the sync-to-async conversion. Now
|
|||
|
we’d like to know if the conversion to async had the desired effect:
|
|||
|
improved responsiveness (i.e. lower latency) when the number of
|
|||
|
simultaneous visitors increases.</p>
|
|||
|
<div id="load-testing-with-shiny-coming-soon" class="section level3">
|
|||
|
<h3>Load testing with Shiny (coming soon)</h3>
|
|||
|
<p>At the time of this writing, we are working on a suite of load
|
|||
|
testing tools for Shiny that is not publicly available yet, but was
|
|||
|
previewed by Sean Lopp during his <a href="https://posit.co/resources/">epic rstudio::conf 2018 talk</a>
|
|||
|
about running a Shiny load test with 10,000 simulated concurrent
|
|||
|
users.</p>
|
|||
|
<p>You use these tools to easily <strong>record</strong> yourself using
|
|||
|
your Shiny app, which creates a test script; then <strong>play
|
|||
|
back</strong> that test script, but multiplied by
|
|||
|
dozens/hundreds/thousands of simulated concurrent users; and finally,
|
|||
|
<strong>analyze</strong> the timing data generated during the playback
|
|||
|
step to see what kind of latency the simulated users experienced.</p>
|
|||
|
<p>To examine the effects of my async refactor, I recorded a simple test
|
|||
|
script by loading up the app, waiting for the first tab to appear, then
|
|||
|
clicking through each of the other tabs, pausing for several seconds
|
|||
|
each time before moving on to the next. When using the app without any
|
|||
|
other visitors, the homepage fully loads in less than a second, and the
|
|||
|
initial loading of data and rendering of the plot on the default tab
|
|||
|
takes about 7 seconds. After that, each tab takes no more than a couple
|
|||
|
of seconds to load. Overall, the entire test script, including time
|
|||
|
where the user is thinking, takes about 40 seconds under ideal settings
|
|||
|
(i.e. only a single concurrent user).</p>
|
|||
|
<p>I then used this test script to generate load against the Shiny app
|
|||
|
running in my local RStudio. With the settings I chose, the playback
|
|||
|
tool introduced one new “user” session every 5 seconds, until 50
|
|||
|
sessions total had been launched; then it waited until all the sessions
|
|||
|
were complete. I ran this test on both the sync and async versions in
|
|||
|
turn, which generated the following results.</p>
|
|||
|
</div>
|
|||
|
<div id="sync-vs.-async-performance" class="section level3">
|
|||
|
<h3>Sync vs. async performance</h3>
|
|||
|
<p><img src="
|
|||
|
<p>In this plot, each row represents a single session, and the x
|
|||
|
dimension represents time. Each of the rectangles represents a single
|
|||
|
“step” in the test script, be it downloading the HTML for the homepage,
|
|||
|
fetching one of the two dozen JavaScript/CSS files, or waiting for the
|
|||
|
server to update outputs. So the wider a rectangle is, the longer the
|
|||
|
user had to wait. (The empty gaps between rectangles represent time the
|
|||
|
app is waiting for the user to click an input; their widths are
|
|||
|
hard-coded into the test script.)</p>
|
|||
|
<p>Of particular importance are the red and pink rectangles, as these
|
|||
|
represent the initial page load. While these are taking place, the user
|
|||
|
is staring at a blank page, probably wondering if the server is down.
|
|||
|
Long waits during this stage are not only undesirable, but surprising
|
|||
|
and incomprehensible to the user; whereas the same user is probably
|
|||
|
prepared to wait a little while for a complicated visualization to be
|
|||
|
rendered in response to an input change.</p>
|
|||
|
<p>And as you can see from this plot, the behavior of the async app is
|
|||
|
much improved in the critical metric of homepage/JS/CSS loading time.
|
|||
|
The sync version of the app starts displaying unacceptably long red/pink
|
|||
|
loading times as early as session 15, and by session #44 the maximum
|
|||
|
page load time has exceeded one minute. The async version at that point
|
|||
|
is showing 25 second load times, which is far from great, but still a
|
|||
|
significant step in the right direction.</p>
|
|||
|
</div>
|
|||
|
<div id="further-optimizations" class="section level3">
|
|||
|
<h3>Further optimizations</h3>
|
|||
|
<p>I was surprised that the async version’s page load times weren’t even
|
|||
|
faster, and even more surprised to see that the blue rectangles were
|
|||
|
just as wide as the sync version. Why isn’t the async version way
|
|||
|
faster? The sync version does all of its work on a single thread, and I
|
|||
|
specifically designed this app to be a nightmare for scalability by
|
|||
|
having each session kick off by parsing hundreds of megabytes of CSV, an
|
|||
|
operation that is quite expensive. The async version gets to spread
|
|||
|
these jobs across several workers. Why aren’t we seeing a greater time
|
|||
|
savings?</p>
|
|||
|
<p>Mostly, it’s because calling
|
|||
|
<code>future_promise(read_csv("big_file.csv"))</code> is almost a
|
|||
|
worst-case scenario for future and async. <code>read_csv</code> is
|
|||
|
generally fast, but because the CRAN log files are so big,
|
|||
|
<code>read_csv("big_file.csv")</code> is slow. The value it returns is a
|
|||
|
very large data frame, that has now been loaded not into the Shiny
|
|||
|
process, but a <code>future</code> worker process. In order to return
|
|||
|
that data frame to the Shiny process, that data must first be serialized
|
|||
|
(I believe <code>future</code> essentially uses <code>saveRDS</code> for
|
|||
|
this), transmitted to the Shiny process, and then deserialized; to make
|
|||
|
matters worse, the transmitting and deserialization steps happen on the
|
|||
|
main R thread that we’re working so hard to try to keep idle.
|
|||
|
<strong>The larger the data we send back and forth to the future, the
|
|||
|
more performance suffers,</strong> and in this case we’re sending back
|
|||
|
quite a lot of data.</p>
|
|||
|
<p>We can make our code significantly faster by doing more summarizing,
|
|||
|
aggregation, and filtering <em>inside</em> the future; not only does
|
|||
|
this make more of the work happen in parallel, but by returning the data
|
|||
|
in already-processed form, we can have much less data to transfer from
|
|||
|
the worker process back to the Shiny process. (For example, the data for
|
|||
|
May 31, 2018 weighs 75MB before optimization, and 8.8MB afterwards.)</p>
|
|||
|
<p>Compare all three runs in the image below (the newly optimized
|
|||
|
version is labelled “async2”). The homepage load times have dropped
|
|||
|
further, and the calculation times are now dramatically faster than the
|
|||
|
sync code.</p>
|
|||
|
<p><img src="
|
|||
|
<p>Looking at the “async2” graph, the leading (bottom-left) edge has the
|
|||
|
same shape as before, as that’s simply the rate at which the load
|
|||
|
testing tool launches new sessions. But notice how much more closely the
|
|||
|
trailing (upper-right) edge matches the leading edge! It means that even
|
|||
|
as the number of active sessions ramped up, the amount of latency didn’t
|
|||
|
get dramatically worse, unlike with the “sync” and “async” versions. And
|
|||
|
each of the individual blue rectangles in the “async2” graph is comparatively
|
|||
|
tiny, meaning that users never have to wait more than a dozen seconds at
|
|||
|
the most for plots to update.</p>
|
|||
|
<p>This last plot shows the same data as above, but with the sessions
|
|||
|
aligned by start time. You can clearly see how the sessions are both
|
|||
|
shorter and less variable in “async2” compared to the others. I’ve added
|
|||
|
a yellow vertical line at the 10 second mark; if the page load
|
|||
|
(red/pink) has not completed at this point, it’s likely that your
|
|||
|
visitor has left in disgust. While “async” does better than “sync”, they
|
|||
|
both break through the 10 second mark early and often. In contrast, the
|
|||
|
“async2” version just barely peeks over the line three times.</p>
|
|||
|
<p><img src="
|
|||
|
<p>To get a visceral sense for what it feels like to use the app under
|
|||
|
load, here’s a video that shows what it’s like to browse the app while
|
|||
|
the load test is running at its peak. The left side of the screen shows
|
|||
|
“sync”, the right shows “async2”. In both cases, I navigated to the app
|
|||
|
when session #40 was started.</p>
|
|||
|
<p class="embed-responsive embed-responsive-16by9">
|
|||
|
<iframe class="embed-responsive-item" src="data:text/html; charset=utf-8;charset=utf-8,%3C%21DOCTYPE%20html%3E%3Chtml%20lang%3D%22en%22%20dir%3D%22ltr%22%20data%2Dcast%2Dapi%2Denabled%3D%22true%22%3E%3Chead%3E%3Cmeta%20name%3D%22viewport%22%20content%3D%22width%3Ddevice%2Dwidth%2C%20initial%2Dscale%3D1%22%3E%3Cscript%20nonce%3D%22gs%5FNjvqW5Vy60Q2ish%5FMcg%22%3Eif%20%28%27undefined%27%20%3D%3D%20typeof%20Symbol%20%7C%7C%20%27undefined%27%20%3D%3D%20typeof%20Symbol%2Eiterator%29%20%7Bdelete%20Array%2Eprototype%2Eentries%3B%7D%3C%2Fscript%3E%3Cstyle%20name%3D%22www%2Droboto%22%20nonce%3D%22%5F3GHaS1ka29TBozsoQbGpQ%22%3E%40font%2Dface%7Bfont%2Dfamily%3A%27Roboto%27%3Bfont%2Dstyle%3Anormal%3Bfont%2Dweight%3A400%3Bsrc%3Aurl%28%2F%2Ffonts%2Egstatic%2Ecom%2Fs%2Froboto%2Fv18%2FKFOmCnqEu92Fr1Mu4mxP%2Ettf%29format%28%27truetype%27%29%3B%7D%40font%2Dface%7Bfont%2Dfamily%3A%27Roboto%27%3Bfont%2Dstyle%3Anormal%3Bfont%2Dweight%3A500%3Bsrc%3Aurl%28%2F%2Ffonts%2Egstatic%2Ecom%2Fs%2Froboto%2Fv18%2FKFOlCnqEu92Fr1MmEU9fBBc9%2Ettf%29format%28%27truetype%27%29%3B%7D%3C%2Fstyle%3E%3Cscript%20name%3D%22www%2Droboto%22%20nonce%3D%22gs%5FNjvqW5Vy60Q2ish%5FMcg%22%3Eif%20%28document%2Efonts%20%26%26%20document%2Efonts%2Eload%29%20%7Bdocument%2Efonts%2Eload%28%22400%2010pt%20Roboto%22%2C%20%22E%22%29%3B%20document%2Efonts%2Eload%28%22500%2010pt%20Roboto%22%2C%20%22E%22%29%3B%7D%3C%2Fscript%3E%3Clink%20rel%3D%22stylesheet%22%20href%3D%22%2Fs%2Fplayer%2Fb46bb280%2Fwww%2Dplayer%2Ecss%22%20name%3D%22www%2Dplayer%22%20nonce%3D%22%5F3GHaS1ka29TBozsoQbGpQ%22%3E%3Cstyle%20nonce%3D%22%5F3GHaS1ka29TBozsoQbGpQ%22%3Ehtml%20%7Boverflow%3A%20hidden%3B%7Dbody%20%7Bfont%3A%2012px%20Roboto%2C%20Arial%2C%20sans%2Dserif%3B%20background%2Dcolor%3A%20%23000%3B%20color%3A%20%23fff%3B%20height%3A%20100%25%3B%20width%3A%20100%25%3B%20overflow%3A%20hidden%3B%20position%3A%20absolute%3B%20margin%3A%200%3B%20padding%3A%200%3B%7D%23player%20%7Bwidth%3A%20100%25%3B%20height%3A%20100%25%3B%7Dh1%20%7Btext%2Dalign%3A%20cente
r%3B%20color%3A%20%23fff%3B%7Dh3%20%7Bmargin%2Dtop%3A%206px%3B%20margin%2Dbottom%3A%203px%3B%7D%2Eplayer%2Dunavailable%20%7Bposition%3A%20absolute%3B%20top%3A%200%3B%20left%3A%200%3B%20right%3A%200%3B%20bottom%3A%200%3B%20padding%3A%2025px%3B%20font%2Dsize%3A%2013px%3B%20background%3A%20url%28%2Fimg%2Fmeh7%2Epng%29%2050%25%2065%25%20no%2Drepeat%3B%7D%2Eplayer%2Dunavailable%20%2Emessage%20%7Btext%2Dalign%3A%20left%3B%20margin%3A%200%20%2D5px%2015px%3B%20padding%3A%200%205px%2014px%3B%20border%2Dbottom%3A%201px%20solid%20%23888%3B%20font%2Dsize%3A%2019px%3B%20font%2Dweight%3A%20normal%3B%7D%2Eplayer%2Dunavailable%20a%20%7Bcolor%3A%20%23167ac6%3B%20text%2Ddecoration%3A%20none%3B%7D%3C%2Fstyle%3E%3Cscript%20nonce%3D%22gs%5FNjvqW5Vy60Q2ish%5FMcg%22%3Evar%20ytcsi%3D%7Bgt%3Afunction%28n%29%7Bn%3D%28n%7C%7C%22%22%29%2B%22data%5F%22%3Breturn%20ytcsi%5Bn%5D%7C%7C%28ytcsi%5Bn%5D%3D%7Btick%3A%7B%7D%2Cinfo%3A%7B%7D%2Cgel%3A%7BpreLoggedGelInfos%3A%5B%5D%7D%7D%29%7D%2Cnow%3Awindow%2Eperformance%26%26window%2Eperformance%2Etiming%26%26window%2Eperformance%2Enow%26%26window%2Eperformance%2Etiming%2EnavigationStart%3Ffunction%28%29%7Breturn%20window%2Eperformance%2Etiming%2EnavigationStart%2Bwindow%2Eperformance%2Enow%28%29%7D%3Afunction%28%29%7Breturn%28new%20Date%29%2EgetTime%28%29%7D%2Ctick%3Afunction%28l%2Ct%2Cn%29%7Bvar%20ticks%3Dytcsi%2Egt%28n%29%2Etick%3Bvar%20v%3Dt%7C%7Cytcsi%2Enow%28%29%3Bif%28ticks%5Bl%5D%29%7Bticks%5B%22%5F%22%2Bl%5D%3Dticks%5B%22%5F%22%2Bl%5D%7C%7C%5Bticks%5Bl%5D%5D%3Bticks%5B%22%5F%22%2Bl%5D%2Epush%28v%29%7Dticks%5Bl%5D%3D%0Av%7D%2Cinfo%3Afunction%28k%2Cv%2Cn%29%7Bytcsi%2Egt%28n%29%2Einfo%5Bk%5D%3Dv%7D%2CinfoGel%3Afunction%28p%2Cn%29%7Bytcsi%2Egt%28n%29%2Egel%2EpreLoggedGelInfos%2Epush%28p%29%7D%2CsetStart%3Afunction%28t%2Cn%29%7Bytcsi%2Etick%28%22%5Fstart%22%2Ct%2Cn%29%7D%7D%3B%0A%28function%28w%2Cd%29%7Bfunction%20isGecko%28%29%7Bif%28%21w%2Enavigator%29return%20false%3Btry%7Bif%28w%2Enavigator%2EuserAgentData%26%26w%2Enavigator%2EuserAgentData%2Ebrand
s%26%26w%2Enavigator%2EuserAgentData%2Ebrands%2Elength%29%7Bvar%20brands%3Dw%2Enavigator%2Euser
|
|||
|
</iframe>
|
|||
|
</p>
|
|||
|
<p>Take a look at the <a href="https://github.com/rstudio/cranwhales/compare/async...async2?diff=split">code
|
|||
|
diff for async vs. async2</a>. While the code has not changed very
|
|||
|
dramatically, it has lost a little elegance and maintainability: the
|
|||
|
code for each of the affected outputs now has one foot in the render
|
|||
|
function and one foot in the future. If your app’s total audience is a
|
|||
|
team of a hundred analysts and execs, you may choose to forgo the extra
|
|||
|
performance and stick with the original async (or even sync) code. But
|
|||
|
if you have serious scaling needs, the refactoring is probably a small
|
|||
|
price to pay.</p>
|
|||
|
<p>Let’s get real for a second, though. If this weren’t an example app
|
|||
|
written for exposition purposes, but a real production app that was
|
|||
|
intended to scale to thousands of concurrent users across dozens of R
|
|||
|
processes, we wouldn’t download and parse CSV files on the fly. Instead,
|
|||
|
we’d establish a proper <a href="https://solutions.posit.co/gallery/twitter-etl/">ETL procedure</a>
|
|||
|
to run every night and put the results into a properly indexed database
|
|||
|
table, or RDS files with just the data we need. As I said <a href="#improving-performance-and-scalability">earlier</a>, a little
|
|||
|
precomputation and caching can make a huge difference!</p>
|
|||
|
<p>Much of the remaining latency for the async2 branch is from ggplot2
|
|||
|
plotting. <a href="https://posit.co/resources/">Sean’s talk</a> alluded
|
|||
|
to some upcoming plot caching features we’re adding to Shiny, and I
|
|||
|
imagine they will have as dramatic an effect for this test as they did
|
|||
|
for Sean.</p>
|
|||
|
</div>
|
|||
|
</div>
|
|||
|
<div id="summing-up" class="section level2">
|
|||
|
<h2>Summing up</h2>
|
|||
|
<p>With async programming, expensive computations and tasks no longer
|
|||
|
need to be the scalability killers that they once were for Shiny. Armed
|
|||
|
with this and other common techniques like precomputation, caching, and
|
|||
|
load balancing, it’s possible to write responsive and scalable Shiny
|
|||
|
applications that can be safely deployed to thousands of concurrent
|
|||
|
users.</p>
|
|||
|
</div>
|
|||
|
|
|||
|
|
|||
|
|
|||
|
<!-- code folding -->
|
|||
|
|
|||
|
|
|||
|
<!-- dynamically load mathjax for compatibility with self-contained -->
|
|||
|
<script>
  // Inject the MathJax loader at runtime instead of using a static
  // <script src="...">, for compatibility with self-contained documents.
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    // Attaching the element to <head> is what triggers the fetch.
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
|
|||
|
|
|||
|
</body>
|
|||
|
</html>
|