2025-01-12 00:52:51 +08:00

812 lines
50 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<!-- lang="en" on the root element declares the page language so screen readers and other tools can pick the right language rules -->
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Type and size stability</title>
<script>
// Pandoc 2.9 puts a section's attributes on both the heading and the
// wrapping div. Strip them from the heading so the result matches the
// behaviour of Pandoc < 2.8.
document.addEventListener('DOMContentLoaded', function () {
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function (node) {
    // only h1-h6 elements are touched; anything else is left alone
    if (!/^h[1-6]$/i.test(node.tagName)) return;
    var attrs = node.attributes;
    // keep removing the first attribute until none remain
    while (attrs.length > 0) node.removeAttribute(attrs[0].name);
  });
});
</script>
<style type="text/css">
/* Small helper classes: inline code wrapping, small caps, underline,
   two-column layout, hanging indents and bullet-less task lists. */
code {
  white-space: pre-wrap;
}
span.smallcaps {
  font-variant: small-caps;
}
span.underline {
  text-decoration: underline;
}
div.column {
  display: inline-block;
  vertical-align: top;
  width: 50%;
}
div.hanging-indent {
  margin-left: 1.5em;
  text-indent: -1.5em;
}
ul.task-list {
  list-style: none;
}
</style>
<style type="text/css">
/* Code keeps its whitespace exactly; .sourceCode content is never clipped. */
code { white-space: pre; }
.sourceCode { overflow: visible; }
</style>
<style type="text/css" data-origin="pandoc">
/* Syntax-highlighting stylesheet. The data-origin="pandoc" attribute is how
   the script that follows this element identifies the sheet; that script
   matches rules by the exact selector text "div.sourceCode", so selectors
   and rule order here should not be changed casually. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* On screen the wrapper div scrolls when a listing is too wide */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, lines wrap and continuation lines get a hanging indent */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbering for pre.numberSource: a CSS counter is reset on the code
   element, incremented per line span, and rendered (non-selectable) in the
   ::before of each line's first anchor. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Empty rule with no declarations. NOTE(review): presumably the slot where a
   highlighting theme would put its colours; confirm before removing. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Per-token colours, keyed by the two-letter class on each highlighted span */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// Re-target Pandoc's div.sourceCode colour rule at pre.sourceCode instead.
(function () {
  var styleSheets = document.styleSheets;
  for (var s = 0; s < styleSheets.length; s++) {
    var sheet = styleSheets[s];
    // only the sheet whose owner element carries data-origin="pandoc"
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;
    var ruleList;
    try {
      ruleList = sheet.cssRules;
    } catch (err) {
      // reading cssRules threw for this sheet; skip it
      continue;
    }
    for (var r = 0; r < ruleList.length; r++) {
      var current = ruleList[r];
      // look for a plain style rule whose selector is exactly div.sourceCode
      var isDivSourceCode =
        current.type === current.STYLE_RULE && current.selectorText === "div.sourceCode";
      if (!isDivSourceCode) continue;
      var declarations = current.style.cssText;
      // nothing to move unless the rule sets color or background-color
      if (current.style.color === '' && current.style.backgroundColor === '') continue;
      // swap the rule in place for an equivalent pre.sourceCode rule; the
      // replacement no longer matches the selector test, so stepping past
      // it is the same as re-examining it
      sheet.deleteRule(r);
      sheet.insertRule('pre.sourceCode{' + declarations + '}', r);
    }
  }
})();
</script>
<style type="text/css">
/* Page frame: a centred column at most 700px wide with side padding */
body {
  margin: 1em auto;
  max-width: 700px;
  padding-left: 2em;
  padding-right: 2em;
  overflow: visible;
  background-color: #fff;
  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.35;
}

/* Table of contents box */
#TOC {
  clear: both;
  width: 400px;
  margin: 0 0 10px 10px;
  padding: 4px;
  border: 1px solid #CCCCCC;
  border-radius: 5px;
  background-color: #f6f6f6;
  font-size: 13px;
  line-height: 1.3;
}
#TOC .toctitle {
  margin-left: 5px;
  font-size: 15px;
  font-weight: bold;
}
#TOC ul {
  margin-top: 5px;
  margin-bottom: 5px;
  margin-left: -1.5em;
  padding-left: 40px;
}
#TOC ul ul {
  margin-left: -2em;
}
#TOC li {
  line-height: 16px;
}
/* Tables: collapsed borders, horizontal rules only, shaded header/even rows */
table {
  margin: 1em auto;
  border-collapse: collapse;
  border-width: 1px;
  border-style: outset;
  border-color: #DDDDDD;
}
table th {
  padding: 5px;
  border-width: 2px;
  border-style: inset;
}
table td {
  padding: 5px;
  border-width: 1px;
  border-style: inset;
  line-height: 18px;
}
/* must stay after the border-style rules above so the side borders are removed */
table, table th, table td {
  border-left-style: none;
  border-right-style: none;
}
table thead, table tr.even {
  background-color: #f7f7f7;
}
/* Paragraphs and block quotes */
p {
  margin: 0.5em 0;
}
blockquote {
  background-color: #f6f6f6;
  padding: 0.25em 0.75em;
}
/* Horizontal rule: all borders cleared, then a single 1px top line.
   (A `border-style: solid` placed before the `border: none` shorthand has no
   effect, because the shorthand resets it, so it is not declared here.) */
hr {
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
/* Definition lists */
dl {
  margin-left: 0;
}
dl dd {
  margin-bottom: 13px;
  margin-left: 13px;
}
dl dt {
  font-weight: bold;
}
/* Unordered lists */
ul {
  margin-top: 0;
}
ul li {
  list-style: circle outside;
}
ul ul {
  margin-bottom: 0;
}
/* Code, inline and block: light grey background, wrapping allowed */
pre, code {
  color: #333;
  background-color: #f7f7f7;
  border-radius: 3px;
  white-space: pre-wrap;
}
pre {
  margin: 5px 0 10px;
  padding: 10px;
  border-radius: 3px;
}
pre:not([class]) {
  background-color: #f7f7f7;
}
code {
  font-family: Consolas, Monaco, 'Courier New', monospace;
  font-size: 85%;
}
/* inline code inside paragraphs and list items gets vertical padding only */
p > code, li > code {
  padding: 2px 0;
}
/* Figures are centred */
div.figure {
  text-align: center;
}
/* Images sit on a white mat with a thin rounded grey frame.
   `border` is declared once: when it is declared twice in one rule only the
   last value (#CCCCCC) applies, so that is the value kept. */
img {
  background-color: #FFFFFF;
  padding: 2px;
  border: 1px solid #CCCCCC;
  border-radius: 3px;
  margin: 0 5px;
}
/* Headings: h2-h6 are underlined, with thinner rules at deeper levels */
h1 {
  margin-top: 0;
  font-size: 35px;
  line-height: 40px;
}
h2 {
  padding-top: 10px;
  padding-bottom: 2px;
  border-bottom: 4px solid #f7f7f7;
  font-size: 145%;
}
h3 {
  padding-top: 10px;
  border-bottom: 2px solid #f7f7f7;
  font-size: 120%;
}
h4 {
  margin-left: 8px;
  border-bottom: 1px solid #f7f7f7;
  font-size: 105%;
}
h5, h6 {
  border-bottom: 1px solid #ccc;
  font-size: 105%;
}
/* Links: no underline by default, with distinct hover/visited colours */
a {
  color: #0033dd;
  text-decoration: none;
}
a:hover {
  color: #6666ff;
}
a:visited {
  color: #800080;
}
a:visited:hover {
  color: #BB00BB;
}
/* links whose href starts with http: or https: are underlined */
a[href^="http:"],
a[href^="https:"] {
  text-decoration: underline;
}
/* Token colours for highlighted spans that are direct children of <code> */
code > span.kw {
  color: #555;
  font-weight: bold;
}
code > span.dt {
  color: #902000;
}
code > span.dv {
  color: #40a070;
}
code > span.bn,
code > span.fl,
code > span.ch,
code > span.st {
  color: #d14;
}
code > span.co {
  color: #888888;
  font-style: italic;
}
code > span.ot {
  color: #007020;
}
code > span.al {
  color: #ff0000;
  font-weight: bold;
}
code > span.fu {
  color: #900;
  font-weight: bold;
}
code > span.er {
  color: #a61717;
  background-color: #e3d2d2;
}
</style>
</head>
<body>
<h1 class="title toc-ignore">Type and size stability</h1>
<p>This vignette introduces the ideas of type-stability and
size-stability. If a function possesses these properties, it is
substantially easier to reason about because to predict the “shape” of
the output you only need to know the “shape”s of the inputs.</p>
<p>This work is partly motivated by a common pattern that I noticed when
reviewing code: if I read the code (without running it!), and I can’t
predict the type of each variable, I feel very uneasy about the code.
This sense is important because most unit tests explore typical inputs,
rather than exhaustively testing the strange and unusual. Analysing the
types (and size) of variables makes it possible to spot unpleasant edge
cases.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(vctrs)</span>
<span id="cb1-2"><a href="#cb1-2" tabindex="-1"></a><span class="fu">library</span>(rlang)</span>
<span id="cb1-3"><a href="#cb1-3" tabindex="-1"></a><span class="fu">library</span>(zeallot)</span></code></pre></div>
<div id="definitions" class="section level2">
<h2>Definitions</h2>
<p>We say a function is <strong>type-stable</strong> iff:</p>
<ol style="list-style-type: decimal">
<li>You can predict the output type knowing only the input types.</li>
<li>The order of arguments in … does not affect the output type.</li>
</ol>
<p>Similarly, a function is <strong>size-stable</strong> iff:</p>
<ol style="list-style-type: decimal">
<li>You can predict the output size knowing only the input sizes, or
there is a single numeric input that specifies the output size.</li>
</ol>
<p>Very few base R functions are size-stable, so I’ll also define a
slightly weaker condition. I’ll call a function
<strong>length-stable</strong> iff:</p>
<ol style="list-style-type: decimal">
<li>You can predict the output <em>length</em> knowing only the input
<em>lengths</em>, or there is a single numeric input that specifies the
output <em>length</em>.</li>
</ol>
<p>(But note that length-stable is not a particularly robust definition
because <code>length()</code> returns a value for things that are not
vectors.)</p>
<p>We’ll call functions that don’t obey these principles
<strong>type-unstable</strong> and <strong>size-unstable</strong>
respectively.</p>
<p>On top of type- and size-stability it’s also desirable to have a
single set of rules that are applied consistently. We want one set of
type-coercion and size-recycling rules that apply everywhere, not many
sets of rules that apply to different functions.</p>
<p>The goal of these principles is to minimise cognitive overhead.
Rather than having to memorise many special cases, you should be able to
learn one set of principles and apply them again and again.</p>
<div id="examples" class="section level3">
<h3>Examples</h3>
<p>To make these ideas concrete, let’s apply them to a few base
functions:</p>
<ol style="list-style-type: decimal">
<li><p><code>mean()</code> is trivially type-stable and size-stable
because it always returns a double vector of length 1 (or it throws an
error).</p></li>
<li><p>Surprisingly, <code>median()</code> is type-unstable:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">median</span>(<span class="fu">c</span>(<span class="dv">1</span><span class="dt">L</span>, <span class="dv">1</span><span class="dt">L</span>)))</span>
<span id="cb2-2"><a href="#cb2-2" tabindex="-1"></a><span class="co">#&gt; Prototype: double</span></span>
<span id="cb2-3"><a href="#cb2-3" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">median</span>(<span class="fu">c</span>(<span class="dv">1</span><span class="dt">L</span>, <span class="dv">1</span><span class="dt">L</span>, <span class="dv">1</span><span class="dt">L</span>)))</span>
<span id="cb2-4"><a href="#cb2-4" tabindex="-1"></a><span class="co">#&gt; Prototype: integer</span></span></code></pre></div>
<p>It is, however, size-stable, since it always returns a vector of
length 1.</p></li>
<li><p><code>sapply()</code> is type-unstable because you can’t predict
the output type only knowing the input types:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">sapply</span>(<span class="dv">1</span><span class="dt">L</span>, <span class="cf">function</span>(x) <span class="fu">c</span>(x, x)))</span>
<span id="cb3-2"><a href="#cb3-2" tabindex="-1"></a><span class="co">#&gt; Prototype: integer[,1]</span></span>
<span id="cb3-3"><a href="#cb3-3" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">sapply</span>(<span class="fu">integer</span>(), <span class="cf">function</span>(x) <span class="fu">c</span>(x, x)))</span>
<span id="cb3-4"><a href="#cb3-4" tabindex="-1"></a><span class="co">#&gt; Prototype: list</span></span></code></pre></div>
<p>It’s not quite size-stable; <code>vec_size(sapply(x, f))</code> is
<code>vec_size(x)</code> for vectors but not for matrices (the output is
transposed) or data frames (it iterates over the columns).</p></li>
<li><p><code>vapply()</code> is a type-stable version of
<code>sapply()</code> because
<code>vec_ptype_show(vapply(x, fn, template))</code> is always
<code>vec_ptype_show(template)</code>.<br />
It is size-unstable for the same reasons as
<code>sapply()</code>.</p></li>
<li><p><code>c()</code> is type-unstable because <code>c(x, y)</code>
doesn’t always output the same type as <code>c(y, x)</code>.</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">c</span>(<span class="cn">NA</span>, <span class="fu">Sys.Date</span>()))</span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a><span class="co">#&gt; Prototype: double</span></span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">c</span>(<span class="fu">Sys.Date</span>(), <span class="cn">NA</span>))</span>
<span id="cb4-4"><a href="#cb4-4" tabindex="-1"></a><span class="co">#&gt; Prototype: date</span></span></code></pre></div>
<p><code>c()</code> is <em>almost always</em> length-stable because
<code>length(c(x, y))</code> <em>almost always</em> equals
<code>length(x) + length(y)</code>. One common source of instability
here is dealing with non-vectors (see the later section
“Non-vectors”):</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>env <span class="ot">&lt;-</span> <span class="fu">new.env</span>(<span class="at">parent =</span> <span class="fu">emptyenv</span>())</span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a><span class="fu">length</span>(env)</span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a><span class="co">#&gt; [1] 0</span></span>
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a><span class="fu">length</span>(mean)</span>
<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a><span class="co">#&gt; [1] 1</span></span>
<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">c</span>(env, mean))</span>
<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a><span class="co">#&gt; [1] 2</span></span></code></pre></div></li>
<li><p><code>paste(x1, x2)</code> is length-stable because
<code>length(paste(x1, x2))</code> equals
<code>max(length(x1), length(x2))</code>. However, it doesn’t follow the
usual arithmetic recycling rules because <code>paste(1:2, 1:3)</code>
doesn’t generate a warning.</p></li>
<li><p><code>ifelse()</code> is length-stable because
<code>length(ifelse(cond, true, false))</code> is always
<code>length(cond)</code>. <code>ifelse()</code> is type-unstable
because the output type depends on the value of <code>cond</code>:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">ifelse</span>(<span class="cn">NA</span>, <span class="dv">1</span><span class="dt">L</span>, <span class="dv">1</span><span class="dt">L</span>))</span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a><span class="co">#&gt; Prototype: logical</span></span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a><span class="fu">vec_ptype_show</span>(<span class="fu">ifelse</span>(<span class="cn">FALSE</span>, <span class="dv">1</span><span class="dt">L</span>, <span class="dv">1</span><span class="dt">L</span>))</span>
<span id="cb6-4"><a href="#cb6-4" tabindex="-1"></a><span class="co">#&gt; Prototype: integer</span></span></code></pre></div></li>
<li><p><code>read.csv(file)</code> is type-unstable and size-unstable
because, while you know it will return a data frame, you don’t know what
columns it will return or how many rows it will have. Similarly,
<code>df[[i]]</code> is not type-stable because the result depends on
the <em>value</em> of <code>i</code>. There are many important functions
that cannot be made type-stable or size-stable!</p></li>
</ol>
<p>With this understanding of type- and size-stability in hand, we’ll
use them to analyse some base R functions in greater depth and then
propose alternatives with better properties.</p>
</div>
</div>
<div id="c-and-vctrsvec_c" class="section level2">
<h2><code>c()</code> and <code>vctrs::vec_c()</code></h2>
<p>In this section we’ll compare and contrast <code>c()</code> and
<code>vec_c()</code>. <code>vec_c()</code> is both type- and size-stable
because it possesses the following invariants:</p>
<ul>
<li><code>vec_ptype(vec_c(x, y))</code> equals
<code>vec_ptype_common(x, y)</code>.</li>
<li><code>vec_size(vec_c(x, y))</code> equals
<code>vec_size(x) + vec_size(y)</code>.</li>
</ul>
<p><code>c()</code> has another undesirable property in that it’s not
consistent with <code>unlist()</code>; i.e.,
<code>unlist(list(x, y))</code> does not always equal
<code>c(x, y)</code>. In other words, base R has multiple sets of
type-coercion rules. I won’t consider this problem further here.</p>
<p>I have two goals here:</p>
<ul>
<li><p>To fully document the quirks of <code>c()</code>, hence
motivating the development of an alternative.</p></li>
<li><p>To discuss non-obvious consequences of the type- and
size-stability above.</p></li>
</ul>
<div id="atomic-vectors" class="section level3">
<h3>Atomic vectors</h3>
<p>If we only consider atomic vectors, <code>c()</code> is type-stable
because it uses a hierarchy of types: character &gt; complex &gt; double
&gt; integer &gt; logical.</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">FALSE</span>, <span class="dv">1</span><span class="dt">L</span>, <span class="fl">2.5</span>)</span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a><span class="co">#&gt; [1] 0.0 1.0 2.5</span></span></code></pre></div>
<p><code>vec_c()</code> obeys similar rules:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">FALSE</span>, <span class="dv">1</span><span class="dt">L</span>, <span class="fl">2.5</span>)</span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a><span class="co">#&gt; [1] 0.0 1.0 2.5</span></span></code></pre></div>
<p>But it does not automatically coerce to character vectors or
lists:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">FALSE</span>, <span class="st">&quot;x&quot;</span>)</span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a><span class="co">#&gt; [1] &quot;FALSE&quot; &quot;x&quot;</span></span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">FALSE</span>, <span class="st">&quot;x&quot;</span>)</span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a><span class="co">#&gt; Error in `vec_c()`:</span></span>
<span id="cb9-5"><a href="#cb9-5" tabindex="-1"></a><span class="co">#&gt; ! Can&#39;t combine `..1` &lt;logical&gt; and `..2` &lt;character&gt;.</span></span>
<span id="cb9-6"><a href="#cb9-6" tabindex="-1"></a></span>
<span id="cb9-7"><a href="#cb9-7" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">FALSE</span>, <span class="fu">list</span>(<span class="dv">1</span>))</span>
<span id="cb9-8"><a href="#cb9-8" tabindex="-1"></a><span class="co">#&gt; [[1]]</span></span>
<span id="cb9-9"><a href="#cb9-9" tabindex="-1"></a><span class="co">#&gt; [1] FALSE</span></span>
<span id="cb9-10"><a href="#cb9-10" tabindex="-1"></a><span class="co">#&gt; </span></span>
<span id="cb9-11"><a href="#cb9-11" tabindex="-1"></a><span class="co">#&gt; [[2]]</span></span>
<span id="cb9-12"><a href="#cb9-12" tabindex="-1"></a><span class="co">#&gt; [1] 1</span></span>
<span id="cb9-13"><a href="#cb9-13" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">FALSE</span>, <span class="fu">list</span>(<span class="dv">1</span>))</span>
<span id="cb9-14"><a href="#cb9-14" tabindex="-1"></a><span class="co">#&gt; Error in `vec_c()`:</span></span>
<span id="cb9-15"><a href="#cb9-15" tabindex="-1"></a><span class="co">#&gt; ! Can&#39;t combine `..1` &lt;logical&gt; and `..2` &lt;list&gt;.</span></span></code></pre></div>
</div>
<div id="incompatible-vectors-and-non-vectors" class="section level3">
<h3>Incompatible vectors and non-vectors</h3>
<p>In general, most base methods do not throw an error:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a><span class="fu">c</span>(<span class="fl">10.5</span>, <span class="fu">factor</span>(<span class="st">&quot;x&quot;</span>))</span>
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a><span class="co">#&gt; [1] 10.5 1.0</span></span></code></pre></div>
<p>If the inputs aren’t vectors, <code>c()</code> automatically puts
them in a list:</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a><span class="fu">c</span>(mean, <span class="fu">globalenv</span>())</span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a><span class="co">#&gt; [[1]]</span></span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a><span class="co">#&gt; function (x, ...) </span></span>
<span id="cb11-4"><a href="#cb11-4" tabindex="-1"></a><span class="co">#&gt; UseMethod(&quot;mean&quot;)</span></span>
<span id="cb11-5"><a href="#cb11-5" tabindex="-1"></a><span class="co">#&gt; &lt;bytecode: 0x103a05448&gt;</span></span>
<span id="cb11-6"><a href="#cb11-6" tabindex="-1"></a><span class="co">#&gt; &lt;environment: namespace:base&gt;</span></span>
<span id="cb11-7"><a href="#cb11-7" tabindex="-1"></a><span class="co">#&gt; </span></span>
<span id="cb11-8"><a href="#cb11-8" tabindex="-1"></a><span class="co">#&gt; [[2]]</span></span>
<span id="cb11-9"><a href="#cb11-9" tabindex="-1"></a><span class="co">#&gt; &lt;environment: R_GlobalEnv&gt;</span></span></code></pre></div>
<p>For numeric versions, this depends on the order of inputs. Version
first is an error, otherwise the input is wrapped in a list:</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a><span class="fu">c</span>(<span class="fu">getRversion</span>(), <span class="st">&quot;x&quot;</span>)</span>
<span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a><span class="co">#&gt; Error: invalid version specification &#39;x&#39;</span></span>
<span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a></span>
<span id="cb12-4"><a href="#cb12-4" tabindex="-1"></a><span class="fu">c</span>(<span class="st">&quot;x&quot;</span>, <span class="fu">getRversion</span>())</span>
<span id="cb12-5"><a href="#cb12-5" tabindex="-1"></a><span class="co">#&gt; [[1]]</span></span>
<span id="cb12-6"><a href="#cb12-6" tabindex="-1"></a><span class="co">#&gt; [1] &quot;x&quot;</span></span>
<span id="cb12-7"><a href="#cb12-7" tabindex="-1"></a><span class="co">#&gt; </span></span>
<span id="cb12-8"><a href="#cb12-8" tabindex="-1"></a><span class="co">#&gt; [[2]]</span></span>
<span id="cb12-9"><a href="#cb12-9" tabindex="-1"></a><span class="co">#&gt; [1] 4 3 1</span></span></code></pre></div>
<p><code>vec_c()</code> throws an error if the inputs are not vectors or
not automatically coercible:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a><span class="fu">vec_c</span>(mean, <span class="fu">globalenv</span>())</span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a><span class="co">#&gt; Error in `vec_c()`:</span></span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a><span class="co">#&gt; ! `..1` must be a vector, not a function.</span></span>
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a></span>
<span id="cb13-5"><a href="#cb13-5" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="fu">Sys.Date</span>(), <span class="fu">factor</span>(<span class="st">&quot;x&quot;</span>), <span class="st">&quot;x&quot;</span>)</span>
<span id="cb13-6"><a href="#cb13-6" tabindex="-1"></a><span class="co">#&gt; Error in `vec_c()`:</span></span>
<span id="cb13-7"><a href="#cb13-7" tabindex="-1"></a><span class="co">#&gt; ! Can&#39;t combine `..1` &lt;date&gt; and `..2` &lt;factor&lt;bf275&gt;&gt;.</span></span></code></pre></div>
</div>
<div id="factors" class="section level3">
<h3>Factors</h3>
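<p>Combining two factors historically returned an integer vector (the R version used to render this vignette returns a factor, as the output below shows):</p>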
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>fa <span class="ot">&lt;-</span> <span class="fu">factor</span>(<span class="st">&quot;a&quot;</span>)</span>
<span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a>fb <span class="ot">&lt;-</span> <span class="fu">factor</span>(<span class="st">&quot;b&quot;</span>)</span>
<span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a></span>
<span id="cb14-4"><a href="#cb14-4" tabindex="-1"></a><span class="fu">c</span>(fa, fb)</span>
<span id="cb14-5"><a href="#cb14-5" tabindex="-1"></a><span class="co">#&gt; [1] a b</span></span>
<span id="cb14-6"><a href="#cb14-6" tabindex="-1"></a><span class="co">#&gt; Levels: a b</span></span></code></pre></div>
<p>(This is documented in <code>c()</code> but is still
undesirable.)</p>
<p><code>vec_c()</code> returns a factor taking the union of the levels.
This behaviour is motivated by pragmatics: there are many places in base
R that automatically convert character vectors to factors, so enforcing
stricter behaviour would be unnecessarily onerous. (This is backed up by
experience with <code>dplyr::bind_rows()</code>, which is stricter and
is a common source of user difficulty.)</p>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a><span class="fu">vec_c</span>(fa, fb)</span>
<span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a><span class="co">#&gt; [1] a b</span></span>
<span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a><span class="co">#&gt; Levels: a b</span></span>
<span id="cb15-4"><a href="#cb15-4" tabindex="-1"></a><span class="fu">vec_c</span>(fb, fa)</span>
<span id="cb15-5"><a href="#cb15-5" tabindex="-1"></a><span class="co">#&gt; [1] b a</span></span>
<span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a><span class="co">#&gt; Levels: b a</span></span></code></pre></div>
</div>
<div id="date-times" class="section level3">
<h3>Date-times</h3>
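<p><code>c()</code> historically stripped the time zone associated with date-times (the R version used to render this vignette keeps it for this single input, as the output below shows):</p>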
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>datetime_nz <span class="ot">&lt;-</span> <span class="fu">as.POSIXct</span>(<span class="st">&quot;2020-01-01 09:00&quot;</span>, <span class="at">tz =</span> <span class="st">&quot;Pacific/Auckland&quot;</span>)</span>
<span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a><span class="fu">c</span>(datetime_nz)</span>
<span id="cb16-3"><a href="#cb16-3" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 09:00:00 NZDT&quot;</span></span></code></pre></div>
<p>This behaviour is documented in <code>?DateTimeClasses</code> but is
the source of considerable user pain.</p>
<p><code>vec_c()</code> preserves time zones:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a><span class="fu">vec_c</span>(datetime_nz)</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 09:00:00 NZDT&quot;</span></span></code></pre></div>
<p>What time zone should the output have if inputs have different time
zones? One option would be to be strict and force the user to manually
align all the time zones. However, this is onerous (particularly because
there’s no easy way to change the time zone in base R), so vctrs chooses
to use the first non-local time zone:</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>datetime_local <span class="ot">&lt;-</span> <span class="fu">as.POSIXct</span>(<span class="st">&quot;2020-01-01 09:00&quot;</span>)</span>
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a>datetime_houston <span class="ot">&lt;-</span> <span class="fu">as.POSIXct</span>(<span class="st">&quot;2020-01-01 09:00&quot;</span>, <span class="at">tz =</span> <span class="st">&quot;US/Central&quot;</span>)</span>
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a></span>
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a><span class="fu">vec_c</span>(datetime_local, datetime_houston, datetime_nz)</span>
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 08:00:00 CST&quot; &quot;2020-01-01 09:00:00 CST&quot;</span></span>
<span id="cb18-6"><a href="#cb18-6" tabindex="-1"></a><span class="co">#&gt; [3] &quot;2019-12-31 14:00:00 CST&quot;</span></span>
<span id="cb18-7"><a href="#cb18-7" tabindex="-1"></a><span class="fu">vec_c</span>(datetime_houston, datetime_nz)</span>
<span id="cb18-8"><a href="#cb18-8" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 09:00:00 CST&quot; &quot;2019-12-31 14:00:00 CST&quot;</span></span>
<span id="cb18-9"><a href="#cb18-9" tabindex="-1"></a><span class="fu">vec_c</span>(datetime_nz, datetime_houston)</span>
<span id="cb18-10"><a href="#cb18-10" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 09:00:00 NZDT&quot; &quot;2020-01-02 04:00:00 NZDT&quot;</span></span></code></pre></div>
</div>
<div id="dates-and-date-times" class="section level3">
<h3>Dates and date-times</h3>
<p>Combining dates and date-times with <code>c()</code> gives silently
incorrect results:</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a>date <span class="ot">&lt;-</span> <span class="fu">as.Date</span>(<span class="st">&quot;2020-01-01&quot;</span>)</span>
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a>datetime <span class="ot">&lt;-</span> <span class="fu">as.POSIXct</span>(<span class="st">&quot;2020-01-01 09:00&quot;</span>)</span>
<span id="cb19-3"><a href="#cb19-3" tabindex="-1"></a></span>
<span id="cb19-4"><a href="#cb19-4" tabindex="-1"></a><span class="fu">c</span>(date, datetime)</span>
<span id="cb19-5"><a href="#cb19-5" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01&quot; &quot;2020-01-01&quot;</span></span>
<span id="cb19-6"><a href="#cb19-6" tabindex="-1"></a><span class="fu">c</span>(datetime, date)</span>
<span id="cb19-7"><a href="#cb19-7" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 09:00:00 EST&quot; &quot;2019-12-31 19:00:00 EST&quot;</span></span></code></pre></div>
<p>This behaviour arises because neither <code>c.Date()</code> nor
<code>c.POSIXct()</code> checks that all inputs are of the same type.</p>
<p><code>vec_c()</code> uses a standard set of rules to avoid this
problem. When you mix dates and date-times, vctrs returns a date-time
and converts dates to date-times at midnight (in the timezone of the
date-time).</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a><span class="fu">vec_c</span>(date, datetime)</span>
<span id="cb20-2"><a href="#cb20-2" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 00:00:00 EST&quot; &quot;2020-01-01 09:00:00 EST&quot;</span></span>
<span id="cb20-3"><a href="#cb20-3" tabindex="-1"></a><span class="fu">vec_c</span>(date, datetime_nz)</span>
<span id="cb20-4"><a href="#cb20-4" tabindex="-1"></a><span class="co">#&gt; [1] &quot;2020-01-01 00:00:00 NZDT&quot; &quot;2020-01-01 09:00:00 NZDT&quot;</span></span></code></pre></div>
</div>
<div id="missing-values" class="section level3">
<h3>Missing values</h3>
<p>If a missing value comes at the beginning of the inputs,
<code>c()</code> falls back to the internal behaviour, which strips all
attributes:</p>
<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">NA</span>, fa)</span>
<span id="cb21-2"><a href="#cb21-2" tabindex="-1"></a><span class="co">#&gt; [1] NA 1</span></span>
<span id="cb21-3"><a href="#cb21-3" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">NA</span>, date)</span>
<span id="cb21-4"><a href="#cb21-4" tabindex="-1"></a><span class="co">#&gt; [1] NA 18262</span></span>
<span id="cb21-5"><a href="#cb21-5" tabindex="-1"></a><span class="fu">c</span>(<span class="cn">NA</span>, datetime)</span>
<span id="cb21-6"><a href="#cb21-6" tabindex="-1"></a><span class="co">#&gt; [1] NA 1577887200</span></span></code></pre></div>
<p><code>vec_c()</code> takes a different approach, treating a logical
vector consisting only of <code>NA</code> as the
<code>unspecified()</code> class, which can be converted to any other 1d
type:</p>
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">NA</span>, fa)</span>
<span id="cb22-2"><a href="#cb22-2" tabindex="-1"></a><span class="co">#&gt; [1] &lt;NA&gt; a </span></span>
<span id="cb22-3"><a href="#cb22-3" tabindex="-1"></a><span class="co">#&gt; Levels: a</span></span>
<span id="cb22-4"><a href="#cb22-4" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">NA</span>, date)</span>
<span id="cb22-5"><a href="#cb22-5" tabindex="-1"></a><span class="co">#&gt; [1] NA &quot;2020-01-01&quot;</span></span>
<span id="cb22-6"><a href="#cb22-6" tabindex="-1"></a><span class="fu">vec_c</span>(<span class="cn">NA</span>, datetime)</span>
<span id="cb22-7"><a href="#cb22-7" tabindex="-1"></a><span class="co">#&gt; [1] NA &quot;2020-01-01 09:00:00 EST&quot;</span></span></code></pre></div>
</div>
<div id="data-frames" class="section level3">
<h3>Data frames</h3>
<p>Because it is <em>almost always</em> length-stable, <code>c()</code>
combines data frames column-wise (into a list):</p>
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" tabindex="-1"></a>df1 <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">x =</span> <span class="dv">1</span>)</span>
<span id="cb23-2"><a href="#cb23-2" tabindex="-1"></a>df2 <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">x =</span> <span class="dv">2</span>)</span>
<span id="cb23-3"><a href="#cb23-3" tabindex="-1"></a><span class="fu">str</span>(<span class="fu">c</span>(df1, df1))</span>
<span id="cb23-4"><a href="#cb23-4" tabindex="-1"></a><span class="co">#&gt; List of 2</span></span>
<span id="cb23-5"><a href="#cb23-5" tabindex="-1"></a><span class="co">#&gt; $ x: num 1</span></span>
<span id="cb23-6"><a href="#cb23-6" tabindex="-1"></a><span class="co">#&gt; $ x: num 1</span></span></code></pre></div>
<p><code>vec_c()</code> is size-stable, which implies it will row-bind
data frames:</p>
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a><span class="fu">vec_c</span>(df1, df2)</span>
<span id="cb24-2"><a href="#cb24-2" tabindex="-1"></a><span class="co">#&gt; x</span></span>
<span id="cb24-3"><a href="#cb24-3" tabindex="-1"></a><span class="co">#&gt; 1 1</span></span>
<span id="cb24-4"><a href="#cb24-4" tabindex="-1"></a><span class="co">#&gt; 2 2</span></span></code></pre></div>
</div>
<div id="matrices-and-arrays" class="section level3">
<h3>Matrices and arrays</h3>
<p>The same reasoning applies to matrices:</p>
<div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" tabindex="-1"></a>m <span class="ot">&lt;-</span> <span class="fu">matrix</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>, <span class="at">nrow =</span> <span class="dv">2</span>)</span>
<span id="cb25-2"><a href="#cb25-2" tabindex="-1"></a><span class="fu">c</span>(m, m)</span>
<span id="cb25-3"><a href="#cb25-3" tabindex="-1"></a><span class="co">#&gt; [1] 1 2 3 4 1 2 3 4</span></span>
<span id="cb25-4"><a href="#cb25-4" tabindex="-1"></a><span class="fu">vec_c</span>(m, m)</span>
<span id="cb25-5"><a href="#cb25-5" tabindex="-1"></a><span class="co">#&gt; [,1] [,2]</span></span>
<span id="cb25-6"><a href="#cb25-6" tabindex="-1"></a><span class="co">#&gt; [1,] 1 3</span></span>
<span id="cb25-7"><a href="#cb25-7" tabindex="-1"></a><span class="co">#&gt; [2,] 2 4</span></span>
<span id="cb25-8"><a href="#cb25-8" tabindex="-1"></a><span class="co">#&gt; [3,] 1 3</span></span>
<span id="cb25-9"><a href="#cb25-9" tabindex="-1"></a><span class="co">#&gt; [4,] 2 4</span></span></code></pre></div>
<p>One difference is that <code>vec_c()</code> will “broadcast” a vector
to match the dimensions of a matrix:</p>
<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" tabindex="-1"></a><span class="fu">c</span>(m, <span class="dv">1</span>)</span>
<span id="cb26-2"><a href="#cb26-2" tabindex="-1"></a><span class="co">#&gt; [1] 1 2 3 4 1</span></span>
<span id="cb26-3"><a href="#cb26-3" tabindex="-1"></a></span>
<span id="cb26-4"><a href="#cb26-4" tabindex="-1"></a><span class="fu">vec_c</span>(m, <span class="dv">1</span>)</span>
<span id="cb26-5"><a href="#cb26-5" tabindex="-1"></a><span class="co">#&gt; [,1] [,2]</span></span>
<span id="cb26-6"><a href="#cb26-6" tabindex="-1"></a><span class="co">#&gt; [1,] 1 3</span></span>
<span id="cb26-7"><a href="#cb26-7" tabindex="-1"></a><span class="co">#&gt; [2,] 2 4</span></span>
<span id="cb26-8"><a href="#cb26-8" tabindex="-1"></a><span class="co">#&gt; [3,] 1 1</span></span></code></pre></div>
</div>
<div id="implementation" class="section level3">
<h3>Implementation</h3>
<p>The basic implementation of <code>vec_c()</code> is reasonably
simple. We first figure out the properties of the output, i.e. the
common type and total size, then allocate it with
<code>vec_init()</code>, and finally insert each input into the correct
place in the output.</p>
<div class="sourceCode" id="cb27"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" tabindex="-1"></a>vec_c <span class="ot">&lt;-</span> <span class="cf">function</span>(...) {</span>
<span id="cb27-2"><a href="#cb27-2" tabindex="-1"></a> args <span class="ot">&lt;-</span> <span class="fu">compact</span>(<span class="fu">list2</span>(...))</span>
<span id="cb27-3"><a href="#cb27-3" tabindex="-1"></a></span>
<span id="cb27-4"><a href="#cb27-4" tabindex="-1"></a> ptype <span class="ot">&lt;-</span> <span class="fu">vec_ptype_common</span>(<span class="sc">!!!</span>args)</span>
<span id="cb27-5"><a href="#cb27-5" tabindex="-1"></a> <span class="cf">if</span> (<span class="fu">is.null</span>(ptype))</span>
<span id="cb27-6"><a href="#cb27-6" tabindex="-1"></a> <span class="fu">return</span>(<span class="cn">NULL</span>)</span>
<span id="cb27-7"><a href="#cb27-7" tabindex="-1"></a></span>
<span id="cb27-8"><a href="#cb27-8" tabindex="-1"></a> ns <span class="ot">&lt;-</span> <span class="fu">map_int</span>(args, vec_size)</span>
<span id="cb27-9"><a href="#cb27-9" tabindex="-1"></a> out <span class="ot">&lt;-</span> <span class="fu">vec_init</span>(ptype, <span class="fu">sum</span>(ns))</span>
<span id="cb27-10"><a href="#cb27-10" tabindex="-1"></a></span>
<span id="cb27-11"><a href="#cb27-11" tabindex="-1"></a> pos <span class="ot">&lt;-</span> <span class="dv">1</span></span>
<span id="cb27-12"><a href="#cb27-12" tabindex="-1"></a> <span class="cf">for</span> (i <span class="cf">in</span> <span class="fu">seq_along</span>(ns)) {</span>
<span id="cb27-13"><a href="#cb27-13" tabindex="-1"></a> n <span class="ot">&lt;-</span> ns[[i]]</span>
<span id="cb27-14"><a href="#cb27-14" tabindex="-1"></a> </span>
<span id="cb27-15"><a href="#cb27-15" tabindex="-1"></a> x <span class="ot">&lt;-</span> <span class="fu">vec_cast</span>(args[[i]], <span class="at">to =</span> ptype)</span>
<span id="cb27-16"><a href="#cb27-16" tabindex="-1"></a> <span class="fu">vec_slice</span>(out, pos<span class="sc">:</span>(pos <span class="sc">+</span> n <span class="sc">-</span> <span class="dv">1</span>)) <span class="ot">&lt;-</span> x</span>
<span id="cb27-17"><a href="#cb27-17" tabindex="-1"></a> pos <span class="ot">&lt;-</span> pos <span class="sc">+</span> n</span>
<span id="cb27-18"><a href="#cb27-18" tabindex="-1"></a> }</span>
<span id="cb27-19"><a href="#cb27-19" tabindex="-1"></a></span>
<span id="cb27-20"><a href="#cb27-20" tabindex="-1"></a> out</span>
<span id="cb27-21"><a href="#cb27-21" tabindex="-1"></a>}</span></code></pre></div>
<p>(The real <code>vec_c()</code> is a bit more complicated in order to
handle inner and outer names).</p>
</div>
</div>
<div id="ifelse" class="section level2">
<h2><code>ifelse()</code></h2>
<p>One of the functions that motivated the development of vctrs is
<code>ifelse()</code>. It has the surprising property that the result
value is “A vector of the same length and attributes (including
dimensions and class) as <code>test</code>”. To me, it seems more
reasonable for the type of the output to be controlled by the type of
the <code>yes</code> and <code>no</code> arguments.</p>
<p>In <code>dplyr::if_else()</code> I swung too far towards strictness:
it throws an error if <code>yes</code> and <code>no</code> are not the
same type. This is annoying in practice because it requires typed
missing values (<code>NA_character_</code> etc), and because the checks
are only on the class (not the full prototype), it’s easy to create
invalid output.</p>
<p>I found it much easier to understand what <code>ifelse()</code>
<em>should</em> do once I internalised the ideas of type- and
size-stability:</p>
<ul>
<li><p>The first argument must be logical.</p></li>
<li><p><code>vec_ptype(if_else(test, yes, no))</code> equals
<code>vec_ptype_common(yes, no)</code>. Unlike <code>ifelse()</code>,
this implies that <code>if_else()</code> must always evaluate both
<code>yes</code> and <code>no</code> in order to figure out the correct
type. I think this is consistent with <code>&amp;&amp;</code> (scalar
operation, short circuits) and <code>&amp;</code> (vectorised, evaluates
both sides).</p></li>
<li><p><code>vec_size(if_else(test, yes, no))</code> equals
<code>vec_size_common(test, yes, no)</code>. I think the output could
have the same size as <code>test</code> (i.e., the same behaviour as
<code>ifelse()</code>), but I <em>think</em> that, as a general rule, your
inputs should either be mutually recycling or not.</p></li>
</ul>
<p>This leads to the following implementation:</p>
<div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" tabindex="-1"></a>if_else <span class="ot">&lt;-</span> <span class="cf">function</span>(test, yes, no) {</span>
<span id="cb28-2"><a href="#cb28-2" tabindex="-1"></a> <span class="cf">if</span> (<span class="sc">!</span><span class="fu">is_logical</span>(test)) {</span>
<span id="cb28-3"><a href="#cb28-3" tabindex="-1"></a> <span class="fu">abort</span>(<span class="st">&quot;`test` must be a logical vector.&quot;</span>)</span>
<span id="cb28-4"><a href="#cb28-4" tabindex="-1"></a> }</span>
<span id="cb28-5"><a href="#cb28-5" tabindex="-1"></a> </span>
<span id="cb28-6"><a href="#cb28-6" tabindex="-1"></a> <span class="fu">c</span>(yes, no) <span class="sc">%&lt;-%</span> <span class="fu">vec_cast_common</span>(yes, no)</span>
<span id="cb28-7"><a href="#cb28-7" tabindex="-1"></a> <span class="fu">c</span>(test, yes, no) <span class="sc">%&lt;-%</span> <span class="fu">vec_recycle_common</span>(test, yes, no)</span>
<span id="cb28-8"><a href="#cb28-8" tabindex="-1"></a></span>
<span id="cb28-9"><a href="#cb28-9" tabindex="-1"></a> out <span class="ot">&lt;-</span> <span class="fu">vec_init</span>(yes, <span class="fu">vec_size</span>(yes))</span>
<span id="cb28-10"><a href="#cb28-10" tabindex="-1"></a> <span class="fu">vec_slice</span>(out, test) <span class="ot">&lt;-</span> <span class="fu">vec_slice</span>(yes, test)</span>
<span id="cb28-11"><a href="#cb28-11" tabindex="-1"></a> <span class="fu">vec_slice</span>(out, <span class="sc">!</span>test) <span class="ot">&lt;-</span> <span class="fu">vec_slice</span>(no, <span class="sc">!</span>test)</span>
<span id="cb28-12"><a href="#cb28-12" tabindex="-1"></a></span>
<span id="cb28-13"><a href="#cb28-13" tabindex="-1"></a> out</span>
<span id="cb28-14"><a href="#cb28-14" tabindex="-1"></a>}</span>
<span id="cb28-15"><a href="#cb28-15" tabindex="-1"></a></span>
<span id="cb28-16"><a href="#cb28-16" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="cn">NA</span>, <span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>)</span>
<span id="cb28-17"><a href="#cb28-17" tabindex="-1"></a><span class="fu">if_else</span>(x <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="st">&quot;small&quot;</span>, <span class="st">&quot;big&quot;</span>)</span>
<span id="cb28-18"><a href="#cb28-18" tabindex="-1"></a><span class="co">#&gt; [1] NA &quot;big&quot; &quot;big&quot; &quot;small&quot; &quot;small&quot;</span></span>
<span id="cb28-19"><a href="#cb28-19" tabindex="-1"></a><span class="fu">if_else</span>(x <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="fu">factor</span>(<span class="st">&quot;small&quot;</span>), <span class="fu">factor</span>(<span class="st">&quot;big&quot;</span>))</span>
<span id="cb28-20"><a href="#cb28-20" tabindex="-1"></a><span class="co">#&gt; [1] &lt;NA&gt; big big small small</span></span>
<span id="cb28-21"><a href="#cb28-21" tabindex="-1"></a><span class="co">#&gt; Levels: small big</span></span>
<span id="cb28-22"><a href="#cb28-22" tabindex="-1"></a><span class="fu">if_else</span>(x <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="fu">Sys.Date</span>(), <span class="fu">Sys.Date</span>() <span class="sc">+</span> <span class="dv">7</span>)</span>
<span id="cb28-23"><a href="#cb28-23" tabindex="-1"></a><span class="co">#&gt; [1] NA &quot;2023-12-08&quot; &quot;2023-12-08&quot; &quot;2023-12-01&quot; &quot;2023-12-01&quot;</span></span></code></pre></div>
<p>By using <code>vec_size()</code> and <code>vec_slice()</code>, this
definition of <code>if_else()</code> automatically works with
data frames and matrices:</p>
<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" tabindex="-1"></a><span class="fu">if_else</span>(x <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="fu">data.frame</span>(<span class="at">x =</span> <span class="dv">1</span>), <span class="fu">data.frame</span>(<span class="at">y =</span> <span class="dv">2</span>))</span>
<span id="cb29-2"><a href="#cb29-2" tabindex="-1"></a><span class="co">#&gt; x y</span></span>
<span id="cb29-3"><a href="#cb29-3" tabindex="-1"></a><span class="co">#&gt; 1 NA NA</span></span>
<span id="cb29-4"><a href="#cb29-4" tabindex="-1"></a><span class="co">#&gt; 2 NA 2</span></span>
<span id="cb29-5"><a href="#cb29-5" tabindex="-1"></a><span class="co">#&gt; 3 NA 2</span></span>
<span id="cb29-6"><a href="#cb29-6" tabindex="-1"></a><span class="co">#&gt; 4 1 NA</span></span>
<span id="cb29-7"><a href="#cb29-7" tabindex="-1"></a><span class="co">#&gt; 5 1 NA</span></span>
<span id="cb29-8"><a href="#cb29-8" tabindex="-1"></a></span>
<span id="cb29-9"><a href="#cb29-9" tabindex="-1"></a><span class="fu">if_else</span>(x <span class="sc">&gt;</span> <span class="dv">2</span>, <span class="fu">matrix</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>, <span class="at">ncol =</span> <span class="dv">2</span>), <span class="fu">cbind</span>(<span class="dv">30</span>, <span class="dv">30</span>))</span>
<span id="cb29-10"><a href="#cb29-10" tabindex="-1"></a><span class="co">#&gt; [,1] [,2]</span></span>
<span id="cb29-11"><a href="#cb29-11" tabindex="-1"></a><span class="co">#&gt; [1,] NA NA</span></span>
<span id="cb29-12"><a href="#cb29-12" tabindex="-1"></a><span class="co">#&gt; [2,] 30 30</span></span>
<span id="cb29-13"><a href="#cb29-13" tabindex="-1"></a><span class="co">#&gt; [3,] 30 30</span></span>
<span id="cb29-14"><a href="#cb29-14" tabindex="-1"></a><span class="co">#&gt; [4,] 4 9</span></span>
<span id="cb29-15"><a href="#cb29-15" tabindex="-1"></a><span class="co">#&gt; [5,] 5 10</span></span></code></pre></div>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // MathJax is requested at runtime through a generated <script> element
  // (instead of a static tag) for compatibility with self-contained output.
  var mathjaxLoader = document.createElement("script");
  mathjaxLoader.type = "text/javascript";
  mathjaxLoader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

  // Attach the loader to the first <head> element of the document.
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxLoader);
})();
</script>
</body>
</html>