281 lines
27 KiB
HTML
Raw Normal View History

2025-01-12 00:52:51 +08:00
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Jim Hester" />
<meta name="date" content="2021-11-24" />
<title>URL Validation</title>
<script src="data:application/javascript;base64,Ly8gUGFuZG9jIDIuOSBhZGRzIGF0dHJpYnV0ZXMgb24gYm90aCBoZWFkZXIgYW5kIGRpdi4gV2UgcmVtb3ZlIHRoZSBmb3JtZXIgKHRvCi8vIGJlIGNvbXBhdGlibGUgd2l0aCB0aGUgYmVoYXZpb3Igb2YgUGFuZG9jIDwgMi44KS4KZG9jdW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignRE9NQ29udGVudExvYWRlZCcsIGZ1bmN0aW9uKGUpIHsKICB2YXIgaHMgPSBkb2N1bWVudC5xdWVyeVNlbGVjdG9yQWxsKCJkaXYuc2VjdGlvbltjbGFzcyo9J2xldmVsJ10gPiA6Zmlyc3QtY2hpbGQiKTsKICB2YXIgaSwgaCwgYTsKICBmb3IgKGkgPSAwOyBpIDwgaHMubGVuZ3RoOyBpKyspIHsKICAgIGggPSBoc1tpXTsKICAgIGlmICghL15oWzEtNl0kL2kudGVzdChoLnRhZ05hbWUpKSBjb250aW51ZTsgIC8vIGl0IHNob3VsZCBiZSBhIGhlYWRlciBoMS1oNgogICAgYSA9IGguYXR0cmlidXRlczsKICAgIHdoaWxlIChhLmxlbmd0aCA+IDApIGgucmVtb3ZlQXR0cmlidXRlKGFbMF0ubmFtZSk7CiAgfQp9KTsK"></script>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
var sheets = document.styleSheets;
for (var i = 0; i < sheets.length; i++) {
if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
try { var rules = sheets[i].cssRules; } catch (e) { continue; }
for (var j = 0; j < rules.length; j++) {
var rule = rules[j];
// check if there is a div.sourceCode rule
if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
var style = rule.style.cssText;
// check if color or background-color is set
if (rule.style.color === '' && rule.style.backgroundColor === '') continue;
// replace div.sourceCode by a pre.sourceCode rule
sheets[i].deleteRule(j);
sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
}
}
})();
</script>
<link rel="stylesheet" href="data:text/css,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%
</head>
<body>
<h1 class="title toc-ignore">URL Validation</h1>
<h4 class="author">Jim Hester</h4>
<h4 class="date">2021-11-24</h4>
<p>Consider the task of correctly <a href="https://mathiasbynens.be/demo/url-regex">validating a URL</a>. From that page two conclusions can be made.</p>
<ol style="list-style-type: decimal">
<li>Validating URLs require complex regular expressions.</li>
<li>Creating a correct regular expression is hard! (only 1 out of 13 regexs were valid for all cases).</li>
</ol>
<p>Because of this one may be tempted to simply copy the best regex you can find (<a href="https://gist.github.com/dperini/729294">gist</a>).</p>
<p>The problem with this is that while you can copy it now, what happens later when you find a case that is not handled correctly? Can you correctly interpret and modify this?</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="st">&quot;^(?:(?:http(?:s)?|ftp)://)(?:</span><span class="sc">\\</span><span class="st">S+(?::(?:</span><span class="sc">\\</span><span class="st">S)*)?@)?(?:(?:[a-z0-9\u00a1-\uffff](?:-)*)*(?:[a-z0-9\u00a1-\uffff])+)(?:</span><span class="sc">\\</span><span class="st">.(?:[a-z0-9\u00a1-\uffff](?:-)*)*(?:[a-z0-9\u00a1-\uffff])+)*(?:</span><span class="sc">\\</span><span class="st">.(?:[a-z0-9\u00a1-\uffff]){2,})(?::(?:</span><span class="sc">\\</span><span class="st">d){2,5})?(?:/(?:</span><span class="sc">\\</span><span class="st">S)*)?$&quot;</span></span></code></pre></div>
<p>However if you re-create the regex with <code>rex</code> it is much easier to understand and modify later if needed.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(rex)</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(magrittr)</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>valid_chars <span class="ot">&lt;-</span> <span class="fu">rex</span>(<span class="fu">except_some_of</span>(<span class="st">&quot;.&quot;</span>, <span class="st">&quot;/&quot;</span>, <span class="st">&quot; &quot;</span>, <span class="st">&quot;-&quot;</span>))</span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>re <span class="ot">&lt;-</span> <span class="fu">rex</span>(</span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a> start,</span>
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a> <span class="co"># protocol identifier (optional) + //</span></span>
<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a> <span class="fu">group</span>(<span class="fu">list</span>(<span class="st">&quot;http&quot;</span>, <span class="fu">maybe</span>(<span class="st">&quot;s&quot;</span>)) <span class="sc">%or%</span> <span class="st">&quot;ftp&quot;</span>, <span class="st">&quot;://&quot;</span>),</span>
<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a> <span class="co"># user:pass authentication (optional)</span></span>
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a> <span class="fu">maybe</span>(non_spaces,</span>
<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a> <span class="fu">maybe</span>(<span class="st">&quot;:&quot;</span>, <span class="fu">zero_or_more</span>(non_space)),</span>
<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;@&quot;</span>),</span>
<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a> <span class="co">#host name</span></span>
<span id="cb2-18"><a href="#cb2-18" aria-hidden="true" tabindex="-1"></a> <span class="fu">group</span>(<span class="fu">zero_or_more</span>(valid_chars, <span class="fu">zero_or_more</span>(<span class="st">&quot;-&quot;</span>)), <span class="fu">one_or_more</span>(valid_chars)),</span>
<span id="cb2-19"><a href="#cb2-19" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-20"><a href="#cb2-20" aria-hidden="true" tabindex="-1"></a> <span class="co">#domain name</span></span>
<span id="cb2-21"><a href="#cb2-21" aria-hidden="true" tabindex="-1"></a> <span class="fu">zero_or_more</span>(<span class="st">&quot;.&quot;</span>, <span class="fu">zero_or_more</span>(valid_chars, <span class="fu">zero_or_more</span>(<span class="st">&quot;-&quot;</span>)), <span class="fu">one_or_more</span>(valid_chars)),</span>
<span id="cb2-22"><a href="#cb2-22" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-23"><a href="#cb2-23" aria-hidden="true" tabindex="-1"></a> <span class="co">#TLD identifier</span></span>
<span id="cb2-24"><a href="#cb2-24" aria-hidden="true" tabindex="-1"></a> <span class="fu">group</span>(<span class="st">&quot;.&quot;</span>, valid_chars <span class="sc">%&gt;%</span> <span class="fu">at_least</span>(<span class="dv">2</span>)),</span>
<span id="cb2-25"><a href="#cb2-25" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-26"><a href="#cb2-26" aria-hidden="true" tabindex="-1"></a> <span class="co"># server port number (optional)</span></span>
<span id="cb2-27"><a href="#cb2-27" aria-hidden="true" tabindex="-1"></a> <span class="fu">maybe</span>(<span class="st">&quot;:&quot;</span>, digit <span class="sc">%&gt;%</span> <span class="fu">between</span>(<span class="dv">2</span>, <span class="dv">5</span>)),</span>
<span id="cb2-28"><a href="#cb2-28" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-29"><a href="#cb2-29" aria-hidden="true" tabindex="-1"></a> <span class="co"># resource path (optional)</span></span>
<span id="cb2-30"><a href="#cb2-30" aria-hidden="true" tabindex="-1"></a> <span class="fu">maybe</span>(<span class="st">&quot;/&quot;</span>, non_space <span class="sc">%&gt;%</span> <span class="fu">zero_or_more</span>()),</span>
<span id="cb2-31"><a href="#cb2-31" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-32"><a href="#cb2-32" aria-hidden="true" tabindex="-1"></a> end</span>
<span id="cb2-33"><a href="#cb2-33" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<p>We can then validate that it correctly identifies both good and bad URLs. (<em>IP address validation removed</em>)</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>good <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">&quot;http://foo.com/blah_blah&quot;</span>,</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/blah_blah/&quot;</span>,</span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/blah_blah_(wikipedia)&quot;</span>,</span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/blah_blah_(wikipedia)_(again)&quot;</span>,</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://www.example.com/wpstyle/?p=364&quot;</span>,</span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;https://www.example.com/foo/?bar=baz&amp;inga=42&amp;quux&quot;</span>,</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://✪df.ws/123&quot;</span>,</span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid:password@example.com:8080&quot;</span>,</span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid:password@example.com:8080/&quot;</span>,</span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid@example.com&quot;</span>,</span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid@example.com/&quot;</span>,</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid@example.com:8080&quot;</span>,</span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid@example.com:8080/&quot;</span>,</span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid:password@example.com&quot;</span>,</span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://userid:password@example.com/&quot;</span>,</span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://➡.ws/䨹&quot;</span>,</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://⌘.ws&quot;</span>,</span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://⌘.ws/&quot;</span>,</span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/blah_(wikipedia)#cite-1&quot;</span>,</span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/blah_(wikipedia)_blah#cite-1&quot;</span>,</span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/unicode_(✪)_in_parens&quot;</span>,</span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.com/(something)?after=parens&quot;</span>,</span>
<span id="cb3-23"><a href="#cb3-23" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://☺.damowmow.com/&quot;</span>,</span>
<span id="cb3-24"><a href="#cb3-24" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://code.google.com/events/#&amp;product=browser&quot;</span>,</span>
<span id="cb3-25"><a href="#cb3-25" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://j.mp&quot;</span>,</span>
<span id="cb3-26"><a href="#cb3-26" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;ftp://foo.bar/baz&quot;</span>,</span>
<span id="cb3-27"><a href="#cb3-27" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.bar/?q=Test%20URL-encoded%20stuff&quot;</span>,</span>
<span id="cb3-28"><a href="#cb3-28" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://مثال.إختبار&quot;</span>,</span>
<span id="cb3-29"><a href="#cb3-29" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://例子.测试&quot;</span>,</span>
<span id="cb3-30"><a href="#cb3-30" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://-.~_!$&amp;&#39;()*+,;=:%40:80%2f::::::@example.com&quot;</span>,</span>
<span id="cb3-31"><a href="#cb3-31" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://1337.net&quot;</span>,</span>
<span id="cb3-32"><a href="#cb3-32" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://a.b-c.de&quot;</span>,</span>
<span id="cb3-33"><a href="#cb3-33" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://223.255.255.254&quot;</span>)</span>
<span id="cb3-34"><a href="#cb3-34" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-35"><a href="#cb3-35" aria-hidden="true" tabindex="-1"></a>bad <span class="ot">&lt;-</span> <span class="fu">c</span>(</span>
<span id="cb3-36"><a href="#cb3-36" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://&quot;</span>,</span>
<span id="cb3-37"><a href="#cb3-37" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://.&quot;</span>,</span>
<span id="cb3-38"><a href="#cb3-38" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://..&quot;</span>,</span>
<span id="cb3-39"><a href="#cb3-39" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://../&quot;</span>,</span>
<span id="cb3-40"><a href="#cb3-40" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://?&quot;</span>,</span>
<span id="cb3-41"><a href="#cb3-41" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://??&quot;</span>,</span>
<span id="cb3-42"><a href="#cb3-42" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://??/&quot;</span>,</span>
<span id="cb3-43"><a href="#cb3-43" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://#&quot;</span>,</span>
<span id="cb3-44"><a href="#cb3-44" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://##&quot;</span>,</span>
<span id="cb3-45"><a href="#cb3-45" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://##/&quot;</span>,</span>
<span id="cb3-46"><a href="#cb3-46" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.bar?q=Spaces should be encoded&quot;</span>,</span>
<span id="cb3-47"><a href="#cb3-47" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;//&quot;</span>,</span>
<span id="cb3-48"><a href="#cb3-48" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;//a&quot;</span>,</span>
<span id="cb3-49"><a href="#cb3-49" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;///a&quot;</span>,</span>
<span id="cb3-50"><a href="#cb3-50" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;///&quot;</span>,</span>
<span id="cb3-51"><a href="#cb3-51" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http:///a&quot;</span>,</span>
<span id="cb3-52"><a href="#cb3-52" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;foo.com&quot;</span>,</span>
<span id="cb3-53"><a href="#cb3-53" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;rdar://1234&quot;</span>,</span>
<span id="cb3-54"><a href="#cb3-54" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;h://test&quot;</span>,</span>
<span id="cb3-55"><a href="#cb3-55" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http:// shouldfail.com&quot;</span>,</span>
<span id="cb3-56"><a href="#cb3-56" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;:// should fail&quot;</span>,</span>
<span id="cb3-57"><a href="#cb3-57" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://foo.bar/foo(bar)baz quux&quot;</span>,</span>
<span id="cb3-58"><a href="#cb3-58" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;ftps://foo.bar/&quot;</span>,</span>
<span id="cb3-59"><a href="#cb3-59" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://-error-.invalid/&quot;</span>,</span>
<span id="cb3-60"><a href="#cb3-60" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://-a.b.co&quot;</span>,</span>
<span id="cb3-61"><a href="#cb3-61" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://a.b-.co&quot;</span>,</span>
<span id="cb3-62"><a href="#cb3-62" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://0.0.0.0&quot;</span>,</span>
<span id="cb3-63"><a href="#cb3-63" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://3628126748&quot;</span>,</span>
<span id="cb3-64"><a href="#cb3-64" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://.www.foo.bar/&quot;</span>,</span>
<span id="cb3-65"><a href="#cb3-65" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://www.foo.bar./&quot;</span>,</span>
<span id="cb3-66"><a href="#cb3-66" aria-hidden="true" tabindex="-1"></a> <span class="st">&quot;http://.www.foo.bar./&quot;</span>)</span>
<span id="cb3-67"><a href="#cb3-67" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-68"><a href="#cb3-68" aria-hidden="true" tabindex="-1"></a><span class="fu">all</span>(<span class="fu">grepl</span>(re, good) <span class="sc">==</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<pre><code>## [1] TRUE</code></pre>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">all</span>(<span class="fu">grepl</span>(re, bad) <span class="sc">==</span> <span class="cn">FALSE</span>)</span></code></pre></div>
<pre><code>## [1] TRUE</code></pre>
<p>You can now see the power and expressiveness of building regular expressions with <code>rex</code>!</p>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
var script = document.createElement("script");
script.type = "text/javascript";
script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>