# File listing: 143 lines, 4.2 KiB, R
## ----include = FALSE----------------------------------------------------------
# Attach stringr, the only package this script loads.
library(stringr)

# Set the knitr chunk options used by the chunks that follow.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE
)

## -----------------------------------------------------------------------------
# Length of a single string
str_length("abc")

## -----------------------------------------------------------------------------
x <- c("abcdef", "ghifjk")

# The 3rd letter
str_sub(x, 3, 3)

# The 2nd to 2nd-to-last character (a negative position counts from the end)
str_sub(x, 2, -2)

## -----------------------------------------------------------------------------
# Assignment form of str_sub(): overwrite the 3rd character of each element
str_sub(x, 3, 3) <- "X"
x

## -----------------------------------------------------------------------------
# Repeat the first element twice and the second three times
str_dup(x, c(2, 3))

## -----------------------------------------------------------------------------
x <- c("abc", "defghi")
str_pad(x, 10) # default pads on left
str_pad(x, 10, side = "both")

## -----------------------------------------------------------------------------
# Requested width (4) is smaller than the 6 characters of "defghi"
str_pad(x, 4)

## -----------------------------------------------------------------------------
x <- c("Short", "This is a long string")

# Truncate to a width of 10, then pad on the right to the same width
x %>%
  str_trunc(10) %>%
  str_pad(10, side = "right")

## -----------------------------------------------------------------------------
x <- c(" a ", "b ", " c")
# Default call, with no side given
str_trim(x)
# Restrict trimming to the left side
str_trim(x, side = "left")

## -----------------------------------------------------------------------------
# Build one long string from four pieces, then re-wrap it at width 40
jabberwocky <- str_c(
  "`Twas brillig, and the slithy toves ",
  "did gyre and gimble in the wabe: ",
  "All mimsy were the borogoves, ",
  "and the mome raths outgrabe. "
)
cat(str_wrap(jabberwocky, width = 40))

## -----------------------------------------------------------------------------
x <- "I like horses."
str_to_upper(x)
str_to_title(x)

str_to_lower(x)
# Turkish has two sorts of i: with and without the dot
str_to_lower(x, locale = "tr")

## -----------------------------------------------------------------------------
x <- c("y", "i", "k")
str_order(x)

str_sort(x)
# In Lithuanian, y comes between i and k
str_sort(x, locale = "lt")

## -----------------------------------------------------------------------------
# Sample inputs: no number, one number (two formats), and two numbers
strings <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Three capture groups (3, 3 and 4 digits; the first digit must be 2-9),
# separated by a dash, a space or a dot
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"

## -----------------------------------------------------------------------------
# Which strings contain phone numbers?
str_detect(strings, phone)
str_subset(strings, phone)

## -----------------------------------------------------------------------------
# How many phone numbers in each string?
str_count(strings, phone)

## -----------------------------------------------------------------------------
# Where in the string is the phone number located?
# The outer parentheses print the value as well as assigning it to `loc`.
(loc <- str_locate(strings, phone))
str_locate_all(strings, phone)

## -----------------------------------------------------------------------------
# What are the phone numbers?
str_extract(strings, phone)
str_extract_all(strings, phone)
# Same extraction, requesting the simplified return form
str_extract_all(strings, phone, simplify = TRUE)

## -----------------------------------------------------------------------------
# Pull out the three components of the match
str_match(strings, phone)
str_match_all(strings, phone)

## -----------------------------------------------------------------------------
# Mask phone numbers with a fixed placeholder
str_replace(strings, phone, "XXX-XXX-XXXX")
str_replace_all(strings, phone, "XXX-XXX-XXXX")

## -----------------------------------------------------------------------------
# Split on "-"; the second call asks for exactly n = 2 pieces
str_split("a-b-c", "-")
str_split_fixed("a-b-c", "-", n = 2)

## -----------------------------------------------------------------------------
# a1 is the single code point U+00E1; a2 is "a" followed by U+0301
a1 <- "\u00e1"
a2 <- "a\u0301"
c(a1, a2)
a1 == a2

## -----------------------------------------------------------------------------
# Compare the two encodings of the same letter under each pattern modifier
str_detect(a1, fixed(a2))
str_detect(a1, coll(a2))

## -----------------------------------------------------------------------------
# Dotted and dotless i, in upper and lower case
i <- c("I", "İ", "i", "ı")
i

# Case-insensitive match for "i": default locale, then Turkish ("tr")
str_subset(i, coll("i", ignore_case = TRUE))
str_subset(i, coll("i", ignore_case = TRUE, locale = "tr"))

## -----------------------------------------------------------------------------
x <- "This is a sentence."
# Use word boundaries as the pattern for splitting, counting and extracting
str_split(x, boundary("word"))
str_count(x, boundary("word"))
str_extract_all(x, boundary("word"))

## -----------------------------------------------------------------------------
# Same split and count, this time with an empty pattern
str_split(x, "")
str_count(x, "")