143 lines
4.2 KiB
R
143 lines
4.2 KiB
R
|
## ----include = FALSE----------------------------------------------------------
|
|||
|
library(stringr)
|
|||
|
knitr::opts_chunk$set(
|
|||
|
comment = "#>",
|
|||
|
collapse = TRUE
|
|||
|
)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_length("abc")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- c("abcdef", "ghifjk")
|
|||
|
|
|||
|
# The 3rd letter
|
|||
|
str_sub(x, 3, 3)
|
|||
|
|
|||
|
# The 2nd to 2nd-to-last character
|
|||
|
str_sub(x, 2, -2)
|
|||
|
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_sub(x, 3, 3) <- "X"
|
|||
|
x
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_dup(x, c(2, 3))
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- c("abc", "defghi")
|
|||
|
str_pad(x, 10) # default pads on left
|
|||
|
str_pad(x, 10, "both")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_pad(x, 4)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- c("Short", "This is a long string")
|
|||
|
|
|||
|
x %>%
|
|||
|
str_trunc(10) %>%
|
|||
|
str_pad(10, "right")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- c(" a ", "b ", " c")
|
|||
|
str_trim(x)
|
|||
|
str_trim(x, "left")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
jabberwocky <- str_c(
|
|||
|
"`Twas brillig, and the slithy toves ",
|
|||
|
"did gyre and gimble in the wabe: ",
|
|||
|
"All mimsy were the borogoves, ",
|
|||
|
"and the mome raths outgrabe. "
|
|||
|
)
|
|||
|
cat(str_wrap(jabberwocky, width = 40))
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- "I like horses."
|
|||
|
str_to_upper(x)
|
|||
|
str_to_title(x)
|
|||
|
|
|||
|
str_to_lower(x)
|
|||
|
# Turkish has two sorts of i: with and without the dot
|
|||
|
str_to_lower(x, "tr")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- c("y", "i", "k")
|
|||
|
str_order(x)
|
|||
|
|
|||
|
str_sort(x)
|
|||
|
# In Lithuanian, y comes between i and k
|
|||
|
str_sort(x, locale = "lt")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
strings <- c(
|
|||
|
"apple",
|
|||
|
"219 733 8965",
|
|||
|
"329-293-8753",
|
|||
|
"Work: 579-499-7527; Home: 543.355.3679"
|
|||
|
)
|
|||
|
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
# Which strings contain phone numbers?
|
|||
|
str_detect(strings, phone)
|
|||
|
str_subset(strings, phone)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
# How many phone numbers in each string?
|
|||
|
str_count(strings, phone)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
# Where in the string is the phone number located?
|
|||
|
(loc <- str_locate(strings, phone))
|
|||
|
str_locate_all(strings, phone)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
# What are the phone numbers?
|
|||
|
str_extract(strings, phone)
|
|||
|
str_extract_all(strings, phone)
|
|||
|
str_extract_all(strings, phone, simplify = TRUE)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
# Pull out the three components of the match
|
|||
|
str_match(strings, phone)
|
|||
|
str_match_all(strings, phone)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_replace(strings, phone, "XXX-XXX-XXXX")
|
|||
|
str_replace_all(strings, phone, "XXX-XXX-XXXX")
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_split("a-b-c", "-")
|
|||
|
str_split_fixed("a-b-c", "-", n = 2)
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
a1 <- "\u00e1"
|
|||
|
a2 <- "a\u0301"
|
|||
|
c(a1, a2)
|
|||
|
a1 == a2
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_detect(a1, fixed(a2))
|
|||
|
str_detect(a1, coll(a2))
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
i <- c("I", "İ", "i", "ı")
|
|||
|
i
|
|||
|
|
|||
|
str_subset(i, coll("i", ignore_case = TRUE))
|
|||
|
str_subset(i, coll("i", ignore_case = TRUE, locale = "tr"))
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
x <- "This is a sentence."
|
|||
|
str_split(x, boundary("word"))
|
|||
|
str_count(x, boundary("word"))
|
|||
|
str_extract_all(x, boundary("word"))
|
|||
|
|
|||
|
## -----------------------------------------------------------------------------
|
|||
|
str_split(x, "")
|
|||
|
str_count(x, "")
|
|||
|
|