143 lines
4.2 KiB
R
Raw Permalink Normal View History

2025-01-12 00:52:51 +08:00
## ----include = FALSE----------------------------------------------------------
library(stringr)
# Chunk options for the rendered vignette: `collapse = TRUE` merges each
# chunk's source and output into one block, and `comment = "#>"` sets the
# prefix placed in front of every output line.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## -----------------------------------------------------------------------------
# Number of characters in a string.
str_length(string = "abc")
## -----------------------------------------------------------------------------
x <- c("abcdef", "ghifjk")
# Extract a single character: position 3 of every element.
str_sub(x, start = 3, end = 3)
# Negative positions count back from the end of the string, so this keeps
# everything from the 2nd character up to the 2nd-to-last one.
str_sub(x, start = 2, end = -2)
## -----------------------------------------------------------------------------
# The assignment form overwrites the selected range inside `x` itself.
str_sub(x, start = 3, end = 3) <- "X"
x
## -----------------------------------------------------------------------------
# `times` is matched element-wise: the first string is repeated twice, the
# second three times.
str_dup(x, times = c(2, 3))
## -----------------------------------------------------------------------------
x <- c("abc", "defghi")
# Pad every element to a width of 10. Without `side`, padding goes on the left.
str_pad(x, width = 10)
str_pad(x, width = 10, side = "both")
## -----------------------------------------------------------------------------
# Target width 4 is shorter than "defghi"; padding only ever adds characters.
str_pad(x, width = 4)
## -----------------------------------------------------------------------------
x <- c("Short", "This is a long string")
# Fixed-width output: truncate to 10 first, then right-pad whatever is shorter.
x %>%
  str_trunc(width = 10) %>%
  str_pad(width = 10, side = "right")
## -----------------------------------------------------------------------------
x <- c(" a ", "b ", " c")
# Strip surrounding whitespace: both ends (the default), then the left only.
str_trim(x)
str_trim(x, side = "left")
## -----------------------------------------------------------------------------
# Glue the verse fragments together into one long single-line string.
jabberwocky <- str_c(
  c(
    "`Twas brillig, and the slithy toves ",
    "did gyre and gimble in the wabe: ",
    "All mimsy were the borogoves, ",
    "and the mome raths outgrabe. "
  ),
  collapse = ""
)
# Re-flow the text to a target line width of 40 and write it out with cat()
# so the inserted line breaks are rendered rather than shown as escapes.
cat(str_wrap(jabberwocky, width = 40))
## -----------------------------------------------------------------------------
x <- "I like horses."
# Case conversion using the default locale.
str_to_upper(x)
str_to_title(x)
str_to_lower(x)
# Case mapping depends on the locale: Turkish distinguishes a dotted and a
# dotless i, so lower-casing under "tr" differs from the default.
str_to_lower(x, locale = "tr")
## -----------------------------------------------------------------------------
x <- c("y", "i", "k")
# str_order() returns the sorting permutation, str_sort() the sorted values.
str_order(x)
str_sort(x)
# Collation depends on the locale as well: Lithuanian sorts y between i and k.
str_sort(x, locale = "lt")
## -----------------------------------------------------------------------------
# Sample inputs: one string with no phone number, two with exactly one each,
# and one containing two.
strings <- c(
  "apple", "219 733 8965", "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)
# Phone-number regex with three capture groups: a 3-digit group whose first
# digit is 2-9, a 3-digit group, and a 4-digit group. The groups are separated
# by a single hyphen, space, or dot.
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
## -----------------------------------------------------------------------------
# Presence: a logical flag per string, then just the strings that match.
str_detect(strings, pattern = phone)
str_subset(strings, pattern = phone)
## -----------------------------------------------------------------------------
# Number of matches found in each string.
str_count(strings, pattern = phone)
## -----------------------------------------------------------------------------
# Position of the first match per string (kept in `loc` and printed), followed
# by the positions of every match.
loc <- str_locate(strings, pattern = phone)
loc
str_locate_all(strings, pattern = phone)
## -----------------------------------------------------------------------------
# Matched text: first match only, every match, and every match again with
# `simplify = TRUE` requesting the simplified (non-list) form.
str_extract(strings, pattern = phone)
str_extract_all(strings, pattern = phone)
str_extract_all(strings, pattern = phone, simplify = TRUE)
## -----------------------------------------------------------------------------
# Capture groups: break each match into the three parenthesised components.
str_match(strings, pattern = phone)
str_match_all(strings, pattern = phone)
## -----------------------------------------------------------------------------
# Mask phone numbers: first occurrence per string, then all occurrences.
str_replace(strings, pattern = phone, replacement = "XXX-XXX-XXXX")
str_replace_all(strings, pattern = phone, replacement = "XXX-XXX-XXXX")
## -----------------------------------------------------------------------------
# Break the string apart at every "-".
str_split("a-b-c", pattern = "-")
# Same separator, but limited to a fixed number of pieces (n = 2).
str_split_fixed("a-b-c", pattern = "-", n = 2)
## -----------------------------------------------------------------------------
# Two encodings of an accented "a": `a1` is the single code point U+00E1,
# `a2` is a plain "a" followed by the combining code point U+0301.
a1 <- "\u00e1"
a2 <- "a\u0301"
c(a1, a2)
# Compare the two spellings with plain `==`.
a1 == a2
## -----------------------------------------------------------------------------
# Look for `a2` inside `a1` with each matching engine: fixed() compares the
# literal text, coll() compares using collation rules.
str_detect(a1, pattern = fixed(a2))
str_detect(a1, pattern = coll(a2))
## -----------------------------------------------------------------------------
# Upper- and lower-case i, each in a dotted and a dotless variant.
i <- c("I", "İ", "i", "ı")
i
# Case-insensitive collation match for "i": default locale first, then
# Turkish ("tr") rules.
str_subset(i, pattern = coll("i", ignore_case = TRUE))
str_subset(i, pattern = coll("i", ignore_case = TRUE, locale = "tr"))
## -----------------------------------------------------------------------------
x <- "This is a sentence."
# Use word boundaries as the pattern: split on them, count them, extract them.
str_split(x, pattern = boundary(type = "word"))
str_count(x, pattern = boundary(type = "word"))
str_extract_all(x, pattern = boundary(type = "word"))
## -----------------------------------------------------------------------------
# The same split and count with the empty string as the pattern.
str_split(x, pattern = "")
str_count(x, pattern = "")