2025-01-12 04:36:52 +08:00

143 lines
4.2 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

## ----include = FALSE----------------------------------------------------------
library(stringr)
knitr::opts_chunk$set(
comment = "#>",
collapse = TRUE
)
## -----------------------------------------------------------------------------
str_length("abc")
## -----------------------------------------------------------------------------
x <- c("abcdef", "ghifjk")
# The 3rd letter
str_sub(x, 3, 3)
# The 2nd to 2nd-to-last character
str_sub(x, 2, -2)
## -----------------------------------------------------------------------------
str_sub(x, 3, 3) <- "X"
x
## -----------------------------------------------------------------------------
str_dup(x, c(2, 3))
## -----------------------------------------------------------------------------
x <- c("abc", "defghi")
str_pad(x, 10) # default pads on left
str_pad(x, 10, "both")
## -----------------------------------------------------------------------------
str_pad(x, 4)
## -----------------------------------------------------------------------------
x <- c("Short", "This is a long string")
x %>%
str_trunc(10) %>%
str_pad(10, "right")
## -----------------------------------------------------------------------------
x <- c(" a ", "b ", " c")
str_trim(x)
str_trim(x, "left")
## -----------------------------------------------------------------------------
jabberwocky <- str_c(
"`Twas brillig, and the slithy toves ",
"did gyre and gimble in the wabe: ",
"All mimsy were the borogoves, ",
"and the mome raths outgrabe. "
)
cat(str_wrap(jabberwocky, width = 40))
## -----------------------------------------------------------------------------
x <- "I like horses."
str_to_upper(x)
str_to_title(x)
str_to_lower(x)
# Turkish has two sorts of i: with and without the dot
str_to_lower(x, "tr")
## -----------------------------------------------------------------------------
x <- c("y", "i", "k")
str_order(x)
str_sort(x)
# In Lithuanian, y comes between i and k
str_sort(x, locale = "lt")
## -----------------------------------------------------------------------------
strings <- c(
"apple",
"219 733 8965",
"329-293-8753",
"Work: 579-499-7527; Home: 543.355.3679"
)
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
## -----------------------------------------------------------------------------
# Which strings contain phone numbers?
str_detect(strings, phone)
str_subset(strings, phone)
## -----------------------------------------------------------------------------
# How many phone numbers in each string?
str_count(strings, phone)
## -----------------------------------------------------------------------------
# Where in the string is the phone number located?
(loc <- str_locate(strings, phone))
str_locate_all(strings, phone)
## -----------------------------------------------------------------------------
# What are the phone numbers?
str_extract(strings, phone)
str_extract_all(strings, phone)
str_extract_all(strings, phone, simplify = TRUE)
## -----------------------------------------------------------------------------
# Pull out the three components of the match
str_match(strings, phone)
str_match_all(strings, phone)
## -----------------------------------------------------------------------------
str_replace(strings, phone, "XXX-XXX-XXXX")
str_replace_all(strings, phone, "XXX-XXX-XXXX")
## -----------------------------------------------------------------------------
str_split("a-b-c", "-")
str_split_fixed("a-b-c", "-", n = 2)
## -----------------------------------------------------------------------------
a1 <- "\u00e1"
a2 <- "a\u0301"
c(a1, a2)
a1 == a2
## -----------------------------------------------------------------------------
str_detect(a1, fixed(a2))
str_detect(a1, coll(a2))
## -----------------------------------------------------------------------------
i <- c("I", "İ", "i", "ı")
i
str_subset(i, coll("i", ignore_case = TRUE))
str_subset(i, coll("i", ignore_case = TRUE, locale = "tr"))
## -----------------------------------------------------------------------------
x <- "This is a sentence."
str_split(x, boundary("word"))
str_count(x, boundary("word"))
str_extract_all(x, boundary("word"))
## -----------------------------------------------------------------------------
str_split(x, "")
str_count(x, "")