2025-01-12 04:36:52 +08:00

327 lines
9.5 KiB
R

## -----------------------------------------------------------------------------
# Global knitr chunk options for this document: collapse = TRUE and "#>" as
# the comment prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(stringr)
library(magrittr)
## -----------------------------------------------------------------------------
# Lookup table pairing each stringr function with a base R counterpart.
# Both columns hold call signatures as plain text.
data_stringr_base_diff <- tibble::tribble(
  ~stringr, ~base_r,
  "str_detect(string, pattern)", "grepl(pattern, x)",
  "str_dup(string, times)", "strrep(x, times)",
  "str_extract(string, pattern)", "regmatches(x, m = regexpr(pattern, text))",
  "str_extract_all(string, pattern)", "regmatches(x, m = gregexpr(pattern, text))",
  "str_length(string)", "nchar(x)",
  "str_locate(string, pattern)", "regexpr(pattern, text)",
  "str_locate_all(string, pattern)", "gregexpr(pattern, text)",
  "str_match(string, pattern)", "regmatches(x, m = regexec(pattern, text))",
  "str_order(string)", "order(...)",
  "str_replace(string, pattern, replacement)", "sub(pattern, replacement, x)",
  "str_replace_all(string, pattern, replacement)", "gsub(pattern, replacement, x)",
  "str_sort(string)", "sort(x)",
  "str_split(string, pattern)", "strsplit(x, split)",
  "str_sub(string, start, end)", "substr(x, start, stop)",
  "str_subset(string, pattern)", "grep(pattern, x, value = TRUE)",
  "str_to_lower(string)", "tolower(x)",
  "str_to_title(string)", "tools::toTitleCase(text)",
  "str_to_upper(string)", "toupper(x)",
  "str_trim(string)", "trimws(x)",
  "str_which(string, pattern)", "grep(pattern, x)",
  "str_wrap(string)", "strwrap(x)"
)

# Create the markdown table, arranged alphabetically by stringr function name.
data_stringr_base_diff %>%
  # Wrap every cell in backticks so it is formatted as inline code by
  # fmt_markdown(). The column selection is spelled out because calling
  # across() without `.cols` is deprecated (dplyr >= 1.1.0).
  dplyr::mutate(
    dplyr::across(
      dplyr::everything(),
      function(cell) paste0("`", cell, "`")
    )
  ) %>%
  dplyr::arrange(stringr) %>%
  dplyr::rename(`base R` = base_r) %>%
  gt::gt() %>%
  gt::fmt_markdown(columns = everything()) %>%
  gt::tab_options(column_labels.font.weight = "bold")
## -----------------------------------------------------------------------------
fruit <- c("apple", "banana", "pear", "pineapple")

# Base R: test each element for the pattern "a".
grepl("a", fruit)
# stringr counterpart (string first, pattern second).
str_detect(fruit, "a")
## -----------------------------------------------------------------------------
# Base R: positions of the elements matching "a".
grep("a", fruit)
# stringr counterpart.
str_which(fruit, "a")
## -----------------------------------------------------------------------------
# base
# gregexpr() reports "no match" as a single -1, so taking the length of the
# match data would count 1 for a string that does not contain "a" at all.
# Counting only the positive match positions gives 0 in that case and so
# agrees with str_count(). vapply() pins the result type to integer.
loc <- gregexpr(pattern = "a", text = fruit, fixed = TRUE)
vapply(loc, function(m) sum(m > 0L), integer(1))
# stringr
str_count(fruit, pattern = "a")
## -----------------------------------------------------------------------------
fruit3 <- c("papaya", "lime", "apple")

# Base R: show the structure of the match data gregexpr() returns for "p".
str(gregexpr("p", fruit3))
# stringr: str_locate() and str_locate_all() on the same input.
str_locate(fruit3, "p")
str_locate_all(fruit3, "p")
## -----------------------------------------------------------------------------
hw <- "Hadley Wickham"

# Base R: substring by start/stop position.
substr(hw, 1, 6)
substring(hw, 1)
# stringr: same idea with str_sub(); start and end may each be omitted.
str_sub(hw, 1, 6)
str_sub(hw, 1)
str_sub(hw, end = 6)
## -----------------------------------------------------------------------------
# str_sub() called with negative positions.
str_sub(hw, 1, -1)
str_sub(hw, -5, -2)
## -----------------------------------------------------------------------------
al <- "Ada Lovelace"

# Base R on a vector of two strings.
substr(c(hw, al), 1, 6)
substr(c(hw, al), c(1, 1), c(6, 7))
# stringr on the same vector.
str_sub(c(hw, al), 1, -1)
str_sub(c(hw, al), c(1, 1), c(-1, -2))
## -----------------------------------------------------------------------------
# stringr: several start positions applied to one string.
str_sub(hw, 1:5)
## -----------------------------------------------------------------------------
# Base R: the same request passed to substr().
substr(hw, 1:5, 15)
## -----------------------------------------------------------------------------
# Base R: assign "x" into positions 1 to 3, then show the result.
x <- "ABCDEF"
substr(x, start = 1, stop = 3) <- "x"
x
## -----------------------------------------------------------------------------
# stringr: the same assignment through str_sub().
x <- "ABCDEF"
str_sub(x, start = 1, end = 3) <- "x"
x
## -----------------------------------------------------------------------------
# Base R: keep the elements of `fruit` that match "g".
grep("g", fruit, value = TRUE)
# stringr counterpart.
str_subset(fruit, "g")
## -----------------------------------------------------------------------------
shopping_list <- c("apples x4", "bag of flour", "10", "milk x2")

# Base R: compute match data first, then pull the text out with regmatches().
matches <- regexpr("\\d+", shopping_list) # digits
regmatches(shopping_list, matches)
matches <- gregexpr("[a-z]+", shopping_list) # words
regmatches(shopping_list, matches)
# stringr: one call per extraction.
str_extract(shopping_list, "\\d+")
str_extract_all(shopping_list, pattern = "[a-z]+")
## -----------------------------------------------------------------------------
head(sentences)
# Capture an article ("a"/"A" or "the"/"The") and the word that follows it.
# The first alternative used to read "[A]a", which only matches the literal
# text "Aa" and therefore never matched the indefinite article.
noun <- "([Aa]|[Tt]he) ([^ ]+)"
# base
matches <- regexec(pattern = noun, text = head(sentences))
do.call("rbind", regmatches(x = head(sentences), m = matches))
# stringr
str_match(head(sentences), pattern = noun)
## -----------------------------------------------------------------------------
# Base R: number of characters in each element of `letters`.
nchar(x = letters)
# stringr counterpart.
str_length(string = letters)
## -----------------------------------------------------------------------------
# base
# nchar() does not accept a factor and signals an error. The call is wrapped
# in try() so the error message is still reported while the remainder of the
# script keeps running instead of aborting here.
try(nchar(factor("abc")))
## -----------------------------------------------------------------------------
# stringr: str_length() called on a factor.
str_length(string = factor("abc"))
## -----------------------------------------------------------------------------
# Two spellings of u-umlaut: one code point vs. "u" plus a combining mark.
x <- c("\u00fc", "u\u0308")
x
nchar(x = x)
str_length(string = x)
## -----------------------------------------------------------------------------
# Base R: pad to width 30 via sprintf() format strings.
sprintf(fmt = "%30s", "hadley")
sprintf(fmt = "%-30s", "hadley")
# "both" is not as straightforward
# stringr: str_pad() with each of the three `side` values.
rbind(
  str_pad("hadley", width = 30, side = "left"),
  str_pad("hadley", width = 30, side = "right"),
  str_pad("hadley", width = 30, side = "both")
)
## -----------------------------------------------------------------------------
x <- "This string is moderately long"
# stringr: str_trunc() to width 20 with each of the three `side` values.
rbind(
  str_trunc(x, width = 20, side = "right"),
  str_trunc(x, width = 20, side = "left"),
  str_trunc(x, width = 20, side = "center")
)
## -----------------------------------------------------------------------------
# Base R: trimws() on strings with leading and trailing whitespace.
trimws(x = " String with trailing and leading white space\t")
trimws(x = "\n\nString with trailing and leading white space\n\n")
# stringr counterpart.
str_trim(string = " String with trailing and leading white space\t")
str_trim(string = "\n\nString with trailing and leading white space\n\n")
## -----------------------------------------------------------------------------
# stringr: str_squish() on similar inputs.
str_squish(string = " String with trailing, middle, and leading white space\t")
str_squish(string = "\n\nString with excess, trailing and leading white space\n\n")
## -----------------------------------------------------------------------------
gettysburg <- "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal."
# Base R: wrap at width 60 and print the pieces separated by newlines.
cat(strwrap(x = gettysburg, width = 60), sep = "\n")
# stringr: wrap at width 60 and print, followed by a newline.
cat(str_wrap(string = gettysburg, width = 60), "\n")
## -----------------------------------------------------------------------------
fruits <- c("apple", "banana", "pear", "pineapple")

# Base R: sub() and gsub() with the vowel pattern.
sub(pattern = "[aeiou]", replacement = "-", x = fruits)
gsub(pattern = "[aeiou]", replacement = "-", x = fruits)
# stringr counterparts.
str_replace(fruits, pattern = "[aeiou]", replacement = "-")
str_replace_all(fruits, pattern = "[aeiou]", replacement = "-")
## -----------------------------------------------------------------------------
dog <- "The quick brown dog"

# Base R: upper, lower and title case.
toupper(x = dog)
tolower(x = dog)
tools::toTitleCase(text = dog)
# stringr counterparts.
str_to_upper(string = dog)
str_to_lower(string = dog)
str_to_title(string = dog)
## -----------------------------------------------------------------------------
# stringr: upper-casing "i" with the default locale and with locale "tr".
str_to_upper(string = "i") # English
str_to_upper(string = "i", locale = "tr") # Turkish
## -----------------------------------------------------------------------------
# Base R: join all letters into one string separated by "-".
paste0(letters, collapse = "-")
# stringr counterpart.
str_flatten(string = letters, collapse = "-")
## -----------------------------------------------------------------------------
fruit <- c("apple", "pear", "banana")

# Base R: repeat each string, with a single count and with one count per element.
strrep(x = fruit, times = 2)
strrep(x = fruit, times = 1:3)
# stringr counterpart.
str_dup(string = fruit, times = 2)
str_dup(string = fruit, times = 1:3)
## -----------------------------------------------------------------------------
fruits <- c(
  "apples and oranges and pears and bananas",
  "pineapples and mangos and guavas"
)

# Base R: split each string on " and ".
strsplit(x = fruits, split = " and ")
# stringr counterpart.
str_split(string = fruits, pattern = " and ")
## -----------------------------------------------------------------------------
# stringr: the same split with the `n` argument set to 3 and to 2.
str_split(string = fruits, pattern = " and ", n = 3)
str_split(string = fruits, pattern = " and ", n = 2)
## -----------------------------------------------------------------------------
# Values interpolated into the sentence by both approaches below.
name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")
# base
# The format string carries the same commas as the str_glue() template below,
# so both approaches produce the same sentence.
sprintf(
  "My name is %s, my age next year is %s, and my anniversary is %s.",
  name,
  age + 1,
  format(anniversary, "%A, %B %d, %Y")
)
# stringr
# str_glue() is given the sentence in three pieces, with the expressions in
# braces written inline.
str_glue(
  "My name is {name}, ",
  "my age next year is {age + 1}, ",
  "and my anniversary is {format(anniversary, '%A, %B %d, %Y')}."
)
## -----------------------------------------------------------------------------
# Base R: ordering permutation and sorted values of `letters`.
order(letters)
sort(x = letters)
# stringr counterparts.
str_order(x = letters)
str_sort(x = letters)
## -----------------------------------------------------------------------------
# stringr: sort with the default locale and with locale "no".
x <- c("å", "a", "z")
str_sort(x = x)
str_sort(x = x, locale = "no")
## -----------------------------------------------------------------------------
# stringr: sort strings containing digits, without and with numeric = TRUE.
x <- c("100a10", "100a5", "2b", "2a")
str_sort(x = x)
str_sort(x = x, numeric = TRUE)