327 lines
9.5 KiB
R
327 lines
9.5 KiB
R
## -----------------------------------------------------------------------------
|
|
knitr::opts_chunk$set(
|
|
collapse = TRUE,
|
|
comment = "#>"
|
|
)
|
|
|
|
library(stringr)
|
|
library(magrittr)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
data_stringr_base_diff <- tibble::tribble(
|
|
~stringr, ~base_r,
|
|
"str_detect(string, pattern)", "grepl(pattern, x)",
|
|
"str_dup(string, times)", "strrep(x, times)",
|
|
"str_extract(string, pattern)", "regmatches(x, m = regexpr(pattern, text))",
|
|
"str_extract_all(string, pattern)", "regmatches(x, m = gregexpr(pattern, text))",
|
|
"str_length(string)", "nchar(x)",
|
|
"str_locate(string, pattern)", "regexpr(pattern, text)",
|
|
"str_locate_all(string, pattern)", "gregexpr(pattern, text)",
|
|
"str_match(string, pattern)", "regmatches(x, m = regexec(pattern, text))",
|
|
"str_order(string)", "order(...)",
|
|
"str_replace(string, pattern, replacement)", "sub(pattern, replacement, x)",
|
|
"str_replace_all(string, pattern, replacement)", "gsub(pattern, replacement, x)",
|
|
"str_sort(string)", "sort(x)",
|
|
"str_split(string, pattern)", "strsplit(x, split)",
|
|
"str_sub(string, start, end)", "substr(x, start, stop)",
|
|
"str_subset(string, pattern)", "grep(pattern, x, value = TRUE)",
|
|
"str_to_lower(string)", "tolower(x)",
|
|
"str_to_title(string)", "tools::toTitleCase(text)",
|
|
"str_to_upper(string)", "toupper(x)",
|
|
"str_trim(string)", "trimws(x)",
|
|
"str_which(string, pattern)", "grep(pattern, x)",
|
|
"str_wrap(string)", "strwrap(x)"
|
|
)
|
|
|
|
# create MD table, arranged alphabetically by stringr fn name
|
|
data_stringr_base_diff %>%
|
|
dplyr::mutate(dplyr::across(.fns = ~ paste0("`", .x, "`"))) %>%
|
|
dplyr::arrange(stringr) %>%
|
|
dplyr::rename(`base R` = base_r) %>%
|
|
gt::gt() %>%
|
|
gt::fmt_markdown(columns = everything()) %>%
|
|
gt::tab_options(column_labels.font.weight = "bold")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
fruit <- c("apple", "banana", "pear", "pineapple")
|
|
|
|
# base
|
|
grepl(pattern = "a", x = fruit)
|
|
|
|
# stringr
|
|
str_detect(fruit, pattern = "a")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
grep(pattern = "a", x = fruit)
|
|
|
|
# stringr
|
|
str_which(fruit, pattern = "a")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
loc <- gregexpr(pattern = "a", text = fruit, fixed = TRUE)
|
|
sapply(loc, function(x) length(attr(x, "match.length")))
|
|
|
|
# stringr
|
|
str_count(fruit, pattern = "a")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
fruit3 <- c("papaya", "lime", "apple")
|
|
|
|
# base
|
|
str(gregexpr(pattern = "p", text = fruit3))
|
|
|
|
# stringr
|
|
str_locate(fruit3, pattern = "p")
|
|
str_locate_all(fruit3, pattern = "p")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
hw <- "Hadley Wickham"
|
|
|
|
# base
|
|
substr(hw, start = 1, stop = 6)
|
|
substring(hw, first = 1)
|
|
|
|
# stringr
|
|
str_sub(hw, start = 1, end = 6)
|
|
str_sub(hw, start = 1)
|
|
str_sub(hw, end = 6)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
str_sub(hw, start = 1, end = -1)
|
|
str_sub(hw, start = -5, end = -2)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
al <- "Ada Lovelace"
|
|
|
|
# base
|
|
substr(c(hw,al), start = 1, stop = 6)
|
|
substr(c(hw,al), start = c(1,1), stop = c(6,7))
|
|
|
|
# stringr
|
|
str_sub(c(hw,al), start = 1, end = -1)
|
|
str_sub(c(hw,al), start = c(1,1), end = c(-1,-2))
|
|
|
|
## -----------------------------------------------------------------------------
|
|
str_sub(hw, start = 1:5)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
substr(hw, start = 1:5, stop = 15)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
x <- "ABCDEF"
|
|
substr(x, 1, 3) <- "x"
|
|
x
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
x <- "ABCDEF"
|
|
str_sub(x, 1, 3) <- "x"
|
|
x
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
grep(pattern = "g", x = fruit, value = TRUE)
|
|
|
|
# stringr
|
|
str_subset(fruit, pattern = "g")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
shopping_list <- c("apples x4", "bag of flour", "10", "milk x2")
|
|
|
|
# base
|
|
matches <- regexpr(pattern = "\\d+", text = shopping_list) # digits
|
|
regmatches(shopping_list, m = matches)
|
|
|
|
matches <- gregexpr(pattern = "[a-z]+", text = shopping_list) # words
|
|
regmatches(shopping_list, m = matches)
|
|
|
|
# stringr
|
|
str_extract(shopping_list, pattern = "\\d+")
|
|
str_extract_all(shopping_list, "[a-z]+")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
head(sentences)
|
|
noun <- "([A]a|[Tt]he) ([^ ]+)"
|
|
|
|
# base
|
|
matches <- regexec(pattern = noun, text = head(sentences))
|
|
do.call("rbind", regmatches(x = head(sentences), m = matches))
|
|
|
|
# stringr
|
|
str_match(head(sentences), pattern = noun)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
nchar(letters)
|
|
|
|
# stringr
|
|
str_length(letters)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
nchar(factor("abc"))
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
str_length(factor("abc"))
|
|
|
|
## -----------------------------------------------------------------------------
|
|
x <- c("\u00fc", "u\u0308")
|
|
x
|
|
|
|
nchar(x)
|
|
str_length(x)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
sprintf("%30s", "hadley")
|
|
sprintf("%-30s", "hadley")
|
|
# "both" is not as straightforward
|
|
|
|
# stringr
|
|
rbind(
|
|
str_pad("hadley", 30, "left"),
|
|
str_pad("hadley", 30, "right"),
|
|
str_pad("hadley", 30, "both")
|
|
)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
x <- "This string is moderately long"
|
|
|
|
# stringr
|
|
rbind(
|
|
str_trunc(x, 20, "right"),
|
|
str_trunc(x, 20, "left"),
|
|
str_trunc(x, 20, "center")
|
|
)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
trimws(" String with trailing and leading white space\t")
|
|
trimws("\n\nString with trailing and leading white space\n\n")
|
|
|
|
# stringr
|
|
str_trim(" String with trailing and leading white space\t")
|
|
str_trim("\n\nString with trailing and leading white space\n\n")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
str_squish(" String with trailing, middle, and leading white space\t")
|
|
str_squish("\n\nString with excess, trailing and leading white space\n\n")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
gettysburg <- "Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal."
|
|
|
|
# base
|
|
cat(strwrap(gettysburg, width = 60), sep = "\n")
|
|
|
|
# stringr
|
|
cat(str_wrap(gettysburg, width = 60), "\n")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
fruits <- c("apple", "banana", "pear", "pineapple")
|
|
|
|
# base
|
|
sub("[aeiou]", "-", fruits)
|
|
gsub("[aeiou]", "-", fruits)
|
|
|
|
# stringr
|
|
str_replace(fruits, "[aeiou]", "-")
|
|
str_replace_all(fruits, "[aeiou]", "-")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
dog <- "The quick brown dog"
|
|
|
|
# base
|
|
toupper(dog)
|
|
tolower(dog)
|
|
tools::toTitleCase(dog)
|
|
|
|
# stringr
|
|
str_to_upper(dog)
|
|
str_to_lower(dog)
|
|
str_to_title(dog)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
str_to_upper("i") # English
|
|
str_to_upper("i", locale = "tr") # Turkish
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
paste0(letters, collapse = "-")
|
|
|
|
# stringr
|
|
str_flatten(letters, collapse = "-")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
fruit <- c("apple", "pear", "banana")
|
|
|
|
# base
|
|
strrep(fruit, 2)
|
|
strrep(fruit, 1:3)
|
|
|
|
# stringr
|
|
str_dup(fruit, 2)
|
|
str_dup(fruit, 1:3)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
fruits <- c(
|
|
"apples and oranges and pears and bananas",
|
|
"pineapples and mangos and guavas"
|
|
)
|
|
# base
|
|
strsplit(fruits, " and ")
|
|
|
|
# stringr
|
|
str_split(fruits, " and ")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
str_split(fruits, " and ", n = 3)
|
|
str_split(fruits, " and ", n = 2)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
name <- "Fred"
|
|
age <- 50
|
|
anniversary <- as.Date("1991-10-12")
|
|
|
|
# base
|
|
sprintf(
|
|
"My name is %s my age next year is %s and my anniversary is %s.",
|
|
name,
|
|
age + 1,
|
|
format(anniversary, "%A, %B %d, %Y")
|
|
)
|
|
|
|
# stringr
|
|
str_glue(
|
|
"My name is {name}, ",
|
|
"my age next year is {age + 1}, ",
|
|
"and my anniversary is {format(anniversary, '%A, %B %d, %Y')}."
|
|
)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# base
|
|
order(letters)
|
|
sort(letters)
|
|
|
|
# stringr
|
|
str_order(letters)
|
|
str_sort(letters)
|
|
|
|
## -----------------------------------------------------------------------------
|
|
x <- c("å", "a", "z")
|
|
str_sort(x)
|
|
str_sort(x, locale = "no")
|
|
|
|
## -----------------------------------------------------------------------------
|
|
# stringr
|
|
x <- c("100a10", "100a5", "2b", "2a")
|
|
str_sort(x)
|
|
str_sort(x, numeric = TRUE)
|
|
|