356 lines
12 KiB
R
356 lines
12 KiB
R
|
|
## tests for digest, taken from the examples in the manual page
|
|
|
|
suppressMessages(library(digest))
|
|
|
|
## Standard RFC 1321 test vectors
|
|
md5Input <-
|
|
c("",
|
|
"a",
|
|
"abc",
|
|
"message digest",
|
|
"abcdefghijklmnopqrstuvwxyz",
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
|
|
paste("12345678901234567890123456789012345678901234567890123456789012",
|
|
"345678901234567890", sep=""))
|
|
md5Output <-
|
|
c("d41d8cd98f00b204e9800998ecf8427e",
|
|
"0cc175b9c0f1b6a831c399e269772661",
|
|
"900150983cd24fb0d6963f7d28e17f72",
|
|
"f96b697d7cb7938d525a2f31aaf161d0",
|
|
"c3fcd3d76192e4007dfb496cca67e13b",
|
|
"d174ab98d277d9f5a5611c2c9f419d9f",
|
|
"57edf4a22be3c955ac49da2e2107b67a")
|
|
|
|
for (i in seq(along.with=md5Input)) {
|
|
md5 <- digest(md5Input[i], serialize=FALSE)
|
|
expect_true(identical(md5, md5Output[i]))
|
|
#cat(md5, "\n")
|
|
}
|
|
|
|
md5 <- getVDigest()
|
|
expect_identical(md5(md5Input, serialize = FALSE), md5Output)
|
|
|
|
expect_identical(digest(NULL),
|
|
md5(NULL))
|
|
expect_identical(digest(character(0)),
|
|
md5(character(0)))
|
|
expect_identical(digest(list("abc")),
|
|
md5(list(list("abc"))))
|
|
expect_identical(digest(list(NULL)),
|
|
md5(list(list(NULL))))
|
|
expect_identical(digest(character(0), serialize = FALSE),
|
|
md5(character(0), serialize = FALSE))
|
|
|
|
|
|
## md5 raw output test
|
|
for (i in seq(along.with=md5Input)) {
|
|
md5 <- digest(md5Input[i], serialize=FALSE, raw=TRUE)
|
|
md5 <- gsub(" ","",capture.output(cat(md5)))
|
|
expect_true(identical(md5, md5Output[i]))
|
|
#cat(md5, "\n")
|
|
}
|
|
|
|
sha1Input <-
|
|
c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
NULL)
|
|
sha1Output <-
|
|
c("a9993e364706816aba3e25717850c26c9cd0d89d",
|
|
"84983e441c3bd26ebaae4aa1f95129e5e54670f1",
|
|
"34aa973cd4c4daa4f61eeb2bdbad27316534016f")
|
|
|
|
for (i in seq(along.with=sha1Input)) {
|
|
sha1 <- digest(sha1Input[i], algo="sha1", serialize=FALSE)
|
|
expect_true(identical(sha1, sha1Output[i]))
|
|
#cat(sha1, "\n")
|
|
}
|
|
|
|
sha1 <- getVDigest(algo = 'sha1')
|
|
expect_identical(sha1(sha1Input, serialize = FALSE), sha1Output[1:2])
|
|
|
|
## sha1 raw output test
|
|
for (i in seq(along.with=sha1Input)) {
|
|
sha1 <- digest(sha1Input[i], algo="sha1", serialize=FALSE, raw=TRUE)
|
|
#print(sha1)
|
|
sha1 <- gsub(" ","",capture.output(cat(sha1)))
|
|
#print(sha1)
|
|
#print(sha1Output[i])
|
|
expect_true(identical(sha1, sha1Output[i]))
|
|
#cat(sha1, "\n")
|
|
}
|
|
|
|
## sha512 test
|
|
sha512Input <-c(
|
|
"",
|
|
"The quick brown fox jumps over the lazy dog."
|
|
)
|
|
sha512Output <- c(
|
|
"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
|
|
"91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed")
|
|
|
|
for (i in seq(along.with=sha512Input)) {
|
|
sha512 <- digest(sha512Input[i], algo="sha512", serialize=FALSE)
|
|
expect_true(identical(sha512, sha512Output[i]))
|
|
#cat(sha512, "\n")
|
|
}
|
|
|
|
sha512 <- getVDigest(algo = 'sha512')
|
|
expect_identical(sha512(sha512Input, serialize = FALSE), sha512Output[1:2])
|
|
|
|
## sha512 raw output test
|
|
for (i in seq(along.with=sha512Input)) {
|
|
sha512 <- digest(sha512Input[i], algo="sha512", serialize=FALSE, raw=TRUE)
|
|
#print(sha512)
|
|
|
|
sha512 <- gsub(" ","",capture.output(cat(sha512)))
|
|
#print(sha512)
|
|
#print(sha512Output[i])
|
|
expect_true(identical(sha512, sha512Output[i]))
|
|
#cat(sha512, "\n")
|
|
}
|
|
|
|
crc32Input <-
|
|
c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
NULL)
|
|
crc32Output <-
|
|
c("352441c2",
|
|
"171a3f5f",
|
|
"2ef80172")
|
|
|
|
for (i in seq(along.with=crc32Input)) {
|
|
crc32 <- digest(crc32Input[i], algo="crc32", serialize=FALSE)
|
|
expect_true(identical(crc32, crc32Output[i]))
|
|
#cat(crc32, "\n")
|
|
}
|
|
|
|
crc32 <- getVDigest(algo = 'crc32')
|
|
expect_identical(crc32(crc32Input, serialize = FALSE), crc32Output[1:2])
|
|
|
|
|
|
## one of the FIPS-
|
|
sha1 <- digest("abc", algo="sha1", serialize=FALSE)
|
|
expect_true(identical(sha1, "a9993e364706816aba3e25717850c26c9cd0d89d"))
|
|
|
|
## This one seems to give slightly different output depending on the R version used
|
|
##
|
|
## # example of a digest of a standard R list structure
|
|
## cat(digest(list(LETTERS, data.frame(a=letters[1:5],
|
|
## b=matrix(1:10,
|
|
## ncol=2)))), "\n")
|
|
|
|
## these outputs were calculated using xxh32sum
|
|
## [ Correction: These reproduce via the Python xxhash package and its hexdigest() output
|
|
## but not the xxh64sum command-line tool as the original comment here implies. ]
|
|
xxhash32Input <-
|
|
c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
"")
|
|
xxhash32Output <-
|
|
c("32d153ff",
|
|
"89ea60c3",
|
|
"02cc5d05")
|
|
|
|
for (i in seq(along.with=xxhash32Input)) {
|
|
xxhash32 <- digest(xxhash32Input[i], algo="xxhash32", serialize=FALSE)
|
|
#cat(xxhash32, "\n")
|
|
expect_true(identical(xxhash32, xxhash32Output[i]))
|
|
}
|
|
|
|
xxhash32 <- getVDigest(algo = 'xxhash32')
|
|
expect_identical(xxhash32(xxhash32Input, serialize = FALSE), xxhash32Output)
|
|
|
|
|
|
## these outputs were calculated using xxh64sum
|
|
## [ Correction: These reproduce via the Python xxhash package and its hexdigest() output
|
|
## but not the xxh64sum command-line tool as the original comment here implies. ]
|
|
xxhash64Input <-
|
|
c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
"")
|
|
xxhash64Output <-
|
|
c("44bc2cf5ad770999",
|
|
"f06103773e8585df",
|
|
"ef46db3751d8e999")
|
|
|
|
for (i in seq(along.with=xxhash64Input)) {
|
|
xxhash64 <- digest(xxhash64Input[i], algo="xxhash64", serialize=FALSE)
|
|
#cat(xxhash64, "\n")
|
|
expect_true(identical(xxhash64, xxhash64Output[i]))
|
|
}
|
|
|
|
xxhash64 <- getVDigest(algo = 'xxhash64')
|
|
expect_identical(xxhash64(xxhash64Input, serialize = FALSE), xxhash64Output)
|
|
|
|
|
|
## these outputs were calculated using mmh3 python package
|
|
## the first two are also shown at this StackOverflow question on test vectors
|
|
## https://stackoverflow.com/questions/14747343/murmurhash3-test-vectors
|
|
murmur32Input <-
|
|
c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
"")
|
|
murmur32Output <-
|
|
c("b3dd93fa",
|
|
"ee925b90",
|
|
"00000000")
|
|
|
|
for (i in seq(along.with=murmur32Input)) {
|
|
murmur32 <- digest(murmur32Input[i], algo="murmur32", serialize=FALSE)
|
|
#cat(murmur32, "\n")
|
|
expect_true(identical(murmur32, murmur32Output[i]))
|
|
}
|
|
|
|
murmur32 <- getVDigest(algo = 'murmur32')
|
|
expect_identical(murmur32(murmur32Input, serialize = FALSE), murmur32Output)
|
|
|
|
|
|
## tests for digest spooky
|
|
|
|
## Per PR 205, see comment in https://github.com/facebook/folly/blob/4c603f8c2add8d0228de0e073c5ae3ce9b02b6f3/folly/hash/SpookyHashV2.h#L35-L36
|
|
## Values ought to be sensible on big endian too but different from little endian reference
|
|
## so we do not test on big endian
|
|
if (isTRUE(.Call(digest:::is_little_endian))) {
|
|
|
|
## test vectors (originally for md5)
|
|
spookyInput <- c("",
|
|
"a",
|
|
"abc",
|
|
"message digest",
|
|
"abcdefghijklmnopqrstuvwxyz",
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
|
|
paste("12345678901234567890123456789012345678901234567890123456789012",
|
|
"345678901234567890", sep=""))
|
|
|
|
# from spooky import hash128
|
|
# from binascii import hexlify
|
|
#
|
|
# spookyInput = [
|
|
# "",
|
|
# "a",
|
|
# "abc",
|
|
# "message digest",
|
|
# "abcdefghijklmnopqrstuvwxyz",
|
|
# "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
|
|
# "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
|
|
# ]
|
|
#
|
|
# for s in spookyInput:
|
|
# hexlify(hash128(s).to_bytes(16, 'little')).decode()
|
|
#
|
|
# '1909f56bfc062723c751e8b465ee728b'
|
|
# 'bdc9bba09181101a922a4161f0584275'
|
|
# '67c93775f715ab8ab01178caf86713c6'
|
|
# '9630c2a55c0987a0db44434f9d67a192'
|
|
# '5172de938ce149a98f4d06d3c3168ffe'
|
|
# 'b5b3b2d0f08b58aa07f551895f929f81'
|
|
# '3621ec01112dafa1610a4bd23041966b'
|
|
|
|
spookyOutputPython <- c('1909f56bfc062723c751e8b465ee728b',
|
|
'bdc9bba09181101a922a4161f0584275',
|
|
'67c93775f715ab8ab01178caf86713c6',
|
|
'9630c2a55c0987a0db44434f9d67a192',
|
|
'5172de938ce149a98f4d06d3c3168ffe',
|
|
'b5b3b2d0f08b58aa07f551895f929f81',
|
|
'3621ec01112dafa1610a4bd23041966b')
|
|
|
|
## spooky raw output test
|
|
for (i in seq(along.with=spookyInput)) {
|
|
# skip = 30 skips the entire serialization header for a length 1 character vector
|
|
# this is equivalent to raw = TRUE and matches the python spooky implementation for those vectors
|
|
spooky <- digest(spookyInput[i], algo = "spookyhash", skip = 30)
|
|
expect_true(identical(spooky, spookyOutputPython[i]))
|
|
#cat(spooky, "\n")
|
|
}
|
|
|
|
expect_identical(
|
|
getVDigest(algo = 'spookyhash')(spookyInput, skip = 30),
|
|
spookyOutputPython
|
|
)
|
|
|
|
## some extras to get coverage up - these aren't tested against reference output,
|
|
## just output from R 3.6.0
|
|
spookyInput <- c("a", "aaaaaaaaa", "aaaaaaaaaaaaa")
|
|
spookyOutput <- c("b7a3573ba6139dfdc52db30acba87f46",
|
|
"fd876ecaa5d1e442600333118f223e02",
|
|
"91848873bf91d06ad321bbd47400a556")
|
|
for (i in seq(along.with=spookyInput)) {
|
|
spooky <- digest(spookyInput[i], algo = "spookyhash")
|
|
expect_true(identical(spooky, spookyOutput[i]))
|
|
##cat(spooky, "\n")
|
|
}
|
|
|
|
expect_identical(
|
|
getVDigest(algo = 'spookyhash')(spookyInput),
|
|
spookyOutput
|
|
)
|
|
|
|
## test a bigger object
|
|
spooky <- digest(iris, algo = "spookyhash")
|
|
expect_true(identical(spooky, "af58add8b4f7044582b331083bc239ff"))
|
|
expect_identical(getVDigest('spookyhash')(list(iris)),
|
|
"af58add8b4f7044582b331083bc239ff")
|
|
##cat(spooky, "\n")
|
|
|
|
# test error message
|
|
#error.message <- try(digest(spookyInput[i], algo = "spookyhash", serialize = FALSE))
|
|
#expect_true(
|
|
# grepl("spookyhash algorithm is not available without serialization.", error.message)
|
|
#)
|
|
}
|
|
|
|
## Ensure that all values of algo are actually allowed (in case a new one is
|
|
## added in the future). The call to match.arg() passes choices explicitly
|
|
## because it is significantly faster to do it than to have it automatically
|
|
## infer the possible choices from the function's formals.
|
|
|
|
# Grab the possible values of algo, then call digest() for each one.
|
|
algos <- eval(formals(digest)$algo)
|
|
for (algo in algos) {
|
|
digest(123, algo = algo)
|
|
}
|
|
# Same for getVDigest
|
|
algos <- eval(formals(getVDigest)$algo)
|
|
for (algo in algos) {
|
|
getVDigest(algo = algo)
|
|
}
|
|
|
|
|
|
## xxhash h3_64 variant
|
|
## reference values computed via xxhash and its xxh3_64 object and hexdiges printer:
|
|
## ie print(xxhash.xxh3_64("abc").hexdigest())
|
|
xxh3_64Input <- c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
"")
|
|
xxh3_64Output <- c("78af5f94892f3950",
|
|
"5bbcbbabcdcc3d3f",
|
|
"2d06800538d394c2")
|
|
for (i in seq(along.with=xxh3_64Input)) {
|
|
xxh3_64 <- digest(xxh3_64Input[i], algo="xxh3_64", serialize=FALSE)
|
|
#cat(xxh3_64, "\n")
|
|
expect_true(identical(xxh3_64, xxh3_64Output[i]))
|
|
}
|
|
|
|
xxh3_64 <- getVDigest(algo = 'xxh3_64')
|
|
expect_identical(xxh3_64(xxh3_64Input, serialize = FALSE), xxh3_64Output)
|
|
|
|
|
|
## xxhash h3_128 variant
|
|
## reference values computed via xxhash and its xxh3_128 object and hexdiges printer:
|
|
## ie print(xxhash.xxh3_128("abc").hexdigest())
|
|
xxh3_128Input <- c("abc",
|
|
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
|
|
"")
|
|
xxh3_128Output <- c("06b05ab6733a618578af5f94892f3950",
|
|
"3d62d22a5169b016c0d894fd4828a1a7",
|
|
"99aa06d3014798d86001c324468d497f")
|
|
for (i in seq(along.with=xxh3_128Input)) {
|
|
xxh3_128 <- digest(xxh3_128Input[i], algo="xxh3_128", serialize=FALSE)
|
|
#cat(xxh3_128, "\n")
|
|
expect_true(identical(xxh3_128, xxh3_128Output[i]))
|
|
}
|
|
|
|
xxh3_128 <- getVDigest(algo = 'xxh3_128')
|
|
expect_identical(xxh3_128(xxh3_128Input, serialize = FALSE), xxh3_128Output)
|