2025-01-12 04:36:52 +08:00

421 lines
15 KiB
R

test_DataFrame_construction <- function() {
score <- c(X=1L, Y=3L, Z=NA)
counts <- c(10L, 2L, NA)
## na in rn
checkException(DataFrame(score, row.names = c("a", NA, "b")), silent = TRUE)
## invalid rn length
checkException(DataFrame(score, row.names = "a"), silent = TRUE)
DF <- DataFrame() # no args
checkTrue(validObject(DF))
row.names <- c("one", "two", "three")
DF <- DataFrame(row.names = row.names) # no args, but row.names
checkTrue(validObject(DF))
checkIdentical(rownames(DF), row.names)
DF <- DataFrame(score) # single, unnamed arg
checkTrue(validObject(DF))
checkIdentical(DF[["score"]], score)
DF <- DataFrame(score, row.names = row.names) # with row names
checkTrue(validObject(DF))
checkIdentical(rownames(DF), row.names)
## dups in rn
row.names = c("a", "b", "a")
DF <- DataFrame(score, row.names = row.names)
checkTrue(validObject(DF))
checkIdentical(rownames(DF), row.names)
DF <- DataFrame(score=setNames(score, row.names))
checkTrue(validObject(DF))
checkIdentical(rownames(DF), row.names)
DF <- DataFrame(vals = score) # named vector arg
checkTrue(validObject(DF))
checkIdentical(DF[["vals"]], score)
DF <- DataFrame(counts, vals = score) # mixed named and unnamed
checkTrue(validObject(DF))
checkIdentical(DF[["vals"]], score)
checkIdentical(DF[["counts"]], counts)
DF <- DataFrame(score + score) # unnamed arg with invalid name expression
checkTrue(validObject(DF))
checkIdentical(DF[["score...score"]], score + score)
mat <- cbind(score)
DF <- DataFrame(mat) # single column matrix with column name
checkTrue(validObject(DF))
checkIdentical(DF[["score"]], unname(score))
mat <- cbind(score, counts)
DF <- DataFrame(mat) # two column matrix with col names
checkTrue(validObject(DF))
checkIdentical(DF[["score"]], unname(score))
checkIdentical(DF[["counts"]], counts)
colnames(mat) <- NULL
DF <- DataFrame(mat) # two column matrix without col names
checkTrue(validObject(DF))
checkIdentical(DF[["V1"]], unname(score))
sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame
checkIdentical(as.data.frame(sw), swiss)
rownames(swiss) <- NULL # strip row names to make them comparable
sw <- DataFrame(swiss) # a data.frame
checkIdentical(as.data.frame(sw), swiss)
sw <- DataFrame(swiss[1:3,], score = unname(score))
checkIdentical(as.data.frame(sw), data.frame(swiss[1:3,], score))
sw <- DataFrame(score = score, swiss = swiss[1:3,]) # named data.frame/matrix
checkIdentical(as.data.frame(sw),
data.frame(score = score, swiss = swiss[1:3,]))
## identity
DF <- DataFrame(A=I(list(1:3)), B=I(list(1:4)))
DF[[2]] <- I(DF[[2]])
df <- data.frame(A=I(list(1:3)), B=I(list(1:4)))
df[[1]] <- unclass(df[[1]])
checkIdentical(as.data.frame(DF), df)
## recycling
DF <- DataFrame(1, score)
checkIdentical(DF[[1]], rep(1, 3))
checkIdentical(DF[[2]], score)
## Non-S4 columns for which is.object() is TRUE (a.k.a. S3-typed columns)
foo <- package_version(c("3.4.99", "5.2", "2.0"))
bar <- as.POSIXlt(c(123123, 124235235, 96546546))
DF <- DataFrame(foo=foo, bar=bar)
checkTrue(validObject(DF))
checkIdentical(dim(DF), c(3L, 2L))
checkIdentical(colnames(DF), c("foo", "bar"))
checkIdentical(as.data.frame(DF), data.frame(foo=foo, bar=bar))
}
test_DataFrame_coerce <- function() {
## need to introduce character() dim names
checkTrue(validObject(as(matrix(0L, 0L, 0L), "DataFrame")))
score <- c(X=1L, Y=3L, Z=NA)
DF <- as(score, "DataFrame")
checkTrue(validObject(DF))
checkIdentical(DF[[1]], score)
}
test_DataFrame_subset <- function() {
data(swiss)
sw <- DataFrame(swiss)
rn <- rownames(swiss)
checkException(sw[list()], silent = TRUE) # non-atomic
checkException(sw[NA], silent = TRUE) # column indices cannot be NA
checkException(sw[100], silent = TRUE) # out of bounds col
checkException(sw[,100], silent = TRUE)
checkException(sw[1000,], silent = TRUE) # out of bounds row
oldOpts <- options(warn=2)
checkException(sw[1:3, drop=TRUE], silent = TRUE) # drop ignored
checkException(sw[drop=TRUE], silent = TRUE)
checkException(sw[foo = "bar"], silent = TRUE) # invalid argument
options(oldOpts)
checkException(sw[,"Fert"], silent = TRUE) # bad column name
sw <- DataFrame(swiss)
checkIdentical(sw[], sw) # identity subset
checkIdentical(sw[,], sw)
checkIdentical(sw[NULL], DataFrame(swiss[NULL])) # NULL subsetting
checkIdentical(sw[,NULL], DataFrame(swiss[,NULL]))
checkIdentical(as.data.frame(sw[NULL,]),
structure(data.frame(swiss[NULL,]), row.names = character()))
rownames(sw) <- rn
## select columns
checkIdentical(as.data.frame(sw[1:3]), swiss[1:3])
checkIdentical(as.data.frame(sw[, 1:3]), swiss[1:3])
## select rows
checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,])
checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,])
checkIdentical(as.data.frame(sw[sw[["Education"]] == 7,]),
swiss[swiss[["Education"]] == 7,])
checkIdentical(as.data.frame(sw[Rle(sw[["Education"]] == 7),]),
swiss[swiss[["Education"]] == 7,])
## select rows and columns
checkIdentical(as.data.frame(sw[4:5, 1:3]), swiss[4:5,1:3])
checkIdentical(as.data.frame(sw[1]), swiss[1]) # a one-column data frame
checkIdentical(sw[,"Fertility"], swiss[,"Fertility"])
## the same
checkIdentical(as.data.frame(sw[, 1, drop = FALSE]), swiss[, 1, drop = FALSE])
checkIdentical(sw[, 1], swiss[,1]) # a (unnamed) vector
checkIdentical(sw[[1]], swiss[[1]]) # the same
checkIdentical(sw[["Fertility"]], swiss[["Fertility"]])
checkIdentical(sw[["Fert"]], swiss[["Fert"]]) # should return 'NULL'
checkIdentical(sw[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)],
swiss[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)])
checkIdentical(as.data.frame(sw[1,]), swiss[1,]) # a one-row data frame
checkIdentical(sw[1,, drop=TRUE], swiss[1,, drop=TRUE]) # a list
## duplicate row, unique row names are created
checkIdentical(as.data.frame(sw[c(1, 1:2),]), swiss[c(1,1:2),])
## NOTE: NA subsetting not yet supported for XVectors
##checkIdentical(as.data.frame(sw[c(1, NA, 1:2, NA),]), # mixin some NAs
## swiss[c(1, NA, 1:2, NA),])
checkIdentical(as.data.frame(sw["Courtelary",]), swiss["Courtelary",])
subswiss <- swiss[1:5,1:4]
subsw <- sw[1:5,1:4]
## Starting with S4Vectors 0.31.3, we no longer support partial matching on
## the rownames of a DataFrame.
#checkIdentical(as.data.frame(subsw["C",]), subswiss["C",]) # partially matches
## NOTE: NA subsetting not yet supported for XVectors
##checkIdentical(as.data.frame(subsw["foo",]), # bad row name
## subswiss["foo",])
##checkIdentical(as.data.frame(sw[match("C", row.names(sw)), ]),
## swiss[match("C", row.names(sw)), ]) # no exact match
}
test_DataFrame_dimnames_replace <- function() {
data(swiss)
cn <- paste("X", seq_len(ncol(swiss)), sep = ".")
sw <- DataFrame(swiss)
colnames(sw) <- cn
checkIdentical(colnames(sw), cn)
cn <- as.character(seq_len(ncol(swiss)))
colnames(sw) <- cn
colnames(swiss) <- cn
checkIdentical(colnames(sw), colnames(swiss))
colnames(sw) <- cn[1]
colnames(swiss) <- cn[1]
checkIdentical(colnames(sw), colnames(swiss))
rn <- seq_len(nrow(sw))
rownames(sw) <- rn
checkIdentical(rownames(sw), as.character(rn))
checkException(rownames(sw) <- rn[1], silent = TRUE)
rownames(sw) <- rep(rn[1], nrow(sw))
checkIdentical(rownames(sw), as.character(rep(rn[1], nrow(sw))))
rn[1] <- NA
checkException(rownames(sw) <- rn, silent = TRUE)
}
test_DataFrame_replace <- function() {
score <- c(1L, 3L, NA)
counts <- c(10L, 2L, NA)
DF <- DataFrame(score) # single, unnamed arg
DF[["counts"]] <- counts
checkIdentical(DF[["counts"]], counts)
DF[[3]] <- score
checkIdentical(DF[[3]], score)
DF[[3]] <- NULL # deletion
DF[["counts"]] <- NULL
DF$counts <- counts
checkIdentical(DF$counts, counts)
checkException(DF[[13]] <- counts, silent = TRUE) # index must be < length+1
checkException(DF[["tooshort"]] <- counts[1:2], silent = TRUE)
sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame
sw1 <- sw; swiss1 <- swiss
sw1[] <- 1L; swiss1[] <- 1L
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[] <- 1; swiss1[] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["Education"] <- 1; swiss1["Education"] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[,"Education"] <- 1; swiss1[,"Education"] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["Courtelary",] <- 1; swiss1["Courtelary",] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[1:3] <- 1; swiss1[1:3] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[,1:3] <- 1; swiss1[,1:3] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[2:4,1:3] <- 1; swiss1[2:4,1:3] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[2:4,-c(2,4,5)] <- 1; swiss1[2:4,-c(2,4,5)] <- 1
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[,1:3] <- sw1[,2:4]; swiss1[,1:3] <- swiss1[,2:4]
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[2:4,] <- sw1[1:3,]; swiss1[2:4,] <- swiss1[1:3,]
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[2:4,1:3] <- sw1[1:3,2:4]; swiss1[2:4,1:3] <- swiss1[1:3,2:4]
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCity",] <- NA; swiss1["NewCity",] <- NA
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[nrow(sw1)+(1:2),] <- NA; swiss1[nrow(swiss1)+(1:2),] <- NA
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCol"] <- seq_len(nrow(sw1)); swiss1["NewCol"] <- seq_len(nrow(sw1))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[ncol(sw1)+1L] <- seq_len(nrow(sw1))
swiss1[ncol(swiss1)+1L] <- seq_len(nrow(sw1))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[,"NewCol"] <- seq_len(nrow(sw1)); swiss1[,"NewCol"] <- seq_len(nrow(sw1))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCity","NewCol"] <- 0
swiss1["NewCity","NewCol"] <- 0
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCity",] <- DataFrame(NA); swiss1["NewCity",] <- data.frame(NA)
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[nrow(sw1)+(1:2),] <- DataFrame(NA)
swiss1[nrow(swiss1)+(1:2),] <- data.frame(NA)
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCol"] <- DataFrame(seq_len(nrow(sw1)))
swiss1["NewCol"] <- data.frame(seq_len(nrow(sw1)))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[ncol(sw1)+1L] <- DataFrame(seq_len(nrow(sw1)))
swiss1[ncol(swiss1)+1L] <- data.frame(seq_len(nrow(sw1)))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[,"NewCol"] <- DataFrame(seq_len(nrow(sw1)))
swiss1[,"NewCol"] <- data.frame(seq_len(nrow(sw1)))
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1["NewCity","NewCol"] <- DataFrame(0)
swiss1["NewCity","NewCol"] <- data.frame(0)
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw[,1:2]; swiss1 <- swiss[,1:2]
sw1[,colnames(sw)[2:3]] <- sw[,2:3]
swiss1[,colnames(swiss)[2:3]] <- swiss[,2:3]
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw; swiss1 <- swiss
sw1[FALSE] <- list()
checkIdentical(sw1, sw)
sw1[1L] <- list()
swiss1[1L] <- list()
checkIdentical(as.data.frame(sw1), swiss1)
sw1[1L] <- NULL
swiss1[1L] <- NULL
checkIdentical(as.data.frame(sw1), swiss1)
sw1 <- sw
mcols(sw1) <- DataFrame(id = seq_len(ncol(sw1)))
sw1["NewCol"] <- DataFrame(seq_len(nrow(sw1)))
checkIdentical(mcols(sw1, use.names=TRUE),
DataFrame(id = c(seq_len(ncol(sw1)-1), NA),
row.names = colnames(sw1)))
## Non-S4 columns for which is.object() is TRUE (a.k.a. S3-typed columns)
foo <- package_version(c("3.4.99", "5.2", "2.0"))
bar <- as.POSIXlt(c(123123, 124235235, 96546546))
DF <- DataFrame(normal=1:3, foo=foo, bar=bar)
DF$normal <- letters[1:3]
checkTrue(validObject(DF))
checkIdentical(DF$foo, foo)
checkIdentical(DF$bar, bar)
DF$foo <- rev(foo)
checkTrue(validObject(DF))
checkIdentical(DF$foo, rev(foo))
DF$bar <- bar
checkTrue(validObject(DF))
checkIdentical(DF$bar, bar)
}
test_DataFrame_looping <- function() {
data(iris)
actual <- by(iris, iris$Species, nrow)
## a bit tricky because of the 'call' attribute
attr(actual, "call")[[1]] <- as.name("by")
iris <- DataFrame(iris, row.names=rownames(iris))
checkIdentical(actual, by(iris, iris$Species, nrow))
}
test_DataFrame_annotation <- function() {
df <- DataFrame(x = c(1L, 3L, NA), y = c(10L, 2L, NA))
mcols(df) <- DataFrame(a = 1:2)
checkIdentical(mcols(df)[,1], 1:2)
checkIdentical(mcols(df[2:1])[,1], 2:1)
checkIdentical(mcols(cbind(df,df))[,1], rep(1:2,2))
df$z <- 1:3
checkIdentical(mcols(df, use.names=TRUE),
DataFrame(a = c(1L, 2L, NA), row.names = c("x", "y", "z")))
}
## '[<-' setter
test_DataFrame_Setter <- function() {
.SingleBracket <- function(df0, df1, idx) {
target <- df0
for (i in seq_len(length(df0))[idx])
target[[i]] <- df1[[i]]
df <- df0
df[idx] <- df1[idx]
stopifnot(identical(target, df))
df <- DataFrame(df0)
df[idx] <- DataFrame(df1)[idx]
if (!identical(DataFrame(target), df))
FALSE
else
TRUE
}
df0 <- data.frame(x=11:12, y=21:22, z=31:32)
df1 <- data.frame(matrix(LETTERS[1:6], ncol=3))
checkTrue(.SingleBracket(df0, df1, c(FALSE, FALSE, TRUE)))
checkTrue(.SingleBracket(df0, df1, c(TRUE, FALSE, TRUE)))
checkTrue(.SingleBracket(df0, df1, c(TRUE, TRUE, TRUE)))
checkTrue(.SingleBracket(df0, df1, TRUE))
target <- df0
target[] <- df1[]
df <- DataFrame(df0)
df[] <- DataFrame(df1)[]
checkIdentical(DataFrame(target), df)
for (i in c('a', 'c', 'e')) {
DF <- DataFrame(A=1:5, row.names=letters[1:5])
df <- data.frame(A=1:5, row.names=letters[1:5])
DF[i, 'B'] <- df[i, 'B'] <- 1
checkIdentical(as.data.frame(DF), df)
}
}
test_DataFrame_droplevels <- function() {
df <- DataFrame(state.name, state.region, state.region.rle=Rle(state.region))
df2 <- head(df)
checkIdentical(lapply(droplevels(df2), levels),
list(state.name=NULL,
state.region=c("South", "West"),
state.region.rle=c("South", "West")))
}
test_DataFrame_transform <- function() {
DF <- DataFrame(state.name, state.region, state.area)
df <- as.data.frame(DF)
checkIdentical(transform(DF), DF)
TF <- transform(DF, log.area = log(state.area), ratio = log.area / state.area)
tf <- transform(transform(df, log.area = log(state.area)),
ratio = log.area / state.area)
checkIdentical(tf, as.data.frame(TF))
}