test_DataFrame_construction <- function() { score <- c(X=1L, Y=3L, Z=NA) counts <- c(10L, 2L, NA) ## na in rn checkException(DataFrame(score, row.names = c("a", NA, "b")), silent = TRUE) ## invalid rn length checkException(DataFrame(score, row.names = "a"), silent = TRUE) DF <- DataFrame() # no args checkTrue(validObject(DF)) row.names <- c("one", "two", "three") DF <- DataFrame(row.names = row.names) # no args, but row.names checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) DF <- DataFrame(score) # single, unnamed arg checkTrue(validObject(DF)) checkIdentical(DF[["score"]], score) DF <- DataFrame(score, row.names = row.names) # with row names checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) ## dups in rn row.names = c("a", "b", "a") DF <- DataFrame(score, row.names = row.names) checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) DF <- DataFrame(score=setNames(score, row.names)) checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) DF <- DataFrame(vals = score) # named vector arg checkTrue(validObject(DF)) checkIdentical(DF[["vals"]], score) DF <- DataFrame(counts, vals = score) # mixed named and unnamed checkTrue(validObject(DF)) checkIdentical(DF[["vals"]], score) checkIdentical(DF[["counts"]], counts) DF <- DataFrame(score + score) # unnamed arg with invalid name expression checkTrue(validObject(DF)) checkIdentical(DF[["score...score"]], score + score) mat <- cbind(score) DF <- DataFrame(mat) # single column matrix with column name checkTrue(validObject(DF)) checkIdentical(DF[["score"]], unname(score)) mat <- cbind(score, counts) DF <- DataFrame(mat) # two column matrix with col names checkTrue(validObject(DF)) checkIdentical(DF[["score"]], unname(score)) checkIdentical(DF[["counts"]], counts) colnames(mat) <- NULL DF <- DataFrame(mat) # two column matrix without col names checkTrue(validObject(DF)) checkIdentical(DF[["V1"]], unname(score)) sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame checkIdentical(as.data.frame(sw), swiss) rownames(swiss) <- NULL # strip row names to make them comparable sw <- DataFrame(swiss) # a data.frame checkIdentical(as.data.frame(sw), swiss) sw <- DataFrame(swiss[1:3,], score = unname(score)) checkIdentical(as.data.frame(sw), data.frame(swiss[1:3,], score)) sw <- DataFrame(score = score, swiss = swiss[1:3,]) # named data.frame/matrix checkIdentical(as.data.frame(sw), data.frame(score = score, swiss = swiss[1:3,])) ## identity DF <- DataFrame(A=I(list(1:3)), B=I(list(1:4))) DF[[2]] <- I(DF[[2]]) df <- data.frame(A=I(list(1:3)), B=I(list(1:4))) df[[1]] <- unclass(df[[1]]) checkIdentical(as.data.frame(DF), df) ## recycling DF <- DataFrame(1, score) checkIdentical(DF[[1]], rep(1, 3)) checkIdentical(DF[[2]], score) ## Non-S4 columns for which is.object() is TRUE (a.k.a. S3-typed columns) foo <- package_version(c("3.4.99", "5.2", "2.0")) bar <- as.POSIXlt(c(123123, 124235235, 96546546)) DF <- DataFrame(foo=foo, bar=bar) checkTrue(validObject(DF)) checkIdentical(dim(DF), c(3L, 2L)) checkIdentical(colnames(DF), c("foo", "bar")) checkIdentical(as.data.frame(DF), data.frame(foo=foo, bar=bar)) } test_DataFrame_coerce <- function() { ## need to introduce character() dim names checkTrue(validObject(as(matrix(0L, 0L, 0L), "DataFrame"))) score <- c(X=1L, Y=3L, Z=NA) DF <- as(score, "DataFrame") checkTrue(validObject(DF)) checkIdentical(DF[[1]], score) } test_DataFrame_subset <- function() { data(swiss) sw <- DataFrame(swiss) rn <- rownames(swiss) checkException(sw[list()], silent = TRUE) # non-atomic checkException(sw[NA], silent = TRUE) # column indices cannot be NA checkException(sw[100], silent = TRUE) # out of bounds col checkException(sw[,100], silent = TRUE) checkException(sw[1000,], silent = TRUE) # out of bounds row oldOpts <- options(warn=2) checkException(sw[1:3, drop=TRUE], silent = TRUE) # drop ignored checkException(sw[drop=TRUE], silent = TRUE) checkException(sw[foo = "bar"], silent = TRUE) # invalid argument options(oldOpts) checkException(sw[,"Fert"], silent = TRUE) # bad column name sw <- DataFrame(swiss) checkIdentical(sw[], sw) # identity subset checkIdentical(sw[,], sw) checkIdentical(sw[NULL], DataFrame(swiss[NULL])) # NULL subsetting checkIdentical(sw[,NULL], DataFrame(swiss[,NULL])) checkIdentical(as.data.frame(sw[NULL,]), structure(data.frame(swiss[NULL,]), row.names = character())) rownames(sw) <- rn ## select columns checkIdentical(as.data.frame(sw[1:3]), swiss[1:3]) checkIdentical(as.data.frame(sw[, 1:3]), swiss[1:3]) ## select rows checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,]) checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,]) checkIdentical(as.data.frame(sw[sw[["Education"]] == 7,]), swiss[swiss[["Education"]] == 7,]) checkIdentical(as.data.frame(sw[Rle(sw[["Education"]] == 7),]), swiss[swiss[["Education"]] == 7,]) ## select rows and columns checkIdentical(as.data.frame(sw[4:5, 1:3]), swiss[4:5,1:3]) checkIdentical(as.data.frame(sw[1]), swiss[1]) # a one-column data frame checkIdentical(sw[,"Fertility"], swiss[,"Fertility"]) ## the same checkIdentical(as.data.frame(sw[, 1, drop = FALSE]), swiss[, 1, drop = FALSE]) checkIdentical(sw[, 1], swiss[,1]) # a (unnamed) vector checkIdentical(sw[[1]], swiss[[1]]) # the same checkIdentical(sw[["Fertility"]], swiss[["Fertility"]]) checkIdentical(sw[["Fert"]], swiss[["Fert"]]) # should return 'NULL' checkIdentical(sw[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)], swiss[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)]) checkIdentical(as.data.frame(sw[1,]), swiss[1,]) # a one-row data frame checkIdentical(sw[1,, drop=TRUE], swiss[1,, drop=TRUE]) # a list ## duplicate row, unique row names are created checkIdentical(as.data.frame(sw[c(1, 1:2),]), swiss[c(1,1:2),]) ## NOTE: NA subsetting not yet supported for XVectors ##checkIdentical(as.data.frame(sw[c(1, NA, 1:2, NA),]), # mixin some NAs ## swiss[c(1, NA, 1:2, NA),]) checkIdentical(as.data.frame(sw["Courtelary",]), swiss["Courtelary",]) subswiss <- swiss[1:5,1:4] subsw <- sw[1:5,1:4] ## Starting with S4Vectors 0.31.3, we no longer support partial matching on ## the rownames of a DataFrame. #checkIdentical(as.data.frame(subsw["C",]), subswiss["C",]) # partially matches ## NOTE: NA subsetting not yet supported for XVectors ##checkIdentical(as.data.frame(subsw["foo",]), # bad row name ## subswiss["foo",]) ##checkIdentical(as.data.frame(sw[match("C", row.names(sw)), ]), ## swiss[match("C", row.names(sw)), ]) # no exact match } test_DataFrame_dimnames_replace <- function() { data(swiss) cn <- paste("X", seq_len(ncol(swiss)), sep = ".") sw <- DataFrame(swiss) colnames(sw) <- cn checkIdentical(colnames(sw), cn) cn <- as.character(seq_len(ncol(swiss))) colnames(sw) <- cn colnames(swiss) <- cn checkIdentical(colnames(sw), colnames(swiss)) colnames(sw) <- cn[1] colnames(swiss) <- cn[1] checkIdentical(colnames(sw), colnames(swiss)) rn <- seq_len(nrow(sw)) rownames(sw) <- rn checkIdentical(rownames(sw), as.character(rn)) checkException(rownames(sw) <- rn[1], silent = TRUE) rownames(sw) <- rep(rn[1], nrow(sw)) checkIdentical(rownames(sw), as.character(rep(rn[1], nrow(sw)))) rn[1] <- NA checkException(rownames(sw) <- rn, silent = TRUE) } test_DataFrame_replace <- function() { score <- c(1L, 3L, NA) counts <- c(10L, 2L, NA) DF <- DataFrame(score) # single, unnamed arg DF[["counts"]] <- counts checkIdentical(DF[["counts"]], counts) DF[[3]] <- score checkIdentical(DF[[3]], score) DF[[3]] <- NULL # deletion DF[["counts"]] <- NULL DF$counts <- counts checkIdentical(DF$counts, counts) checkException(DF[[13]] <- counts, silent = TRUE) # index must be < length+1 checkException(DF[["tooshort"]] <- counts[1:2], silent = TRUE) sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame sw1 <- sw; swiss1 <- swiss sw1[] <- 1L; swiss1[] <- 1L checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[] <- 1; swiss1[] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["Education"] <- 1; swiss1["Education"] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"Education"] <- 1; swiss1[,"Education"] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["Courtelary",] <- 1; swiss1["Courtelary",] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[1:3] <- 1; swiss1[1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,1:3] <- 1; swiss1[,1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,1:3] <- 1; swiss1[2:4,1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,-c(2,4,5)] <- 1; swiss1[2:4,-c(2,4,5)] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,1:3] <- sw1[,2:4]; swiss1[,1:3] <- swiss1[,2:4] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,] <- sw1[1:3,]; swiss1[2:4,] <- swiss1[1:3,] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,1:3] <- sw1[1:3,2:4]; swiss1[2:4,1:3] <- swiss1[1:3,2:4] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity",] <- NA; swiss1["NewCity",] <- NA checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[nrow(sw1)+(1:2),] <- NA; swiss1[nrow(swiss1)+(1:2),] <- NA checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCol"] <- seq_len(nrow(sw1)); swiss1["NewCol"] <- seq_len(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[ncol(sw1)+1L] <- seq_len(nrow(sw1)) swiss1[ncol(swiss1)+1L] <- seq_len(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"NewCol"] <- seq_len(nrow(sw1)); swiss1[,"NewCol"] <- seq_len(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity","NewCol"] <- 0 swiss1["NewCity","NewCol"] <- 0 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity",] <- DataFrame(NA); swiss1["NewCity",] <- data.frame(NA) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[nrow(sw1)+(1:2),] <- DataFrame(NA) swiss1[nrow(swiss1)+(1:2),] <- data.frame(NA) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCol"] <- DataFrame(seq_len(nrow(sw1))) swiss1["NewCol"] <- data.frame(seq_len(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[ncol(sw1)+1L] <- DataFrame(seq_len(nrow(sw1))) swiss1[ncol(swiss1)+1L] <- data.frame(seq_len(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"NewCol"] <- DataFrame(seq_len(nrow(sw1))) swiss1[,"NewCol"] <- data.frame(seq_len(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity","NewCol"] <- DataFrame(0) swiss1["NewCity","NewCol"] <- data.frame(0) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw[,1:2]; swiss1 <- swiss[,1:2] sw1[,colnames(sw)[2:3]] <- sw[,2:3] swiss1[,colnames(swiss)[2:3]] <- swiss[,2:3] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[FALSE] <- list() checkIdentical(sw1, sw) sw1[1L] <- list() swiss1[1L] <- list() checkIdentical(as.data.frame(sw1), swiss1) sw1[1L] <- NULL swiss1[1L] <- NULL checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw mcols(sw1) <- DataFrame(id = seq_len(ncol(sw1))) sw1["NewCol"] <- DataFrame(seq_len(nrow(sw1))) checkIdentical(mcols(sw1, use.names=TRUE), DataFrame(id = c(seq_len(ncol(sw1)-1), NA), row.names = colnames(sw1))) ## Non-S4 columns for which is.object() is TRUE (a.k.a. S3-typed columns) foo <- package_version(c("3.4.99", "5.2", "2.0")) bar <- as.POSIXlt(c(123123, 124235235, 96546546)) DF <- DataFrame(normal=1:3, foo=foo, bar=bar) DF$normal <- letters[1:3] checkTrue(validObject(DF)) checkIdentical(DF$foo, foo) checkIdentical(DF$bar, bar) DF$foo <- rev(foo) checkTrue(validObject(DF)) checkIdentical(DF$foo, rev(foo)) DF$bar <- bar checkTrue(validObject(DF)) checkIdentical(DF$bar, bar) } test_DataFrame_looping <- function() { data(iris) actual <- by(iris, iris$Species, nrow) ## a bit tricky because of the 'call' attribute attr(actual, "call")[[1]] <- as.name("by") iris <- DataFrame(iris, row.names=rownames(iris)) checkIdentical(actual, by(iris, iris$Species, nrow)) } test_DataFrame_annotation <- function() { df <- DataFrame(x = c(1L, 3L, NA), y = c(10L, 2L, NA)) mcols(df) <- DataFrame(a = 1:2) checkIdentical(mcols(df)[,1], 1:2) checkIdentical(mcols(df[2:1])[,1], 2:1) checkIdentical(mcols(cbind(df,df))[,1], rep(1:2,2)) df$z <- 1:3 checkIdentical(mcols(df, use.names=TRUE), DataFrame(a = c(1L, 2L, NA), row.names = c("x", "y", "z"))) } ## '[<-' setter test_DataFrame_Setter <- function() { .SingleBracket <- function(df0, df1, idx) { target <- df0 for (i in seq_len(length(df0))[idx]) target[[i]] <- df1[[i]] df <- df0 df[idx] <- df1[idx] stopifnot(identical(target, df)) df <- DataFrame(df0) df[idx] <- DataFrame(df1)[idx] if (!identical(DataFrame(target), df)) FALSE else TRUE } df0 <- data.frame(x=11:12, y=21:22, z=31:32) df1 <- data.frame(matrix(LETTERS[1:6], ncol=3)) checkTrue(.SingleBracket(df0, df1, c(FALSE, FALSE, TRUE))) checkTrue(.SingleBracket(df0, df1, c(TRUE, FALSE, TRUE))) checkTrue(.SingleBracket(df0, df1, c(TRUE, TRUE, TRUE))) checkTrue(.SingleBracket(df0, df1, TRUE)) target <- df0 target[] <- df1[] df <- DataFrame(df0) df[] <- DataFrame(df1)[] checkIdentical(DataFrame(target), df) for (i in c('a', 'c', 'e')) { DF <- DataFrame(A=1:5, row.names=letters[1:5]) df <- data.frame(A=1:5, row.names=letters[1:5]) DF[i, 'B'] <- df[i, 'B'] <- 1 checkIdentical(as.data.frame(DF), df) } } test_DataFrame_droplevels <- function() { df <- DataFrame(state.name, state.region, state.region.rle=Rle(state.region)) df2 <- head(df) checkIdentical(lapply(droplevels(df2), levels), list(state.name=NULL, state.region=c("South", "West"), state.region.rle=c("South", "West"))) } test_DataFrame_transform <- function() { DF <- DataFrame(state.name, state.region, state.area) df <- as.data.frame(DF) checkIdentical(transform(DF), DF) TF <- transform(DF, log.area = log(state.area), ratio = log.area / state.area) tf <- transform(transform(df, log.area = log(state.area)), ratio = log.area / state.area) checkIdentical(tf, as.data.frame(TF)) }