# A DataFrameList built from two data.frames converts back, element by
# element, to those same data.frames.
test_DataFrameList_construction <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  data(airquality)
  data(swiss)

  expect_frames(DataFrameList(swiss, airquality), list(swiss, airquality))
}

# split() on a DataFrame and SplitDataFrameList() on a list of DataFrames
# must agree, for both values of the `compress` argument.
test_SplitDataFrameList_construction <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  # Reset the row names of every data.frame in `frames`.
  drop_rownames <- function(frames) {
    lapply(frames, function(frame) {
      rownames(frame) <- NULL
      frame
    })
  }

  data(airquality)
  data(swiss)

  aq <- DataFrame(airquality)
  sw <- DataFrame(swiss, row.names = rownames(swiss))

  # Plain data.frame splits; they do not depend on `compress`, so they are
  # computed once and reused below.
  airquality_by_month <- split(airquality, airquality[["Month"]])
  swiss_by_education <- split(swiss, swiss[["Education"]])

  # Splitting a DataFrame directly ...
  aqsplit1 <- split(aq, aq[["Month"]])
  # ... matches wrapping the coerced pieces of the plain split.
  aqsplit2 <- SplitDataFrameList(lapply(airquality_by_month, as, "DataFrame"))
  checkIdentical(aqsplit1, aqsplit2)

  swsplit1 <- split(sw, sw[["Education"]])
  swsplit2 <- SplitDataFrameList(lapply(swiss_by_education, as, "DataFrame"))
  checkIdentical(swsplit1, swsplit2)

  # `aq` was built without explicit row names, so the airquality reference
  # has its row names reset; `sw` was built with the swiss row names, so the
  # swiss reference is used as is.
  airqualitysplit <- drop_rownames(airquality_by_month)
  swisssplit <- swiss_by_education

  for (use_compress in c(TRUE, FALSE)) {
    aqsplit <- SplitDataFrameList(as.list(split(aq, aq[["Month"]])),
                                  compress = use_compress)
    expect_frames(aqsplit, airqualitysplit)

    swsplit <- SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
                                  compress = use_compress)
    expect_frames(swsplit, swisssplit)
  }
}

# `[` and `[[` on a DataFrameList behave like the same operations on a plain
# list of data.frames, for both unnamed and named lists.
test_DataFrameList_subset <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  data(airquality)
  data(swiss)

  # Unnamed list: positional subsetting.
  DFL1 <- DataFrameList(swiss, airquality)
  dfl1 <- list(swiss, airquality)

  expect_frames(DFL1[], dfl1[])
  for (idx in list(1, 2:1)) {
    expect_frames(DFL1[idx], dfl1[idx])
  }
  checkIdentical(as.data.frame(DFL1[[2]]), airquality)
  # `[[` with an index past the end is expected to raise an error.
  checkException(DFL1[[3]], silent = TRUE)

  # Named list: positional and name-based subsetting.
  DFL2 <- DataFrameList(s = swiss, a = airquality)
  dfl2 <- list(s = swiss, a = airquality)

  expect_frames(DFL2[], dfl2[])
  for (idx in list(1, "a", c("a", "s"))) {
    expect_frames(DFL2[idx], dfl2[idx])
  }
  checkIdentical(as.data.frame(DFL2[["a"]]), airquality)
  # `[[` with an unknown name is expected to yield NULL.
  checkIdentical(DFL2[["z"]], NULL)
}

# Subsetting a SplitDataFrameList: list-style `[` / `[[`, plus column
# extraction with the two-index form `x[, j]`.
test_SplitDataFrameList_subset <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  data(swiss)
  sw <- DataFrame(swiss, row.names = rownames(swiss))

  # Reference split on the plain data.frame; independent of `compress`.
  swisssplit <- split(swiss, swiss[["Education"]])

  for (use_compress in c(TRUE, FALSE)) {
    swsplit <- SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
                                  compress = use_compress)

    expect_frames(swsplit[], swisssplit[])
    for (idx in list(1, 2:1)) {
      expect_frames(swsplit[idx], swisssplit[idx])
    }

    checkIdentical(as.data.frame(swsplit[[2]]), swisssplit[[2]])
    checkIdentical(swsplit[["A"]], NULL)
    checkException(swsplit[[30]], silent = TRUE)

    # Selecting one column (by position or by name) gives, as a list, the
    # same thing as splitting that column of the plain data.frame.
    for (column in list(1, "Examination")) {
      checkIdentical(as.list(swsplit[, column]),
                     split(swiss[[column]], swiss[["Education"]]))
    }
  }
}

# as.data.frame() on a SplitDataFrameList: the expected result has `group`
# and `group_name` columns followed by the row-bound elements.
test_SplitDataFrameList_as.data.frame <- function() {
  # Build the expected data.frame from the plain list `dfl` and compare it
  # with as.data.frame(DFL). The `compress` argument is accepted but not
  # used in the body.
  expect_flattened <- function(DFL, dfl, compress) {
    grp <- togroup(PartitioningByWidth(dfl))
    target <- data.frame(group = grp,
                         group_name = names(dfl)[grp],
                         do.call(rbind, dfl),
                         stringsAsFactors = FALSE,
                         row.names = NULL)
    rownames(target) <- unlist(lapply(dfl, row.names), use.names = FALSE)
    checkIdentical(target, as.data.frame(DFL))
  }

  data(swiss)
  sw <- DataFrame(swiss, row.names = rownames(swiss))
  swisssplit <- split(swiss, swiss[["Education"]])

  for (use_compress in c(TRUE, FALSE)) {
    swsplit <- SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
                                  compress = use_compress)
    expect_flattened(swsplit, swisssplit, use_compress)
  }
}

# commonColnames() getter and setter on the object returned by splitAsList()
# and on its SimpleSplitDataFrameList coercion.
test_SplitDataFrameList_columnUtils <- function() {
  set.seed(100001)
  original <- splitAsList(DataFrame(X=runif(100), Y=rpois(100, 5)),
                          sample(letters, 100, replace=TRUE))

  # Rename the common columns of `sdfl` (a local copy) and check that the
  # new names are visible on the list and on its first and last elements.
  # `expected_when_empty` is the value expected from commonColnames() on
  # the zero-length subset.
  check_renaming <- function(sdfl, expected_when_empty) {
    new_names <- c("a", "b")
    checkIdentical(commonColnames(sdfl), c("X", "Y"))
    commonColnames(sdfl) <- new_names
    checkIdentical(commonColnames(sdfl), new_names)
    checkIdentical(colnames(sdfl[[1]]), new_names)
    checkIdentical(colnames(sdfl[[length(sdfl)]]), new_names)
    checkIdentical(commonColnames(sdfl[0]), expected_when_empty)
  }

  check_renaming(original, c("a", "b"))

  # Same checks for SimpleSDFLs, except that the zero-length subset is
  # expected to report NULL.
  check_renaming(as(original, "SimpleSplitDataFrameList"), NULL)
}

# `[<-` and nested `[[<-` on a DataFrameList mirror the same replacements on
# a plain list of data.frames.
test_DataFrameList_replace <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  data(airquality)
  data(swiss)

  # Replace every element with the first one.
  DFL1 <- DataFrameList(swiss, airquality)
  dfl1 <- list(swiss, airquality)
  DFL1[] <- DFL1[1]
  dfl1[] <- dfl1[1]
  expect_frames(DFL1, dfl1)

  # Replace only the second element with the first one.
  DFL1 <- DataFrameList(swiss, airquality)
  dfl1 <- list(swiss, airquality)
  DFL1[2] <- DFL1[1]
  dfl1[2] <- dfl1[1]
  expect_frames(DFL1, dfl1)

  # Modify the first column of the first element in place.
  DFL1 <- DataFrameList(swiss, airquality)
  dfl1 <- list(swiss, airquality)
  DFL1[[1]][[1]] <- DFL1[[1]][[1]] + 1L
  dfl1[[1]][[1]] <- dfl1[[1]][[1]] + 1L
  expect_frames(DFL1, dfl1)
}

# Replacement on a SplitDataFrameList (whole elements, a whole column, and a
# row-filtered column), checked against the same edits applied to a plain
# list of data.frames, for both values of `compress`.
test_SplitDataFrameList_replace <- function() {
  # Convert each element of `actual` to a data.frame and compare the
  # resulting list with the plain list `expected`.
  expect_frames <- function(actual, expected) {
    converted <- lapply(as.list(actual), as.data.frame)
    checkIdentical(converted, expected)
  }

  # Reset the row names of every data.frame in `frames`.
  drop_rownames <- function(frames) {
    lapply(frames, function(frame) {
      rownames(frame) <- NULL
      frame
    })
  }

  data(airquality)
  data(swiss)

  swiss2 <- swiss
  rownames(swiss2) <- NULL
  sw2 <- DataFrame(swiss2)

  # Fresh, unmodified fixtures for each scenario below.
  fresh_plain <- function() {
    drop_rownames(split(swiss2, swiss2[["Education"]]))
  }
  fresh_sdfl <- function(compress) {
    SplitDataFrameList(as.list(split(sw2, sw2[["Education"]])),
                       compress = compress)
  }

  for (use_compress in c(TRUE, FALSE)) {
    # Replace every element with the first one.
    swiss2split <- fresh_plain()
    sw2split <- fresh_sdfl(use_compress)
    swiss2split[] <- swiss2split[1]
    sw2split[] <- sw2split[1]
    expect_frames(sw2split, swiss2split)

    # Replace a subset of elements with the first one.
    swiss2split <- fresh_plain()
    sw2split <- fresh_sdfl(use_compress)
    swiss2split[c(2, 4, 5)] <- swiss2split[1]
    sw2split[c(2, 4, 5)] <- sw2split[1]
    expect_frames(sw2split, swiss2split)

    # Increment one whole column across all elements.
    swiss2split <- lapply(fresh_plain(), function(frame) {
      frame[["Examination"]] <- frame[["Examination"]] + 1L
      frame
    })
    sw2split <- fresh_sdfl(use_compress)
    sw2split[, "Examination"] <- sw2split[, "Examination"] + 1L
    expect_frames(sw2split, swiss2split)

    # Increment that column only in the rows where it exceeds 22.
    swiss2split <- lapply(fresh_plain(), function(frame) {
      above <- frame[["Examination"]] > 22
      frame[["Examination"]][above] <- frame[["Examination"]][above] + 1L
      frame
    })
    sw2split <- fresh_sdfl(use_compress)
    above <- sw2split[, "Examination"] > 22
    sw2split[above, "Examination"] <- sw2split[above, "Examination"] + 1L
    expect_frames(sw2split, swiss2split)
  }
}

# transform() on a split DataFrame, compared against the same per-division
# computation done by hand on a plain data.frame.
test_DataFrameList_transform <- function() {
  DF <- DataFrame(state.division, state.region, state.area)
  DFL <- split(DF, DF$state.division) # NICER: split(DF, ~ state.division)
  DFL <- transform(DFL,
                   total.area=sum(state.area[state.region!="South"]),
                   fraction=ifelse2(total.area == 0, 0,
                                    state.area/total.area))
  ANS <- DataFrame(lapply(unlist(DFL, use.names=FALSE), unname))

  # Reference computation: per-division sum of state.area over the rows
  # whose region is not "South", looked up for every row by its division.
  df <- as.data.frame(DF)
  non_south <- subset(df, state.region != "South")
  area_by_division <- with(non_south,
                           sapply(split(state.area, state.division), sum))
  df$total.area <- area_by_division[df$state.division]
  df$fraction <- with(df, ifelse(total.area == 0, 0, state.area/total.area))

  # Order the reference rows by division and reset the row names before
  # comparing.
  df <- df[order(df$state.division), ]
  rownames(df) <- NULL

  checkIdentical(ANS, DataFrame(df))
}

# After replacing the first element via `[[<-`, rownames() of the list must
# equal rownames() of a list constructed directly from the same pieces.
test_SplitDataFrameList_rownames <- function() {
  ascending <- DataFrame(one = c(1, 2, 3, 4), row.names = seq_len(4))
  descending <- DataFrame(one = c(1, 2, 3, 4), row.names = rev(seq_len(4)))
  lettered <- DataFrame(one = c(11, 12, 13, 14),
                        row.names = c("a", "b", "c", "d"))

  csdfl <- SplitDataFrameList(ascending, lettered)
  csdfl[[1]] <- DataFrame(one = c(4, 3, 2, 1), row.names = rev(seq_len(4)))

  csdfl2 <- SplitDataFrameList(descending, lettered)

  checkIdentical(rownames(csdfl), rownames(csdfl2))
}