2025-01-12 04:36:52 +08:00

266 lines
9.4 KiB
R

test_DataFrameList_construction <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
data(airquality)
data(swiss)
checkDFL2dfl(DataFrameList(swiss, airquality), list(swiss, airquality))
}
test_SplitDataFrameList_construction <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
striprownames <- function(x) {
lapply(x, function(y) {
rownames(y) <- NULL
y
})
}
data(airquality)
data(swiss)
aq <- DataFrame(airquality)
sw <- DataFrame(swiss, row.names=rownames(swiss))
aqsplit1 <- split(aq, aq[["Month"]])
aqsplit2 <-
SplitDataFrameList(lapply(split(airquality, airquality[["Month"]]),
as, "DataFrame"))
checkIdentical(aqsplit1, aqsplit2)
swsplit1 <- split(sw, sw[["Education"]])
swsplit2 <-
SplitDataFrameList(lapply(split(swiss, swiss[["Education"]]),
as, "DataFrame"))
checkIdentical(swsplit1, swsplit2)
for (compress in c(TRUE, FALSE)) {
airqualitysplit <-
striprownames(split(airquality, airquality[["Month"]]))
aqsplit <-
SplitDataFrameList(as.list(split(aq, aq[["Month"]])),
compress = compress)
checkDFL2dfl(aqsplit, airqualitysplit)
swisssplit <- split(swiss, swiss[["Education"]])
swsplit <-
SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
compress = compress)
checkDFL2dfl(swsplit, swisssplit)
}
}
test_DataFrameList_subset <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
data(airquality)
data(swiss)
DFL1 <- DataFrameList(swiss, airquality)
dfl1 <- list(swiss, airquality)
checkDFL2dfl(DFL1[], dfl1[])
checkDFL2dfl(DFL1[1], dfl1[1])
checkDFL2dfl(DFL1[2:1], dfl1[2:1])
checkIdentical(as.data.frame(DFL1[[2]]), airquality)
checkException(DFL1[[3]], silent = TRUE)
DFL2 <- DataFrameList(s = swiss, a = airquality)
dfl2 <- list(s = swiss, a = airquality)
checkDFL2dfl(DFL2[], dfl2[])
checkDFL2dfl(DFL2[1], dfl2[1])
checkDFL2dfl(DFL2["a"], dfl2["a"])
checkDFL2dfl(DFL2[c("a", "s")], dfl2[c("a", "s")])
checkIdentical(as.data.frame(DFL2[["a"]]), airquality)
checkIdentical(DFL2[["z"]], NULL)
}
test_SplitDataFrameList_subset <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
data(swiss)
sw <- DataFrame(swiss, row.names = rownames(swiss))
for (compress in c(TRUE, FALSE)) {
swsplit <-
SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
compress = compress)
swisssplit <- split(swiss, swiss[["Education"]])
checkDFL2dfl(swsplit[], swisssplit[])
checkDFL2dfl(swsplit[1], swisssplit[1])
checkDFL2dfl(swsplit[2:1], swisssplit[2:1])
checkIdentical(as.data.frame(swsplit[[2]]), swisssplit[[2]])
checkIdentical(swsplit[["A"]], NULL)
checkException(swsplit[[30]], silent = TRUE)
checkIdentical(as.list(swsplit[,1]),
split(swiss[[1]], swiss[["Education"]]))
checkIdentical(as.list(swsplit[,"Examination"]),
split(swiss[["Examination"]], swiss[["Education"]]))
}
}
test_SplitDataFrameList_as.data.frame <- function() {
checkDFL2dfl <- function(DFL, dfl, compress) {
target <-
data.frame(group = togroup(PartitioningByWidth(dfl)),
group_name = names(dfl)[togroup(PartitioningByWidth(dfl))],
do.call(rbind, dfl),
stringsAsFactors=FALSE, row.names=NULL)
rownames(target) <- unlist(lapply(dfl, row.names), use.names = FALSE)
checkIdentical(target, as.data.frame(DFL))
}
data(swiss)
sw <- DataFrame(swiss, row.names = rownames(swiss))
for (compress in c(TRUE, FALSE)) {
swsplit <-
SplitDataFrameList(as.list(split(sw, sw[["Education"]])),
compress = compress)
swisssplit <- split(swiss, swiss[["Education"]])
checkDFL2dfl(swsplit, swisssplit, compress)
}
}
test_SplitDataFrameList_columnUtils <- function() {
set.seed(100001)
original <- splitAsList(DataFrame(X=runif(100), Y=rpois(100, 5)),
sample(letters, 100, replace=TRUE))
out <- original
checkIdentical(commonColnames(out), c("X", "Y"))
commonColnames(out) <- c("a", "b")
checkIdentical(commonColnames(out), c("a", "b"))
checkIdentical(colnames(out[[1]]), c("a", "b"))
checkIdentical(colnames(out[[length(out)]]), c("a", "b"))
checkIdentical(commonColnames(out[0]), c("a", "b"))
# Same behavior for SimpleSDFLs.
alt <- as(original, "SimpleSplitDataFrameList")
checkIdentical(commonColnames(alt), c("X", "Y"))
commonColnames(alt) <- c("a", "b")
checkIdentical(commonColnames(alt), c("a", "b"))
checkIdentical(colnames(alt[[1]]), c("a", "b"))
checkIdentical(colnames(alt[[length(alt)]]), c("a", "b"))
checkIdentical(commonColnames(alt[0]), NULL)
}
test_DataFrameList_replace <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
data(airquality)
data(swiss)
DFL1 <- DataFrameList(swiss, airquality)
dfl1 <- list(swiss, airquality)
DFL1[] <- DFL1[1]
dfl1[] <- dfl1[1]
checkDFL2dfl(DFL1, dfl1)
DFL1 <- DataFrameList(swiss, airquality)
dfl1 <- list(swiss, airquality)
DFL1[2] <- DFL1[1]
dfl1[2] <- dfl1[1]
checkDFL2dfl(DFL1, dfl1)
DFL1 <- DataFrameList(swiss, airquality)
dfl1 <- list(swiss, airquality)
DFL1[[1]][[1]] <- DFL1[[1]][[1]] + 1L
dfl1[[1]][[1]] <- dfl1[[1]][[1]] + 1L
checkDFL2dfl(DFL1, dfl1)
}
test_SplitDataFrameList_replace <- function() {
checkDFL2dfl <- function(DFL, dfl) {
checkIdentical(lapply(as.list(DFL), as.data.frame), dfl)
}
striprownames <- function(x) {
lapply(x, function(y) {
rownames(y) <- NULL
y
})
}
data(airquality)
data(swiss)
swiss2 <- swiss
rownames(swiss2) <- NULL
sw2 <- DataFrame(swiss2)
for (compress in c(TRUE, FALSE)) {
swiss2split <- striprownames(split(swiss2, swiss2[["Education"]]))
sw2split <-
SplitDataFrameList(as.list(split(sw2, sw2[["Education"]])),
compress = compress)
swiss2split[] <- swiss2split[1]
sw2split[] <- sw2split[1]
checkDFL2dfl(sw2split, swiss2split)
swiss2split <- striprownames(split(swiss2, swiss2[["Education"]]))
sw2split <-
SplitDataFrameList(as.list(split(sw2, sw2[["Education"]])),
compress = compress)
swiss2split[c(2, 4, 5)] <- swiss2split[1]
sw2split[c(2, 4, 5)] <- sw2split[1]
checkDFL2dfl(sw2split, swiss2split)
swiss2split <- striprownames(split(swiss2, swiss2[["Education"]]))
swiss2split <-
lapply(swiss2split,
function(x) {x[["Examination"]] <- x[["Examination"]] + 1L; x})
sw2split <-
SplitDataFrameList(as.list(split(sw2, sw2[["Education"]])),
compress = compress)
sw2split[,"Examination"] <- sw2split[,"Examination"] + 1L
checkDFL2dfl(sw2split, swiss2split)
swiss2split <- striprownames(split(swiss2, swiss2[["Education"]]))
swiss2split <-
lapply(swiss2split, function(x) {
x[["Examination"]][x[["Examination"]] > 22] <-
x[["Examination"]][x[["Examination"]] > 22] + 1L
x
})
sw2split <-
SplitDataFrameList(as.list(split(sw2, sw2[["Education"]])),
compress = compress)
sw2split[sw2split[, "Examination"] > 22, "Examination"] <-
sw2split[sw2split[, "Examination"] > 22,"Examination"] + 1L
checkDFL2dfl(sw2split, swiss2split)
}
}
test_DataFrameList_transform <- function() {
DF <- DataFrame(state.division, state.region, state.area)
DFL <- split(DF, DF$state.division) # NICER: split(DF, ~ state.devision)
DFL <- transform(DFL, total.area=sum(state.area[state.region!="South"]),
fraction=ifelse2(total.area == 0, 0, state.area/total.area))
ANS <- DataFrame(lapply(unlist(DFL, use.names=FALSE), unname))
df <- as.data.frame(DF)
df$total.area <-
with(subset(df, state.region != "South"),
sapply(split(state.area, state.division), sum))[df$state.division]
df$fraction <- with(df, ifelse(total.area == 0, 0, state.area/total.area))
df <- df[order(df$state.division),]
rownames(df) <- NULL
checkIdentical(ANS, DataFrame(df))
}
test_SplitDataFrameList_rownames <- function() {
csdfl <- SplitDataFrameList(DataFrame(one = c(1,2,3,4), row.names = seq_len(4)),
DataFrame(one = c(11,12,13,14), row.names = c("a","b","c","d")))
csdfl[[1]] <- DataFrame(one = c(4,3,2,1), row.names = rev(seq_len(4)))
csdfl2 <- SplitDataFrameList(DataFrame(one = c(1,2,3,4), row.names = rev(seq_len(4))),
DataFrame(one = c(11,12,13,14), row.names = c("a","b","c","d")))
checkIdentical(rownames(csdfl), rownames(csdfl2))
}