657 lines
35 KiB
Plaintext
657 lines
35 KiB
Plaintext
|
|
R Under development (unstable) (2020-03-24 r78051) -- "Unsuffered Consequences"
|
|
Copyright (C) 2020 The R Foundation for Statistical Computing
|
|
Platform: x86_64-apple-darwin19.3.0 (64-bit)
|
|
|
|
R is free software and comes with ABSOLUTELY NO WARRANTY.
|
|
You are welcome to redistribute it under certain conditions.
|
|
Type 'license()' or 'licence()' for distribution details.
|
|
|
|
R is a collaborative project with many contributors.
|
|
Type 'contributors()' for more information and
|
|
'citation()' on how to cite R or R packages in publications.
|
|
|
|
Type 'demo()' for some demos, 'help()' for on-line help, or
|
|
'help.start()' for an HTML browser interface to help.
|
|
Type 'q()' to quit R.
|
|
|
|
> library(foreign)
|
|
>
|
|
> sample100 <- read.spss("sample100.sav",FALSE)
|
|
> summary(sample100)
|
|
Length Class Mode
|
|
YEAR 100 -none- numeric
|
|
DISTRICT 100 -none- numeric
|
|
CAMPUS 100 -none- numeric
|
|
DNAME 100 -none- character
|
|
CNAME 100 -none- character
|
|
SEX 100 -none- character
|
|
DISADVG 100 -none- character
|
|
ETHNICTY 100 -none- character
|
|
STUID 100 -none- character
|
|
TLIMTH 100 -none- numeric
|
|
GRADE 100 -none- numeric
|
|
> str(sample100)
|
|
List of 11
|
|
$ YEAR : num [1:100] 99 94 94 95 96 97 98 99 94 95 ...
|
|
$ DISTRICT: num [1:100] 57914 57914 57914 57914 57914 ...
|
|
$ CAMPUS : num [1:100] 57914045 57914106 57914110 57914123 57914117 ...
|
|
$ DNAME : chr [1:100] "MESQUITE ISD " "MESQUITE ISD " "MESQUITE ISD " "MESQUITE ISD " ...
|
|
$ CNAME : chr [1:100] "A C NEW MIDDLE " "MCWHORTER EL " "RUTHERFORD EL " "JOEY M PIRRUNG " ...
|
|
$ SEX : chr [1:100] "F" "F" " " "F" ...
|
|
$ DISADVG : chr [1:100] "NO " "NO " " " "NO " ...
|
|
$ ETHNICTY: chr [1:100] "WHITE " "WHITE " " " "HISPANIC" ...
|
|
$ STUID : chr [1:100] "00614371S" "00614372S" "00614373S" "00614373S" ...
|
|
$ TLIMTH : num [1:100] 90 86 67 77 77 79 86 87 89 85 ...
|
|
$ GRADE : num [1:100] 8 3 3 4 5 6 7 8 3 4 ...
|
|
- attr(*, "label.table")=List of 11
|
|
..$ YEAR : NULL
|
|
..$ DISTRICT: NULL
|
|
..$ CAMPUS : NULL
|
|
..$ DNAME : NULL
|
|
..$ CNAME : NULL
|
|
..$ SEX : NULL
|
|
..$ DISADVG : NULL
|
|
..$ ETHNICTY: NULL
|
|
..$ STUID : NULL
|
|
..$ TLIMTH : NULL
|
|
..$ GRADE : NULL
|
|
- attr(*, "variable.labels")= Named chr [1:11] "YEAR OF ADMINISTRATION" "COUNTY DISTRICT NUMBER" "COUNTY DISTRICT CAMPUS NUMBER" "DISTRICT NAME" ...
|
|
..- attr(*, "names")= chr [1:11] "YEAR" "DISTRICT" "CAMPUS" "DNAME" ...
|
|
> d.sample100 <- data.frame(sample100,stringsAsFactors=TRUE)
|
|
> summary(d.sample100)
|
|
YEAR DISTRICT CAMPUS DNAME
|
|
Min. : 94.0 Min. : 57905 Min. : 57905126 DALLAS ISD : 1
|
|
1st Qu.: 95.0 1st Qu.: 57914 1st Qu.: 57914046 HARMONY ISD : 1
|
|
Median : 96.0 Median : 57914 Median : 57914112 MESQUITE ISD :96
|
|
Mean : 115.4 Mean : 59644 Mean : 59644015 SUNNYVALE ISD : 2
|
|
3rd Qu.: 98.0 3rd Qu.: 57914 3rd Qu.: 57914125
|
|
Max. :2000.0 Max. :230905 Max. :230905101
|
|
|
|
CNAME SEX DISADVG ETHNICTY STUID
|
|
J C AUSTIN EL :23 : 4 : 4 : 4 00614373S: 6
|
|
R S KIMBROUGH M:12 F:62 NO :90 HISPANIC: 3 00614378S: 6
|
|
WILKINSON MIDDL: 9 M:34 YES: 6 OTHER : 2 00614379S: 6
|
|
VERNON PRICE EL: 7 WHITE :91 00614389S: 6
|
|
TISINGER EL : 6 00614397S: 6
|
|
THOMPSON EL : 5 00614403S: 6
|
|
(Other) :38 (Other) :64
|
|
TLIMTH GRADE
|
|
Min. :54.00 Min. :3.00
|
|
1st Qu.:74.00 1st Qu.:4.00
|
|
Median :84.00 Median :5.00
|
|
Mean :80.16 Mean :5.37
|
|
3rd Qu.:87.00 3rd Qu.:7.00
|
|
Max. :93.00 Max. :8.00
|
|
|
|
> s100 <- sample100
|
|
> sample100 <- read.spss("sample100.por",FALSE)
|
|
> stopifnot(identical(s100, sample100)) # no need for further summary() etc
|
|
>
|
|
> pbc <- read.spss("pbc.sav",FALSE)
|
|
> summary(pbc)
|
|
Length Class Mode
|
|
AGE 418 -none- numeric
|
|
ALB 418 -none- numeric
|
|
ALKPHOS 418 -none- numeric
|
|
ASCITES 418 -none- numeric
|
|
BILI 418 -none- numeric
|
|
CHOL 418 -none- numeric
|
|
EDEMA 418 -none- numeric
|
|
EDTRT 418 -none- numeric
|
|
HEPMEG 418 -none- numeric
|
|
TIME 418 -none- numeric
|
|
PLATELET 418 -none- numeric
|
|
PROTIME 418 -none- numeric
|
|
SEX 418 -none- numeric
|
|
SGOT 418 -none- numeric
|
|
SPIDERS 418 -none- numeric
|
|
STAGE 418 -none- numeric
|
|
STATUS 418 -none- numeric
|
|
TRT 418 -none- numeric
|
|
TRIG 418 -none- numeric
|
|
COPPER 418 -none- numeric
|
|
> str(pbc)
|
|
List of 20
|
|
$ AGE : num [1:418] 58.8 56.4 70.1 54.7 38.1 ...
|
|
$ ALB : num [1:418] 2.6 4.14 3.48 2.54 3.53 3.98 4.09 4 3.08 2.74 ...
|
|
$ ALKPHOS : num [1:418] 1718 7395 516 6122 671 ...
|
|
$ ASCITES : num [1:418] 1 0 0 0 0 0 0 0 0 1 ...
|
|
$ BILI : num [1:418] 14.5 1.1 1.4 1.8 3.4 0.8 1 0.3 3.2 12.6 ...
|
|
$ CHOL : num [1:418] 261 302 176 244 279 248 322 280 562 200 ...
|
|
$ EDEMA : num [1:418] 1 0 1 1 0 0 0 0 0 1 ...
|
|
$ EDTRT : num [1:418] 1 0 0.5 0.5 0 0 0 0 0 1 ...
|
|
$ HEPMEG : num [1:418] 1 1 0 1 1 1 1 0 0 0 ...
|
|
$ TIME : num [1:418] 400 4500 1012 1925 1504 ...
|
|
$ PLATELET: num [1:418] 190 221 151 183 136 -9 204 373 251 302 ...
|
|
$ PROTIME : num [1:418] 12.2 10.6 12 10.3 10.9 11 9.7 11 11 11.5 ...
|
|
$ SEX : num [1:418] 1 1 0 1 1 1 1 1 1 1 ...
|
|
$ SGOT : num [1:418] 137.9 113.5 96.1 60.6 113.2 ...
|
|
$ SPIDERS : num [1:418] 1 1 0 1 1 0 0 0 1 1 ...
|
|
$ STAGE : num [1:418] 4 3 4 4 3 3 3 3 2 4 ...
|
|
$ STATUS : num [1:418] 1 0 1 1 0 1 0 1 1 1 ...
|
|
$ TRT : num [1:418] 1 1 1 1 2 2 2 2 1 2 ...
|
|
$ TRIG : num [1:418] 172 88 55 92 72 63 213 189 88 143 ...
|
|
$ COPPER : num [1:418] 156 54 210 64 143 50 52 52 79 140 ...
|
|
- attr(*, "label.table")=List of 20
|
|
..$ AGE : NULL
|
|
..$ ALB : NULL
|
|
..$ ALKPHOS : NULL
|
|
..$ ASCITES : NULL
|
|
..$ BILI : NULL
|
|
..$ CHOL : NULL
|
|
..$ EDEMA : NULL
|
|
..$ EDTRT : NULL
|
|
..$ HEPMEG : NULL
|
|
..$ TIME : NULL
|
|
..$ PLATELET: NULL
|
|
..$ PROTIME : NULL
|
|
..$ SEX : NULL
|
|
..$ SGOT : NULL
|
|
..$ SPIDERS : NULL
|
|
..$ STAGE : NULL
|
|
..$ STATUS : NULL
|
|
..$ TRT : NULL
|
|
..$ TRIG : NULL
|
|
..$ COPPER : NULL
|
|
> d.pbc <- data.frame(pbc)
|
|
> summary(d.pbc)
|
|
AGE ALB ALKPHOS ASCITES
|
|
Min. :26.28 Min. :1.960 Min. : -9 Min. :-9.000
|
|
1st Qu.:42.83 1st Qu.:3.243 1st Qu.: -9 1st Qu.:-9.000
|
|
Median :51.00 Median :3.530 Median : 1009 Median : 0.000
|
|
Mean :50.74 Mean :3.497 Mean : 1478 Mean :-2.225
|
|
3rd Qu.:58.24 3rd Qu.:3.770 3rd Qu.: 1708 3rd Qu.: 0.000
|
|
Max. :78.44 Max. :4.640 Max. :13862 Max. : 1.000
|
|
BILI CHOL EDEMA EDTRT
|
|
Min. : 0.300 Min. : -9.0 Min. :0.0000 Min. :0.0000
|
|
1st Qu.: 0.800 1st Qu.: -9.0 1st Qu.:0.0000 1st Qu.:0.0000
|
|
Median : 1.400 Median : 252.5 Median :0.0000 Median :0.0000
|
|
Mean : 3.221 Mean : 248.2 Mean :0.1196 Mean :0.1005
|
|
3rd Qu.: 3.400 3rd Qu.: 347.8 3rd Qu.:0.0000 3rd Qu.:0.0000
|
|
Max. :28.000 Max. :1775.0 Max. :1.0000 Max. :1.0000
|
|
HEPMEG TIME PLATELET PROTIME SEX
|
|
Min. :-9.0 Min. : 41 Min. : -9.0 Min. : 9.00 Min. :-9.000
|
|
1st Qu.:-9.0 1st Qu.:1093 1st Qu.:181.0 1st Qu.:10.00 1st Qu.:-9.000
|
|
Median : 0.0 Median :1730 Median :248.0 Median :10.60 Median : 1.000
|
|
Mean :-1.9 Mean :1918 Mean :250.0 Mean :10.73 Mean :-1.622
|
|
3rd Qu.: 1.0 3rd Qu.:2614 3rd Qu.:315.5 3rd Qu.:11.10 3rd Qu.: 1.000
|
|
Max. : 1.0 Max. :4795 Max. :721.0 Max. :18.00 Max. : 1.000
|
|
SGOT SPIDERS STAGE STATUS
|
|
Min. : -9.00 Min. :-9.000 Min. :-9.00000 Min. :0.0000
|
|
1st Qu.: -9.00 1st Qu.:-9.000 1st Qu.:-9.00000 1st Qu.:0.0000
|
|
Median : 90.45 Median : 0.000 Median : 3.00000 Median :0.0000
|
|
Mean : 89.20 Mean :-2.067 Mean :-0.01914 Mean :0.3852
|
|
3rd Qu.:135.75 3rd Qu.: 0.000 3rd Qu.: 4.00000 3rd Qu.:1.0000
|
|
Max. :457.25 Max. : 1.000 Max. : 4.00000 Max. :1.0000
|
|
TRT TRIG COPPER
|
|
Min. :-9.000 Min. : -9.0 Min. : -9.00
|
|
1st Qu.:-9.000 1st Qu.: -9.0 1st Qu.: -9.00
|
|
Median : 1.000 Median : 85.0 Median : 50.50
|
|
Mean :-1.167 Mean : 81.2 Mean : 70.09
|
|
3rd Qu.: 2.000 3rd Qu.:127.8 3rd Qu.:100.75
|
|
Max. : 2.000 Max. :598.0 Max. :588.00
|
|
> pbco <- read.spss("pbcold.sav",FALSE)
|
|
> stopifnot(identical(pbc, pbco))
|
|
> ## summary(pbco)
|
|
> ## str(pbco)
|
|
> ## d.pbco <- data.frame(pbco)
|
|
> ## summary(d.pbco)
|
|
> pbc. <- read.spss("pbc.por",FALSE)
|
|
> summary(pbc.)
|
|
Length Class Mode
|
|
AGE 418 -none- numeric
|
|
ALB 418 -none- numeric
|
|
ALKPHOS 418 -none- numeric
|
|
ASCITES 418 -none- numeric
|
|
BILI 418 -none- numeric
|
|
CHOL 418 -none- numeric
|
|
EDEMA 418 -none- numeric
|
|
EDTRT 418 -none- numeric
|
|
HEPMEG 418 -none- numeric
|
|
TIME 418 -none- numeric
|
|
PLATELET 418 -none- numeric
|
|
PROTIME 418 -none- numeric
|
|
SEX 418 -none- numeric
|
|
SGOT 418 -none- numeric
|
|
SPIDERS 418 -none- numeric
|
|
STAGE 418 -none- numeric
|
|
STATUS 418 -none- numeric
|
|
TRT 418 -none- numeric
|
|
TRIG 418 -none- numeric
|
|
COPPER 418 -none- numeric
|
|
> str(pbc.) # has variable.labels
|
|
List of 20
|
|
$ AGE : num [1:418] 58.8 56.4 70.1 54.7 38.1 ...
|
|
$ ALB : num [1:418] 2.6 4.14 3.48 2.54 3.53 3.98 4.09 4 3.08 2.74 ...
|
|
$ ALKPHOS : num [1:418] 1718 7395 516 6122 671 ...
|
|
$ ASCITES : num [1:418] 1 0 0 0 0 0 0 0 0 1 ...
|
|
$ BILI : num [1:418] 14.5 1.1 1.4 1.8 3.4 0.8 1 0.3 3.2 12.6 ...
|
|
$ CHOL : num [1:418] 261 302 176 244 279 248 322 280 562 200 ...
|
|
$ EDEMA : num [1:418] 1 0 1 1 0 0 0 0 0 1 ...
|
|
$ EDTRT : num [1:418] 1 0 0.5 0.5 0 0 0 0 0 1 ...
|
|
$ HEPMEG : num [1:418] 1 1 0 1 1 1 1 0 0 0 ...
|
|
$ TIME : num [1:418] 400 4500 1012 1925 1504 ...
|
|
$ PLATELET: num [1:418] 190 221 151 183 136 -9 204 373 251 302 ...
|
|
$ PROTIME : num [1:418] 12.2 10.6 12 10.3 10.9 11 9.7 11 11 11.5 ...
|
|
$ SEX : num [1:418] 1 1 0 1 1 1 1 1 1 1 ...
|
|
$ SGOT : num [1:418] 137.9 113.5 96.1 60.6 113.2 ...
|
|
$ SPIDERS : num [1:418] 1 1 0 1 1 0 0 0 1 1 ...
|
|
$ STAGE : num [1:418] 4 3 4 4 3 3 3 3 2 4 ...
|
|
$ STATUS : num [1:418] 1 0 1 1 0 1 0 1 1 1 ...
|
|
$ TRT : num [1:418] 1 1 1 1 2 2 2 2 1 2 ...
|
|
$ TRIG : num [1:418] 172 88 55 92 72 63 213 189 88 143 ...
|
|
$ COPPER : num [1:418] 156 54 210 64 143 50 52 52 79 140 ...
|
|
- attr(*, "label.table")=List of 20
|
|
..$ AGE : NULL
|
|
..$ ALB : NULL
|
|
..$ ALKPHOS : NULL
|
|
..$ ASCITES : NULL
|
|
..$ BILI : NULL
|
|
..$ CHOL : NULL
|
|
..$ EDEMA : NULL
|
|
..$ EDTRT : NULL
|
|
..$ HEPMEG : NULL
|
|
..$ TIME : NULL
|
|
..$ PLATELET: NULL
|
|
..$ PROTIME : NULL
|
|
..$ SEX : NULL
|
|
..$ SGOT : NULL
|
|
..$ SPIDERS : NULL
|
|
..$ STAGE : NULL
|
|
..$ STATUS : NULL
|
|
..$ TRT : NULL
|
|
..$ TRIG : NULL
|
|
..$ COPPER : NULL
|
|
- attr(*, "variable.labels")= Named chr [1:20] " " " " " " " " ...
|
|
..- attr(*, "names")= chr [1:20] "AGE" "ALB" "ALKPHOS" "ASCITES" ...
|
|
> stopifnot(all.equal(d.pbc, data.frame(pbc.), tolerance = 1e-15))
|
|
>
|
|
> electric.s <- read.spss(system.file("files", "electric.sav", package = "foreign"), TRUE, TRUE)
|
|
> electric.p <- read.spss("electric.por",TRUE,TRUE)
|
|
> electric.s4 <- read.spss(system.file("files", "electric.sav", package = "foreign"), TRUE, TRUE, max.value.labels = 4)
|
|
> summary(electric.s)
|
|
CASEID FIRSTCHD AGE DBP58
|
|
Min. : 1.00 NO CHD :120 Min. :40.0 Min. : 65.00
|
|
1st Qu.: 73.75 SUDDEN DEATH: 36 1st Qu.:45.0 1st Qu.: 80.00
|
|
Median : 144.50 NONFATALMI : 72 Median :48.0 Median : 87.00
|
|
Mean : 572.94 FATAL MI : 9 Mean :47.8 Mean : 88.79
|
|
3rd Qu.:1042.25 OTHER CHD : 3 3rd Qu.:51.0 3rd Qu.: 96.50
|
|
Max. :2098.00 Max. :54.0 Max. :160.00
|
|
NA's :1
|
|
EDUYR CHOL58 CGT58 HT58
|
|
Min. : 6.00 Min. :106.0 Min. : 0.00 Min. :60.90
|
|
1st Qu.: 9.75 1st Qu.:228.8 1st Qu.: 0.00 1st Qu.:66.50
|
|
Median :12.00 Median :261.0 Median :10.00 Median :68.15
|
|
Mean :11.66 Mean :264.1 Mean :11.58 Mean :68.51
|
|
3rd Qu.:14.00 3rd Qu.:293.2 3rd Qu.:20.00 3rd Qu.:70.20
|
|
Max. :18.00 Max. :515.0 Max. :60.00 Max. :77.00
|
|
NA's :28 NA's :1
|
|
WT58 DAYOFWK VITAL10 FAMHXCVR CHD
|
|
Min. :123.0 SUNDAY : 19 ALIVE:179 NO :178 Min. :0.0
|
|
1st Qu.:156.0 TUESDAY : 19 DEAD : 61 YES: 62 1st Qu.:0.0
|
|
Median :171.0 WEDNSDAY: 17 Median :0.5
|
|
Mean :173.4 SATURDAY: 16 Mean :0.5
|
|
3rd Qu.:187.0 THURSDAY: 15 3rd Qu.:1.0
|
|
Max. :278.0 (Other) : 24 Max. :1.0
|
|
NA's :130
|
|
> ii <- c(2,10)
|
|
> vl <- list(FIRSTCHD = c("OTHER CHD"= 6, "FATAL MI"= 5, "NONFATALMI"= 3,
|
|
+ "SUDDEN DEATH" = 2, "NO CHD" = 1),
|
|
+ DAYOFWK = c(SATURDAY=7, FRIDAY=6, THURSDAY=5,
|
|
+ WEDNSDAY=4, TUESDAY=3, MONDAY=2, SUNDAY=1))
|
|
> stopifnot(identical(electric.s, electric.p),
|
|
+ identical(electric.s[-ii], electric.s4[-ii]),
|
|
+ identical(vl, lapply(electric.s4[ii], attr, "value.labels")),
|
|
+ identical(lapply(vl, names),
|
|
+ lapply(electric.s[ii], function(.) rev(levels(.)))))
|
|
>
|
|
>
|
|
> ## after "long label patch":
|
|
>
|
|
> invisible(Sys.setlocale (locale="C")) ## to resolve locale problem
|
|
> ldat <- read.spss("spss_long.sav", to.data.frame=TRUE)
|
|
> ldat
|
|
variable1 variable2
|
|
1 1 1
|
|
2 2 1
|
|
3 2 3
|
|
> nnms <- nms <- names(ldat)
|
|
> names(nnms) <- nms
|
|
> stopifnot(identical(nms, c("variable1", "variable2")),
|
|
+ identical(nnms, attr(ldat, "variable.labels")))
|
|
>
|
|
>
|
|
> ## some new awkward test cases for problems found in foreign <= 0.8-68 and duplicated value labels in general:
|
|
>
|
|
> ## Expect lots of warnings as value labels (corresponding to R factor labels) are incomplete,
|
|
> ## and an unsupported long string variable is present in the data
|
|
>
|
|
> setwd(system.file("files", package = "foreign"))
|
|
> sav <- "testdata.sav"
|
|
>
|
|
> x.nodat <- read.spss(file=sav, to.data.frame = FALSE, reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Undeclared level(s) -1 added in variable: factor_numeric
|
|
3: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
4: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
5: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
6: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
7: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
8: In read.spss(file = sav, to.data.frame = FALSE, reencode = "UTF-8") :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
> str(x.nodat)
|
|
List of 17
|
|
$ numeric : num [1:5] 1 2 3 NA 3
|
|
$ numeric_long_label : num [1:5] 1 2 3.33 4 NA
|
|
$ factor_numeric : Factor w/ 6 levels "-1","strongly disagree",..: 2 3 4 1 NA
|
|
$ factor_n_long_value_label: Factor w/ 2 levels "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop",..: 1 2 NA 2 1
|
|
$ factor_n_coded_miss : Factor w/ 6 levels "strongly disagree",..: 1 2 6 5 NA
|
|
$ factor_n_duplicated : Factor w/ 3 levels "A","A_duplicated_2",..: 1 1 2 NA 3
|
|
$ factor_n_undeclared : Factor w/ 5 levels "strongly disagree",..: 1 2 4 3 1
|
|
$ factor_n_undeclared2 : Factor w/ 4 levels "0","yes","no",..: 1 4 NA 1 4
|
|
$ string : chr [1:5] "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum s"| __truncated__ "One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrib"| __truncated__ " "| __truncated__ " "| __truncated__ ...
|
|
$ string_500 : chr [1:5] "A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy "| __truncated__ " "| __truncated__ "Far far away, behind the word mountains, far from the countries Vokalia and Consonantia, there live the blind t"| __truncated__ " "| __truncated__ ...
|
|
$ STRIN0 : chr [1:5] ", my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I s"| __truncated__ " "| __truncated__ "r place and supplies it with the necessary regelialia. It is a paradisematic country, in which roasted parts of"| __truncated__ " "| __truncated__ ...
|
|
$ string_miss : chr [1:5] "a " "c " "b " "g " ...
|
|
$ factor_s_coded_miss : Factor w/ 3 levels "female","male",..: 2 1 NA 2 3
|
|
$ factor_s_duplicated : Factor w/ 4 levels "..","A","A_duplicated_b",..: 2 1 1 NA NA
|
|
$ factor_s_undeclared : Factor w/ 3 levels "no","perhaps",..: 3 1 2 NA 3
|
|
$ factor_s_undeclared2 : Factor w/ 2 levels "no","yes": NA NA NA NA NA
|
|
$ date : num [1:5] 1.27e+10 1.37e+10 1.37e+10 NA NA
|
|
- attr(*, "label.table")=List of 17
|
|
..$ numeric : NULL
|
|
..$ numeric_long_label : NULL
|
|
..$ factor_numeric : Named chr [1:5] "5" "4" "3" "2" ...
|
|
.. ..- attr(*, "names")= chr [1:5] "strongly agree" "agree" "neither agree nor disagree" "disagree" ...
|
|
..$ factor_n_long_value_label: Named chr [1:2] "2" "1"
|
|
.. ..- attr(*, "names")= chr [1:2] "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~ ..." "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop"
|
|
..$ factor_n_coded_miss : Named chr [1:6] "99" "5" "4" "3" ...
|
|
.. ..- attr(*, "names")= chr [1:6] "no answer" "strongly agree" "agree" "neither agree nor disagree" ...
|
|
..$ factor_n_duplicated : Named chr [1:3] "3" "2" "1"
|
|
.. ..- attr(*, "names")= chr [1:3] "B" "A" "A"
|
|
..$ factor_n_undeclared : Named chr [1:2] "5" "1"
|
|
.. ..- attr(*, "names")= chr [1:2] "strongly agree" "strongly disagree"
|
|
..$ factor_n_undeclared2 : Named chr [1:2] "2" "1"
|
|
.. ..- attr(*, "names")= chr [1:2] "no" "yes"
|
|
..$ string : NULL
|
|
..$ string_500 : NULL
|
|
..$ STRIN0 : NULL
|
|
..$ string_miss : NULL
|
|
..$ factor_s_coded_miss : Named chr [1:3] "u " "m " "f "
|
|
.. ..- attr(*, "names")= chr [1:3] "unknown" "male" "female"
|
|
..$ factor_s_duplicated : Named chr [1:3] "c " "b " "a "
|
|
.. ..- attr(*, "names")= chr [1:3] "C" "A" "A"
|
|
..$ factor_s_undeclared : Named chr [1:2] "y " "n "
|
|
.. ..- attr(*, "names")= chr [1:2] "yes" "no"
|
|
..$ factor_s_undeclared2 : Named chr [1:2] "y " "n "
|
|
.. ..- attr(*, "names")= chr [1:2] "yes" "no"
|
|
..$ date : NULL
|
|
- attr(*, "variable.labels")= Named chr [1:17] "numeric variable" "numeric variable with long label: this variable hat five observations (one is missing). All values between 1 an"| __truncated__ "numeric factor with missing range" "numeric factor with long value labels" ...
|
|
..- attr(*, "names")= chr [1:17] "numeric" "numeric_long_label" "factor_numeric" "factor_n_long_value_label" ...
|
|
- attr(*, "missings")=List of 17
|
|
..$ numeric :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ numeric_long_label :List of 2
|
|
.. ..$ type : chr "range"
|
|
.. ..$ value: num [1:2] 1 2
|
|
..$ factor_numeric :List of 2
|
|
.. ..$ type : chr "range"
|
|
.. ..$ value: num [1:2] -1 0
|
|
..$ factor_n_long_value_label:List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ factor_n_coded_miss :List of 2
|
|
.. ..$ type : chr "one"
|
|
.. ..$ value: num 99
|
|
..$ factor_n_duplicated :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ factor_n_undeclared :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ factor_n_undeclared2 :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ string :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ string_500 :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ STRIN0 :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ string_miss :List of 2
|
|
.. ..$ type : chr "two"
|
|
.. ..$ value: chr [1:2] "a b " "b "
|
|
..$ factor_s_coded_miss :List of 2
|
|
.. ..$ type : chr "three"
|
|
.. ..$ value: chr [1:3] "u v w \b" "v w \b" "w \b"
|
|
..$ factor_s_duplicated :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ factor_s_undeclared :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ factor_s_undeclared2 :List of 1
|
|
.. ..$ type: chr "none"
|
|
..$ date :List of 1
|
|
.. ..$ type: chr "none"
|
|
- attr(*, "codepage")= int 65001
|
|
>
|
|
> x.sort <- read.spss(file=sav, to.data.frame = TRUE, reencode="UTF-8",
|
|
+ stringsAsFactors=TRUE)
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
3: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
4: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
5: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
6: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
7: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8", :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
> str(x.sort)
|
|
'data.frame': 5 obs. of 17 variables:
|
|
$ numeric : num 1 2 3 NA 3
|
|
$ numeric_long_label : num NA NA 3.33 4 NA
|
|
$ factor_numeric : Factor w/ 5 levels "strongly disagree",..: 1 2 3 NA NA
|
|
$ factor_n_long_value_label: Factor w/ 2 levels "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnop",..: 1 2 NA 2 1
|
|
$ factor_n_coded_miss : Factor w/ 5 levels "strongly disagree",..: 1 2 NA 5 NA
|
|
$ factor_n_duplicated : Factor w/ 3 levels "A","A_duplicated_2",..: 1 1 2 NA 3
|
|
$ factor_n_undeclared : Factor w/ 5 levels "strongly disagree",..: 1 2 4 3 1
|
|
$ factor_n_undeclared2 : Factor w/ 4 levels "0","yes","no",..: 1 4 NA 1 4
|
|
$ string : Factor w/ 4 levels " "| __truncated__,..: 3 4 1 1 2
|
|
$ string_500 : Factor w/ 4 levels " "| __truncated__,..: 2 1 3 1 4
|
|
$ STRIN0 : Factor w/ 4 levels " "| __truncated__,..: 2 1 4 1 3
|
|
$ string_miss : Factor w/ 4 levels " ","a ",..: 2 3 NA 4 1
|
|
$ factor_s_coded_miss : Factor w/ 3 levels "female","male",..: 2 1 NA 2 3
|
|
$ factor_s_duplicated : Factor w/ 4 levels "..","A","A_duplicated_b",..: 2 1 1 NA NA
|
|
$ factor_s_undeclared : Factor w/ 3 levels "no","perhaps",..: 3 1 2 NA 3
|
|
$ factor_s_undeclared2 : Factor w/ 2 levels "no","yes": NA NA NA NA NA
|
|
$ date : num 1.27e+10 1.37e+10 1.37e+10 NA NA
|
|
- attr(*, "variable.labels")= Named chr [1:17] "numeric variable" "numeric variable with long label: this variable hat five observations (one is missing). All values between 1 an"| __truncated__ "numeric factor with missing range" "numeric factor with long value labels" ...
|
|
..- attr(*, "names")= chr [1:17] "numeric" "numeric_long_label" "factor_numeric" "factor_n_long_value_label" ...
|
|
- attr(*, "codepage")= int 65001
|
|
> x.append <- read.spss(file=sav, to.data.frame = TRUE,
|
|
+ add.undeclared.levels = "append", reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
3: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
4: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
5: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
6: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
7: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "append", :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
> x.no <- read.spss(file=sav, to.data.frame = TRUE,
|
|
+ add.undeclared.levels = "no", reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "no", :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, add.undeclared.levels = "no", :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
>
|
|
> levels(x.sort$factor_n_undeclared)
|
|
[1] "strongly disagree" "2" "3"
|
|
[4] "4" "strongly agree"
|
|
> levels(x.append$factor_n_undeclared)
|
|
[1] "strongly disagree" "strongly agree" "2"
|
|
[4] "3" "4"
|
|
> str(x.no$factor_n_undeclared)
|
|
num [1:5] 1 2 4 3 1
|
|
- attr(*, "value.labels")= Named chr [1:2] "5" "1"
|
|
..- attr(*, "names")= chr [1:2] "strongly agree" "strongly disagree"
|
|
>
|
|
>
|
|
> ### Examples for duplicated.value.labels:
|
|
> ## duplicated.value.labels = "append" (default)
|
|
> x.append <- read.spss(file=sav, to.data.frame=TRUE, reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
3: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
4: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
5: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
6: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
7: In read.spss(file = sav, to.data.frame = TRUE, reencode = "UTF-8") :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
> ## duplicated.value.labels = "condense"
|
|
> x.condense <- read.spss(file=sav, to.data.frame=TRUE,
|
|
+ duplicated.value.labels = "condense", reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
3: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
4: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
5: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
6: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
7: In read.spss(file = sav, to.data.frame = TRUE, duplicated.value.labels = "condense", :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
>
|
|
> levels(x.append$factor_n_duplicated)
|
|
[1] "A" "A_duplicated_2" "B"
|
|
> levels(x.condense$factor_n_duplicated)
|
|
[1] "A" "B"
|
|
>
|
|
> as.numeric(x.append$factor_n_duplicated)
|
|
[1] 1 1 2 NA 3
|
|
> as.numeric(x.condense$factor_n_duplicated)
|
|
[1] 1 1 1 NA 2
|
|
>
|
|
> ### ToDo:
|
|
> ## Long Strings (>255 chars) are imported in consecutive separate variables
|
|
> ## (see warning about subtype 14)
|
|
> ## we should get that right in the import function in future versions
|
|
> x <- read.spss(file=sav, to.data.frame=TRUE, stringsAsFactors=FALSE, reencode="UTF-8")
|
|
re-encoding from UTF-8
|
|
Warning messages:
|
|
1: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
testdata.sav: Very long string record(s) found (record type 7, subtype 14), each will be imported in consecutive separate variables
|
|
2: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Duplicated levels in factor factor_n_duplicated: A
|
|
3: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Undeclared level(s) 2, 3, 4 added in variable: factor_n_undeclared
|
|
4: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Undeclared level(s) 0, 3 added in variable: factor_n_undeclared2
|
|
5: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Undeclared level(s) .. added in variable: factor_s_duplicated
|
|
6: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Duplicated levels in factor factor_s_duplicated: A
|
|
7: In read.spss(file = sav, to.data.frame = TRUE, stringsAsFactors = FALSE, :
|
|
Undeclared level(s) perhaps added in variable: factor_s_undeclared
|
|
>
|
|
> cat.long.string <- function(x, w=70) cat(paste(strwrap(x, width=w), "\n"))
|
|
>
|
|
> ## first part: x$string_500:
|
|
> cat.long.string(x$string_500)
|
|
A wonderful serenity has taken possession of my entire soul, like
|
|
these sweet mornings of spring which I enjoy with my whole heart. I
|
|
am alone, and feel the charm of existence in this spot, which was
|
|
created for the bliss of souls like mine. I am so happy
|
|
|
|
Far far away, behind the word mountains, far from the countries
|
|
Vokalia and Consonantia, there live the blind texts. Separated they
|
|
live in Bookmarksgrove right at the coast of the Semantics, a large
|
|
language ocean. A small river named Duden flows by thei
|
|
|
|
abc def ghi jkl mno pqrs tuv wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ
|
|
!".. $%& /() =?* '<> #|; ....~ @`.. ...... ...... {} abc def ghi jkl
|
|
mno pqrs tuv wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ !".. $%& /() =?*
|
|
'<> #|; ....~ @`.. ...... ...... {} abc def ghi j
|
|
> ## second part: x$STRIN0:
|
|
> cat.long.string(x$STRIN0)
|
|
, my dear friend, so absorbed in the exquisite sense of mere tranquil
|
|
existence, that I neglect my talents. I should be incapable of
|
|
drawing a single stroke at the present moment; and yet I feel that I
|
|
never was a greater artist than now.
|
|
|
|
r place and supplies it with the necessary regelialia. It is a
|
|
paradisematic country, in which roasted parts of sentences fly into
|
|
your mouth.
|
|
|
|
kl mno pqrs tuv wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ !".. $%& /()
|
|
=?* '<> #|; ....~ @`.. ...... ...... {} abc def ghi jkl mno pqrs tuv
|
|
wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ !".. $%& /() =?* '<> #|; ....~
|
|
@`.. ...... ...... {} abc def ghi jkl
|
|
> ## complete long string:
|
|
> long.string <- apply(x[,c("string_500", "STRIN0")], 1, paste, collapse="")
|
|
> cat.long.string(long.string)
|
|
A wonderful serenity has taken possession of my entire soul, like
|
|
these sweet mornings of spring which I enjoy with my whole heart. I
|
|
am alone, and feel the charm of existence in this spot, which was
|
|
created for the bliss of souls like mine. I am so happy, my dear
|
|
friend, so absorbed in the exquisite sense of mere tranquil
|
|
existence, that I neglect my talents. I should be incapable of
|
|
drawing a single stroke at the present moment; and yet I feel that I
|
|
never was a greater artist than now.
|
|
|
|
Far far away, behind the word mountains, far from the countries
|
|
Vokalia and Consonantia, there live the blind texts. Separated they
|
|
live in Bookmarksgrove right at the coast of the Semantics, a large
|
|
language ocean. A small river named Duden flows by their place and
|
|
supplies it with the necessary regelialia. It is a paradisematic
|
|
country, in which roasted parts of sentences fly into your mouth.
|
|
|
|
abc def ghi jkl mno pqrs tuv wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ
|
|
!".. $%& /() =?* '<> #|; ....~ @`.. ...... ...... {} abc def ghi jkl
|
|
mno pqrs tuv wxyz ABC DEF GHI JKL MNO PQRS TUV WXYZ !".. $%& /() =?*
|
|
'<> #|; ....~ @`.. ...... ...... {} abc def ghi jkl mno pqrs tuv wxyz
|
|
ABC DEF GHI JKL MNO PQRS TUV WXYZ !".. $%& /() =?* '<> #|; ....~ @`..
|
|
...... ...... {} abc def ghi jkl mno pqrs tuv wxyz ABC DEF GHI JKL
|
|
MNO PQRS TUV WXYZ !".. $%& /() =?* '<> #|; ....~ @`.. ...... ......
|
|
{} abc def ghi jkl
|
|
>
|
|
> proc.time()
|
|
user system elapsed
|
|
0.345 0.055 0.391
|