R语言中的read.table()

时间:2016-06-05 15:12:19   收藏:0   阅读:2496
参考资料:http://www.cnblogs.com/xianghang123/archive/2012/06/06/2538274.html

read.table(file, header = FALSE, sep = "", quote = "\"'",
           dec = ".", numerals = c("allow.loss", "warn.loss", "no.loss"),
           row.names, col.names, as.is = !stringsAsFactors,
           na.strings = "NA", colClasses = NA, nrows = -1,
           skip = 0, check.names = TRUE, fill = !blank.lines.skip,
           strip.white = FALSE, blank.lines.skip = TRUE,
           comment.char = "#",
           allowEscapes = FALSE, flush = FALSE,
           stringsAsFactors = default.stringsAsFactors(),
           fileEncoding = "", encoding = "unknown", text, skipNul = FALSE)

 

 

技术分享

women1.txt

name	height	weight	tmp
stu1	58	115	1.1
stu2	59	117	1.2
stu3	60	120	1.3
stu4	61	123	1.4
stu5	62	126	1.5
stu6	63	129	1.6
stu7	64	132	1.7
stu8	65	135	1.8
stu9	66	139	1.9
stu10	67	142	2
stu11	68	146	2.1
stu12	69	150	2.2
stu13	70	154	2.3
stu14	71	159	2.4
stu15	72	164	2.5

 

## 基本参数
dataset1 <- read.table("./women1.txt", header = T, sep = "\t")
head(dataset1)

##   name height weight tmp
## 1 stu1     58    115 1.1
## 2 stu2     59    117 1.2
## 3 stu3     60    120 1.3
## 4 stu4     61    123 1.4
## 5 stu5     62    126 1.5
## 6 stu6     63    129 1.6

dataset1$name

##  [1] stu1  stu2  stu3  stu4  stu5  stu6  stu7  stu8  stu9  stu10 stu11
## [12] stu12 stu13 stu14 stu15
## 15 Levels: stu1 stu10 stu11 stu12 stu13 stu14 stu15 stu2 stu3 ... stu9

class(dataset1$name)

## [1] "factor"

is.factor(dataset1$name)

## [1] TRUE

dataset1 <- read.table("./women1.txt", header = T, sep = "\t", as.is = T)
head(dataset1)

##   name height weight tmp
## 1 stu1     58    115 1.1
## 2 stu2     59    117 1.2
## 3 stu3     60    120 1.3
## 4 stu4     61    123 1.4
## 5 stu5     62    126 1.5
## 6 stu6     63    129 1.6

dataset1$name

##  [1] "stu1"  "stu2"  "stu3"  "stu4"  "stu5"  "stu6"  "stu7"  "stu8" 
##  [9] "stu9"  "stu10" "stu11" "stu12" "stu13" "stu14" "stu15"

class(dataset1$name)

## [1] "character"

is.factor(dataset1$name)

## [1] FALSE

## skip = n 先跳过文件的前 n 行再开始读取(默认 skip = 0);若 header = T,跳过之后剩下的第一行会被当作表头
dataset2 <- read.table("./women1.txt", header = T, sep = "\t", skip = 3)
head(dataset2)

##   stu3 X60 X120 X1.3
## 1 stu4  61  123  1.4
## 2 stu5  62  126  1.5
## 3 stu6  63  129  1.6
## 4 stu7  64  132  1.7
## 5 stu8  65  135  1.8
## 6 stu9  66  139  1.9

dataset2 <- read.table("./women1.txt", header = F, sep = "\t", skip = 3)
head(dataset2)

##     V1 V2  V3  V4
## 1 stu3 60 120 1.3
## 2 stu4 61 123 1.4
## 3 stu5 62 126 1.5
## 4 stu6 63 129 1.6
## 5 stu7 64 132 1.7
## 6 stu8 65 135 1.8

## nrows = n 最多读入 n 行数据(默认 nrows = -1,表示全部读入);header = T 时表头行不计入 nrows
dataset3 <- read.table("./women1.txt", header = T, sep = "\t", nrows = 3)
head(dataset3)

##   name height weight tmp
## 1 stu1     58    115 1.1
## 2 stu2     59    117 1.2
## 3 stu3     60    120 1.3

dataset3 <- read.table("./women1.txt", header = F, sep = "\t", nrows = 3)
head(dataset3)

##     V1     V2     V3  V4
## 1 name height weight tmp
## 2 stu1     58    115 1.1
## 3 stu2     59    117 1.2

## 指定行名
dataset4 <- read.table("./women1.txt", header = T, sep = "\t", row.names = 1) # **第1列被用作行名,因此表头中第一行第一列的元素(name)被丢弃**
head(dataset4)

##      height weight tmp
## stu1     58    115 1.1
## stu2     59    117 1.2
## stu3     60    120 1.3
## stu4     61    123 1.4
## stu5     62    126 1.5
## stu6     63    129 1.6

row.names(dataset4)

##  [1] "stu1"  "stu2"  "stu3"  "stu4"  "stu5"  "stu6"  "stu7"  "stu8" 
##  [9] "stu9"  "stu10" "stu11" "stu12" "stu13" "stu14" "stu15"

 

技术分享

women2.txt

\ 这是一些简单的测试数据

name	height	weight	tmp
/stu1/	58	115	1*1
/stu2/	59	117	1*2
/stu3/	60	""	1*3\注意:空格处经测试必须引起来,
					\否则会出现error“……line 3 did not have 4 elements”
/stu4/	61	123	1*4
/stu5/	62	NO	1*5
/stu6/	NO	NO	1*6
/stu7/	64	132	1*7
/stu8/	65	135	1*8
/stu9/	66	139	1*9
/stu10/	NA	NA	2*0
/stu11/	68	146	2*1
/stu12/	69	150	2*2
/stu13/	70	154	2*3
/stu14/	71	159	2*4
/stu15/	72	164	2*5

 

## dec = "." 指定小数点所用的字符;na.strings = "NA" 指定哪些字符串表示缺失值;comment.char 指定注释的起始字符,只能设定一个字符
data1 <- read.table("./women2.txt", header = T, dec = "*", na.strings = c("", "NA", "NO"), comment.char = "\\")
head(data1)

##     name height weight tmp
## 1 /stu1/     58    115 1.1
## 2 /stu2/     59    117 1.2
## 3 /stu3/     60     NA 1.3
## 4 /stu4/     61    123 1.4
## 5 /stu5/     62     NA 1.5
## 6 /stu6/     NA     NA 1.6

sapply(data1[1:6,], is.na)

##       name height weight   tmp
## [1,] FALSE  FALSE  FALSE FALSE
## [2,] FALSE  FALSE  FALSE FALSE
## [3,] FALSE  FALSE   TRUE FALSE
## [4,] FALSE  FALSE  FALSE FALSE
## [5,] FALSE  FALSE   TRUE FALSE
## [6,] FALSE   TRUE   TRUE FALSE

sapply(data1, class)

##      name    height    weight       tmp 
##  "factor" "integer" "integer" "numeric"

# quote的设定
data1 <- read.table("./women2.txt", header = T, dec = "*", na.strings = c("", "NA", "NO"), comment.char = "\\", quote = "/", as.is = F)
head(data1)

##   name height weight tmp
## 1 stu1     58    115 1.1
## 2 stu2     59    117 1.2
## 3 stu3     60     "" 1.3
## 4 stu4     61    123 1.4
## 5 stu5     62   <NA> 1.5
## 6 stu6     NA   <NA> 1.6

sapply(data1, class)

##      name    height    weight       tmp 
##  "factor" "integer"  "factor" "numeric"

 

评论(0)
© 2014 mamicode.com 版权所有 京ICP备13008772号-2  联系我们:gaon5@hotmail.com
迷上了代码!