R软件基础知识与实战.doc_第1页
R软件基础知识与实战.doc_第2页
R软件基础知识与实战.doc_第3页
R软件基础知识与实战.doc_第4页
R软件基础知识与实战.doc_第5页
已阅读5页,还剩49页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

R软件基础知识与实战第一课:基本知识第二课:基本运算第三课:画图第四课:数据整理和数据分析第五课:统计分析第六课:分布的产生第七课:求导和极值的计算第八课:Bootstrap方法第九课:MCMC算法第一课 基本知识一 R的历史与特点Object-oriented 语言二 R系统介绍安装镜像Installing packages library(survival) # 加载程序包library(help=survival) # 显示程序包 survival 里的内容help(mean) #查看关于函数 mean 的帮助search() # 给出系统里已加载的程序包searchpaths() # 给出系统里已加载的程序包的路径Commands are separated either by semicolon or a new line三 Data objects# There are 7 basic types of data objects:# Vector, Matrix, Array, List, Factor, Time series, Data frame# creating vectors# vector type: character, numeric, integer, logical, complexmy1-c(1.2,2.3,3.5,4.6,5.7)my2-c(Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday)rep(NA,5)rep(c(T,T,F),2)rep(c(yes,no),c(4,2)1:51.2:41:-1seq(-pi,pi, .5)seq(-pi,pi, length=10)seq(1,by=.05, length=10)a1 = vector(logical,3) a2 = numeric(4); a3 = complex(5); a4 = character(6)#whats the difference between ?mat1 - rep(1:4,rep(3,4)mat2 - rep(1:4,3)scan()scan(what=) #扫描字符型变量numbered.letters - lettersnames(numbered.letters) - paste(“obs”,1:26,sep=”)# generating matrixmat1 - matrix(1:12, ncol=3, byrow=T)dimnames(mat1) = list(c(row 1,row 2,row 3,row 4),c(col 1,col 2,col 3)mat2 - matrix(1:12, ncol=4, dimnames=list(NULL,paste(col,1:4)mat - rep(1:4,rep(3,4)dim(mat) - c(3,4)dimnames(mat) - list(paste(row,letters1:3),paste(col,LETTERS1:4)rbind(c(200688,24,33),c(201083,27,115)dim(mat)# arrayarray(c(1:8,11:18,111:118),dim=c(2,4,3)#listattributes(iris)grp - c(rep(1,11),rep(2,10)thw - c(450,760,325,495,285,450,460,375,310,245,350,340,300,310,270,300,360,405,290)heart.list - list(group=grp, thw=thw, descrip=heart data)names(heart.list)names(heart.list) - c(group,total heart weight, descrip)# factorsclasslist - c(male, female, male, male, male,female, female, male, female)classlist = factor(classlist) factor(c(Hi,Lo,Med,Hi,Hi,Lo),levels=c(Lo,Hi),labels=c(LowDose,HighDose)cut(state.x77,Murder, breaks=c(0,8,16)cut(state.x77,Murder, breaks=2)cut(state.x77,Murder, c(0,8,16), labels=c(Low,High)# data framescars = 
read.table(D:/93cars.txt, header=TRUE)cars1 = as.matrix(cars)cars1a = 1:9b = letters1:9classlist - c(male,female,male,male,male,female,male,male,male)classlist = factor(classlist)all = data.frame(a,b, classlist)is.factor(all$classlist) #每列保持了它的属性my.logical - sample(c(T,F), size=20, replace=T)plex - rnorm(20) + runif(20)*1i my.numeric - rnorm(20)my.matrix - matrix(rnorm(40), ncol=2)my.df1 = cbind(my.logical, plex, my.numeric, my.matrix) #是一个矩阵my.df = data.frame(my.logical, plex, my.numeric, my.matrix) #要用指令data.framemy.df 5/0Inf 0/0NaN # not a number a = c(1,2,3,NA,4) # NA represents missing value mean(a)1 NAa = c(asia,da,NA) # NA 也可以代表字符型的missing valuea = c(T,F,T,T, NA)# NULL 代表一个空值a = NULL # a is a empty vectorfor( i in 1: 10) ai = i2# testing and coercingletter = factor (letters) is.vector(letter) # test whether letter is a vector1 FALSE letter = as.vector(letter) # coerce letter into a vectoris.matrix()as.matrix()is.data.frame()as.data.frame()is.list()as.list()write.table(lizard, E:/R/new.Rdata)new = read.table(E:/R/new.Rdata)save(a,lizard,new x, file=E:/R/mydump.rda)load(file=E:/R/mydump.rda)sink(E:/R/new22.txt) lizard ls() sink()四 Data manipulation逻辑运算符号符号意义= =等于!=不等于=大于等于大于小于= 30 # 一个逻辑值向量myseqmyseq =30 # 给出myseq里大于等于30的所有值 x 2x x 2 & x 4x c(F,F,T)char = c(monday,tuesday,wednesday,thursday,friday)names(char) = c(v1,v2,v3,v4,v5)charv1 # 取出名为v1的元素char # 显示所有元素char8 # 指标出界state.abbstate.abbAnames(state.abb) - state.abbAlaska grep(So*, ) # indexing data frames, matrices and listsJ = matrix( c(12, 15, 6 ,10, 2, 9, 2, 7, 19, 14 ,11, 19), nrow=3, byrow=TRUE)J9J c(1,3), c(2,4) J , c(2,4) J -1, c(2,4) J1,3, drop=F state.x77 # Data sets related to the 50 states of the United States of Americahelp(state.x77)illit - state.x771:50, 3 #美国1970年的文盲率illit 2, 3:5state.x77Arizona, Area # Arizona 州的面积stack.xc(F,F,T), grep(Wa*, dimnames(stack.x)2), drop=Fstate = data.frame(state.x77)dimnames(state)state$life.exp #和 state.x77,Life Exp 比较heart.list$groupheart.list1# combining data 
frames or matricesa = matrix(1:20, ncol = 5)b = matrix(1:60, ncol = 5)d = matrix(1:72, nrow = 12) c1 = rbind(a,b)c2 = cbind(b,d)# combining data framesstate_no = 1:50state = data.frame(state,state_no) # sortinga = 1:50sort(a)sort(a, decreasing=TRUE)a = round( rnorm(20)order(a)aa = stateorder(state$Area, state$Population, state$Income, decreasing=TRUE) 第二课 基本运算一 Vectorized calculation数学运算符号及意义:+ 加法;- 减法;* 乘法;/ 除法;^ 或 ** 幂次;%*% 矩阵相乘。J = matrix(1:10,2,5)sqrt(J)J+3a - 5*(0:3)x - c(2,4,6,8,10)y - 1:5z = (3*x2+2*y)/(x+y)*(x-y)(0.5)x = 1:10x 4x 2 & x 1) & all( y 1) & any (y 0 )all( x 7# Pearson chi-squared Statistic: $X^2 = \sum_{i,j} (f_{ij} - e_{ij})^2 / e_{ij}$, $e_{ij} = f_{i\cdot} f_{\cdot j} / f_{\cdot\cdot}$ f = matrix(1:20, ncol=4)fi. = f %*% rep(1,ncol(f) # 计算由列联表f得到的Pearson卡方统计量:f.j = rep(1,nrow(f) %*% fe - (fi. %*% f.j)/sum(fi.)x2 = sum(f-e)2/e) 二. apply and tapply O - Orangecol_mean = apply(O,2:3, 2, mean)a = array(1:100, c(2,5,10) apply(a, c(1,2), mean)attach(iris) tapply(iris,1, Species, mean)三. sapply的应用x - list(a=1:10,beta=exp(-3:3),logic=c(TRUE,FALSE,TRUE,FALSE)lapply(x,mean) #将函数mean应用于x的各个分量lapply(1:10, sqrt) # lapply 也可用于向量lapply(1:10, function(x) x2) # 将 lapply 用于自定义的函数# sapply(x, fun, ., simplify = TRUE) #运算的结果为矩阵sapply(1:10, sqrt) # 结果为向量 sapply(1:10, function(x) x2) sample(10) # 数列1:10的随机置换# 如何产生m个大小为n的置换,并将结果存入一个m×n的矩阵?sapply(1:7, function(o) sample(10)t(sapply(1:7,function(o) sample(10)X- function(m,n) t(sapply(1:m,function(o) sample(n) #o 哑变量,t 表示转置四. 在一个n个元素的随机置换中,有至少一个不动点的概率是多少? 
Y0 #包含Y的大于零的元素的子向量 YY=0 # 不动点 length(YY=0) #不动点的个数 fixed.points - function(m,n) sapply(1:m, function(o) Z - sample(n) - 1:n; length(ZZ=0) ) smry -function(X) mu = mean(X); sigma = sd(X); a = min(X); b = max(X) q25 = quantile(X,0.25); q50 = quantile(X,0.50) q75 = quantile(X,0.75) c(mu=mu,sigma=sigma,min=a,q25=q25,q50=q50,q75=q75,max=b) FP -fixed.points(2500,10) dim(FP) - c(2500,1) apply(FP,2,smry) t( apply(FP,2,smry)五.比较处理向量的两种方法循环语句a = NULLfor ( i in 1:index) ai = #函数的格式# - function(x) fixed.points.fast -function(m,n) sapply(1:m, function(o) Z-sample(n) - 1:n; length(ZZ=0) time.start-proc.time()FP.fast-fixed.points.fast(1000000,10)time.used- proc.time()-time.startcat(User time elapsed (fast):,time.used1, n)fixed.points.slow - function(m,n) FP - rep(0,m) #gives a zero vector of length m for (j in 1:m) W - sample(n) k - 0 for ( i in 1:n) if (Wi=i) k - k+1 FPj - k FP time.start-proc.time()FP.slow-fixed.points.slow(1000000,10)time.used- proc.time()-time.startcat(User time elapsed (slow):,time.used1, n)#the fast approach takes only 60% as long as the slow approach六. 单位正方形上的凸型CL - function(o) floor(657*runif(1)+1 # random colork - 1:10 sum(k) # quick way to add all integers from 1 to 10v - sqrt(1:10)sum(v) # quick way to add the square roots of 1 to 10sum(vk) # another quick way to do the same (but why do this?)v - c(0,v) # prepends the vector v with a 0vvk+1-vk # the vector of successive differences of square rootssum(vk+1-vk)2) # sum of squared successive differences cnvx - function(n) X - matrix(runif(2*n), ncol=2) x11(width=5, height=5.5) plot(X, xlim=0:1, ylim=0:1, pch=22) h - chull(X) h - c(h, h1) lines(Xh, ,col=CL(),lwd=3) cnvx(12)cnvx(12)cnvx(12)# 单位圆上的凸型rdisk - function(q) R - matrix(runif(10000), ncol=1) #产生单位圆上的点的半径 Theta - matrix(runif(10000,0,2*pi), ncol=1) #产生单位圆上的点的夹角 X - cbind(Rq*cos(Theta),Rq*sin(Theta) x11(width=5,height=5.5) plot(X,xlim=c(-1,1),ylim=c(-1,1),pch=.) 
upp.crcl - function(x) sqrt(1-x2) low.crcl - function(x) -sqrt(1-x2) curve(upp.crcl,from=-1,to=1,add=T) curve(low.crcl,from=-1,to=1,add=T) rdisk(1) # too much weight at center rdisk(1/2) # uniformly distributed (can be proved via calculus) rdisk(1/4) # too much weight at boundarycnvx - function(n) # convex hull of n points in the unit disk R - matrix(runif(n), ncol=1) Theta - matrix(runif(n,0,2*pi), ncol=1) X - cbind(sqrt(R)*cos(Theta),sqrt(R)*sin(Theta) x11(width=5,height=5.5) plot(X,xlim=c(-1,1),ylim=c(-1,1),pch=22) h - chull(X); h - c(h, h1); lines(Xh, ,col=CL(),lwd=3) upp.crcl - function(x) sqrt(1-x2) low.crcl - function(x) -sqrt(1-x2) curve(upp.crcl,from=-1,to=1,add=T) curve(low.crcl,from=-1,to=1,add=T) cnvx(12) 第三课 画图一常用统计作图# plotx - seq(-5, 5, 1)y - x2plot(x, y, pch=X, main=Main Title, sub=Subtitle, xlab=X Axis Label, ylab=Y Axis Label, xlim=c(-8, 8), type=o, lty=2)plot(x, y, axes=F, type=b, pch=x, xlab=, ylab=)axis(1, at=c(0, 1, 2, pi, 4, 5, 2*pi), labels=c(0, 1, 2, Pi, 4, 5, 2 Pi), pos=0)axis(2, at=c(-1, -0.5, 0, 0.25, 0.5, 0.75, 1), adj=1)abline(h=c(-1, -0.5, 0.5, 1), lty=3)text(pi, 0.1, sin(pi)=0, adj=0)title(The sine functionnfrom 0 to 2 Pi)x = -5:5 # generate 5 4 . 
3 4 5y = x2 # y equals x squaredpar(mfrow=c(3, 2) # set a multiple figure screenplot(x, y) # and create the graphsplot(x, y, type = l) # in different stylesplot(x, y, type = b)plot(x, y, type = h)plot(x, y, type = o)plot(x, y, type = n)mtext(Different options for the plot parameter type, side=3, outer=T, line=-1)par(mfrow=c(1,1)plot(1:100, sin(1:100/10)plot(x - sort(rnorm(47), type = s, main = plot(x, type = s)points(x, cex = .5, col = dark red)plot(cars) # 以 cars 的第一列为x向量,第二列为y向量lines(lowess(cars) # 使用 locally weighted polynomial regression得到的平滑曲线# 在现有图上加图形的指令abline(a, b) # Line with intercept a and slope babline(h = c) # horizontal lineabline(v = c) # vertical lineabline(lmobject,.)arrows(x1, y1, x2, y2) #Arrows from (x1, y1) to (x2, y2)points(x, y) # Points at the coordinates given by x and ylines(x, y) #Lines through the points given by x and ypolygon(x, y) #Shaded polygon figuresegments(x1, y1, x2, y2) #Disconnected line segments from (x1, y1) to (x2, y2)# barplotAirPassengersair = matrix(AirPassengers, 12, 12) dimnames(air)2 = paste(19, 49:60, sep=)barplot(air, names = colnames(air), main=Monthly Airline Passenger Numbers 1949-1960)barplot(air, names = colnames(air), angle=c(45,135), density=10, col = 1) # different style# boxplotboxplot(air,9, names = 1957) par(mfrow=c(2,1)boxplot(air,9, names = 1957) hist(air,9)boxplot(air, names = colnames(air) # histlibrary(ISwR)data(energy)attach(energy)expend.lean = expendstature = leanexpend.obese = expendstature = obesepar(mfrow=c(2,1)hist(expend.lean, breaks = 10, xlim = c(5,13), ylim=c(0,4), col = white )hist(expend.obese, breaks = 10, xlim = c(5,13), ylim=c(0,4), col = grey )boxplot(expend stature)二.更复杂的作图# contour# 二元标准正态的等高线图x = seq(-5,5, length=50)y = xf = function(x,y) exp(-0.5*x2 - 0.5*y2)z = outer(x, x, f)contour(x, y, z, levels=seq(min(z),max(z),length=10)f = function(x,y) exp(-0.5*x2 - 0.5*y2 - 0.8*x*y)z = outer(x, x, f)contour(x, y, z, levels=seq(min(z),max(z),length=10)x = seq(-5,5, length=50)f = 
function(x,y) exp(-0.5*x2 - 0.5*y2)z = outer(x, x, f)persp(z)persp(z, d = 5, theta = -160, phi = 30)f = function(x,y) exp(-0.5*x2 - 0.5*y2 - 0.8*x*y)z = outer(x, x, f)persp(z)persp(z, d = 5, theta = -160, phi = 30)三.一个硬币的赌博假设一个游戏有如下规则:1.不断掷一个均匀的硬币,直到正面与反面出现次数之差的绝对值为3,这时一局结束;2.如果你决定参加游戏,则每次你掷硬币时你需要付一元钱。你不允许中途退出;3.每次游戏结束时你得到8元钱。问你的期望收入是多少?n - 120; x - rbinom(n,1,0.5) cumsum(x)1:n-cumsum(x) # 前1,…,n次掷硬币所得到的反面的个数abs(1:n-2*cumsum(x) # 前1,…,n次掷硬币中正面与反面个数之差的绝对值d - abs(1:n-2*cumsum(x)which(d=3) # 差的绝对值等于三的各个位置(掷硬币次数)min(c(which(d=3),Inf) # 差的绝对值首次等于三时的掷硬币次数(若未出现则为Inf)rnd.trials - function(m,n) sapply(1:m, function(o) D - abs(1:n-2*cumsum(rbinom(n,1,0.5) min(c(which(D=3),Inf) ) TM - rnd.

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等。压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性,同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论