par(op)
##################################################
## largish dataset example with user defined knots
##################################################
par(mfrow = c(2, 2))
n_obs <- 10000
eg <- gamSim(2, n = n_obs, scale = .5)
## The original reached the simulated data frame through attach(eg); take the
## `data` element explicitly instead, so nothing is left on the search path
## if a later step fails.
sim_data <- eg$data

## use a random subset of 1000 observations as the knot locations
ind <- sample(seq_len(n_obs), 1000, replace = FALSE)
b5 <- gam(y ~ s(x, z, k = 50), data = sim_data,
          knots = list(x = sim_data$x[ind], z = sim_data$z[ind]))
## various visualizations
vis.gam(b5, theta = 30, phi = 30)
plot(b5)
plot(b5, scheme = 1, theta = 50, phi = 20)
plot(b5, scheme = 2)

par(mfrow = c(1, 1))
## and a pure "knot based" spline of the same data: the 100 knots are the
## cell midpoints of a regular 10 x 10 grid on the unit square
grid_mid <- (1:10 - 0.5) / 10
b6 <- gam(y ~ s(x, z, k = 100), data = sim_data,
          knots = list(x = rep(grid_mid, times = 10),
                       z = rep(grid_mid, each = 10)))
## NOTE(review): everything after `color=` was truncated in this copy; the
## arguments below follow the upstream mgcv `gam` example -- confirm.
vis.gam(b6, color = "heat", theta = 30, phi = 30)

## varying the default large dataset behaviour via `xt'
b7 <- gam(y ~ s(x, z, k = 50, xt = list(max.knots = 1000, seed = 2)),
          data = sim_data)
vis.gam(b7, theta = 30, phi = 30)
################################################################
## Approximate large dataset logistic regression for rare events
## based on subsampling the zeroes, and adding an offset to
## approximately allow for this.
## Doing the same thing, but upweighting the sampled zeroes
## leads to problems with smoothness selection, and CIs.
################################################################
n <- 100000 ## simulate n data
## NOTE(review): the arguments after `dist=` were truncated in this copy;
## "binary" and scale = .33 follow the upstream mgcv `gam` example (the .33
## matches the rescaling of the truth in the plotting loop below) -- confirm.
dat <- gamSim(1, n = n, dist = "binary", scale = .33)
p <- binomial()$linkinv(dat$f - 6) ## make 1's rare
dat$y <- rbinom(length(p), 1, p)   ## re-simulate rare response

## Now sample all the 1's but only proportion S of the 0's
S <- 0.02 ## sampling fraction of zeroes
dat <- dat[dat$y == 1 | runif(n) < S, ] ## sampling

## Create offset based on total sampling fraction
dat$s <- rep(log(nrow(dat) / n), nrow(dat))

## NOTE(review): the formula between `bs=` and `offset(s)` and the value of
## `method=` were truncated in this copy; restored from the upstream mgcv
## `gam` example (one "cr" smooth per covariate, REML) -- confirm.
lr.fit <- gam(y ~ s(x0, bs = "cr") + s(x1, bs = "cr") + s(x2, bs = "cr") +
                s(x3, bs = "cr") + offset(s),
              family = binomial, data = dat, method = "REML")

## plot model components with truth overlaid in red
op <- par(mfrow = c(2, 2))
## NOTE(review): the contents of `fn` and the definition of `xn` were lost in
## this copy; restored as the true-function and covariate column names used
## by the upstream example -- confirm.
fn <- c("f0", "f1", "f2", "f3")
xn <- c("x0", "x1", "x2", "x3")
for (k in seq_along(fn)) {
  plot(lr.fit, select = k, scale = 0)
  ff <- dat[[fn[k]]]
  xx <- dat[[xn[k]]]
  ## draw the centred, rescaled truth in covariate order so the line is
  ## traced left to right
  ind <- order(xx)
  lines(xx[ind], (ff - mean(ff))[ind] * .33, col = 2)
}
par(op)
rm(dat)
## Source: 生物统计家园网 (Biostatistics Home), http://www.biostatistic.net