(2)变量个数 p=1,local linear regression:
$$\min_{\beta_0,\beta_1}\ \sum_{i=1}^{n}K_h(x_i-x_0)\,\bigl[y_i-\beta_0-\beta_1(x_i-x_0)\bigr]^2$$
其中 $K_h(\cdot)$ 为核函数,$h$ 为窗宽;$\hat f(x_0)=\hat\beta_0$。
(3)Lowess (Locally Weighted scatterplot smoothing) p=1:
$$\min_{\beta_0,\beta_1}\ \sum_{i=1}^{n}w_i(x_0)\,\bigl[y_i-\beta_0-\beta_1(x_i-x_0)\bigr]^2,\qquad w_i(x_0)=W\!\left(\frac{x_i-x_0}{d(x_0)}\right)$$
其中 $W(z)=(1-|z|^3)^3\,\mathbf{1}\{|z|<1\}$ 为 tricube 权重函数,$d(x_0)$ 为 $x_0$ 到其第 $\lceil fn\rceil$ 个近邻的距离($f$ 为窗宽 span)。
【还有个加权修正(robustness iteration)的过程,这里略,详见原书或者PPT】 (4)Loess (Local regression) p=1,2:
$$\min_{\beta_0,\dots,\beta_p}\ \sum_{i=1}^{n}w_i(x_0)\,\Bigl[y_i-\sum_{m=0}^{p}\beta_m(x_i-x_0)^m\Bigr]^2,\qquad p=1,2$$
其中 $w_i(x_0)$ 同上,为基于最近邻距离标准化的 tricube 权重。
【还有个加权修正的过程,这里略,详见原书或者PPT】 (5)Friedman supersmoother
symmetric k-NN, using local linear fit,
varying span, which is determined by local CV, not robust to outliers, fast to compute supsmu( ) in R 三、模型选择
需要选择的内容:(1)窗宽the span;(2)多项式的度the degree of polynomial for the local regression models;(3)权重函数the weight functions。 【其他略】 四、R语言部分
library(foreign) library(SemiPar) library(mgcv) jacob <- read.table(\############################################################################### #第一部分,简单的光滑估计 #1、Kernel Density Estimation #Illustration of Kernel Concepts #Defining the Window Width attach(jacob) x0 <- sort(perotvote)[75] diffs <- abs(perotvote - x0) which.diff <- sort(diffs)[120] #Applying the Tricube Weight #...Tricube function tricube <- function(z) { ifelse (abs(z) < 1, (1 - (abs(z))^3)^3, 0) } #... a <- seq(0,1, by=.1) tricube(a) #Figure 2.5 plot(range(perotvote), c(0,1), xlab=\abline(v=c(x0-which.diff, x0+which.diff), lty=2) abline(v=x0) xwts <- seq(x0-which.diff, x0+which.diff, len=250) lines(xwts, tricube((xwts-x0)/which.diff), lty=1, lwd=1) points(x.n, tricube((x.n - x0)/which.diff), cex=1) ########################################################################### #2、Kernel Smoothing ########################################################################### Figure 2.6 par(mfrow=c(3,1)) plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, bandwidth=\plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, kernel=\plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, bandwidth=\#******* Kernel smoothing中选取box和normal核函数的比较,带宽相等 plot(perotvote, chal.vote, pch=\Share (%)\lines(ksmooth(perotvote, chal.vote, kernel=\lines(ksmooth(perotvote, chal.vote, kernel=\################################################################################## #第二部分,LPR模型 #Data Prep For Local Average Regression Step-by-Step cong <- as.data.frame(jacob[,2:3]) cong <- cong[order(cong$perotvote),1:2] y <- as.matrix(cong$chal.vote) x <- as.matrix(cong$perotvote) n <- length(y) #... tricube <- function(z) { ifelse (abs(z) < 1, (1 - (abs(z))^3)^3, 0) } #... 
x0 <- x[75] diffs <- abs(x - x0) which.diff <- sort(diffs)[120] x.n <- x[diffs<= which.diff] y.n <- y[diffs <= which.diff] weigh=tricube((x.n-x0)/which.diff) mod <- lm(y.n ~ x.n, weights=weigh) #Figure 2.7 plot(x, y, type=\bty=\abline(v=c(x0 - which.diff, x0 + which.diff), lty = 2) abline(v=x0) points(x[diffs > which.diff], y[diffs > which.diff], pch=16, cex=1, col=gray(.80)) points(x[diffs <= which.diff], y[diffs <= which.diff], cex=.85) abline(mod, lwd=2, col=1) text(27.5, 50, expression(paste(\x[0]))) #这里expression的用法比较有意思 arrows(25, 47, 15, 37, code =2, length = .10) ################################################################################# #2、Now Putting It Together For Local Regression Demonstration. #OLS Fit for Comparison ols <- lm(chal.vote ~ perotvote, data=jacob) #The loess fit model.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5) #*** 默认设置 degree=2,family=gauss, tricube加权 *** n <- length(chal.vote) x.loess <- seq(min(perotvote), max(perotvote), length=n) y.loess <- predict(model.loess, data.frame(perotvote=x.loess)) #得到预测值便于比较 #The lowess fit model.lowess <- lowess(chal.vote ~ perotvote, data=jacob, f = 0.5) #*** 默认设置 robust linear tricube加权 *** n <- length(chal.vote) x.lowess <- seq(min(perotvote), max(perotvote), length=n) y.lowess <- predict(model.lowess, data.frame(perotvote=x.lowess)) #得到预测值便于比较 #Figure 2.8 plot(perotvote, chal.vote, pch=\ylab=\lines(x.loess, y.loess) lines(x.lowess, y.lowess) abline(ols) legend(15,20, c(\################################################################################# #3、lowess中不同robust的比较 m1.lowess <- lowess(perotvote, chal.vote, f = 0.5, iter=0) #*** 没有进行第二步的robust加权估计 *** m2.lowess <- lowess(perotvote, chal.vote, f = 0.5) #*** 默认 iter=3,要进行3次robust加权估计 *** m0.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1, family=\iterations=1) #** no robust m1.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1) #*** 没有进行第二步的robust加权估计 *** m2.loess <- 
loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1, family=\#*** 进行3次robust加权估计 *** plot(perotvote, chal.vote, pch=\(%)\lines(m1.lowess) lines(sort(perotvote), m1.loess$fit[order(perotvote)], lty=3, col=\lines(sort(perotvote), m0.loess$fit[order(perotvote)], lty=9,col=18) lines(m2.lowess, lty=2, col=\lines(sort(perotvote), m2.loess$fit[order(perotvote)], lty=4, col=\ ---------------------------------------------------------------------------- 第四章 样条估计spline 一、基本思想
按照x将样本分成多个区间,对每个区间分别进行估计。不同于核估计,这里不用移动计算,从而减小了计算量。 二、最简单的形式
Linear Spline with k knots:
$$f(x)=\beta_0+\beta_1 x+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+$$
其中,$(x-\kappa_j)_+=\max(0,\,x-\kappa_j)$,$\kappa_1<\kappa_2<\cdots<\kappa_k$ 为结点(knots)。
三、其他样条模型 1、p次样条估计
——二次样条Quadratic Spline (basis functions with k knots)
$$f(x)=\beta_0+\beta_1 x+\beta_2 x^2+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^2$$
——三次样条Cubic Spline (with k knots, use cubic truncated-power basis functions)
$$f(x)=\beta_0+\beta_1 x+\beta_2 x^2+\beta_3 x^3+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^3$$
——p-order spline (with k knots)
$$f(x)=\sum_{m=0}^{p}\beta_m x^m+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^p$$
2、B-splines (with k knots, cubic B-spline basis)
$$f(x)=\sum_{j}\beta_j B_{j,3}(x)$$
其中,$B_{j,3}(x)$ 为三次 B 样条基函数,由 Cox–de Boor 递推公式定义:
$$B_{j,0}(x)=\mathbf{1}\{\kappa_j\le x<\kappa_{j+1}\},\qquad B_{j,d}(x)=\frac{x-\kappa_j}{\kappa_{j+d}-\kappa_j}B_{j,d-1}(x)+\frac{\kappa_{j+d+1}-x}{\kappa_{j+d+1}-\kappa_{j+1}}B_{j+1,d-1}(x)$$
3、Natural Splines
以上估计方法对结点(knots)之间的估计比较准确,但对边界的拟合效果较差。自然样条的思想是,在自变量最小值和最大值处各增加一个结点,用线性模型拟合边界位置的样本点。
4、k的选择和模型比较 采用AIC准则
四、光滑样条smoothing spline 基于如下目标函数(惩罚残差平方和)得到参数估计值:
$$\min_{f}\ \sum_{i=1}^{n}\bigl[y_i-f(x_i)\bigr]^2+\lambda\int\bigl[f''(x)\bigr]^2\,dx$$
其中 $\lambda\ge 0$ 为光滑参数,控制拟合优度与光滑度之间的权衡。
五、模型比较的F检验
六、R语言部分
library(foreign) jacob <- read.dta(\attach(jacob)