(2)变量个数 p=1,local linear regression:
$$\min_{\beta_0,\beta_1}\ \sum_{i=1}^{n}K_h(x_i-x_0)\,\bigl[y_i-\beta_0-\beta_1(x_i-x_0)\bigr]^2$$
其中 $K_h(\cdot)$ 为核函数,$h$ 为窗宽;$\hat f(x_0)=\hat\beta_0$。
(3)Lowess (Locally Weighted scatterplot smoothing) p=1:
$$\min_{\beta_0,\beta_1}\ \sum_{i=1}^{n}w_i(x_0)\,\bigl[y_i-\beta_0-\beta_1(x_i-x_0)\bigr]^2,\qquad w_i(x_0)=W\!\left(\frac{x_i-x_0}{d(x_0)}\right)$$
其中 $W(z)=(1-|z|^3)^3\,\mathbf{1}\{|z|<1\}$ 为 tricube 权重函数,$d(x_0)$ 为 $x_0$ 到其第 $\lceil fn\rceil$ 个近邻的距离($f$ 为窗宽 span)。
【还有个加权修正(robustness iteration)的过程,这里略,详见原书或者PPT】 (4)Loess (Local regression) p=1,2:
$$\min_{\beta_0,\dots,\beta_p}\ \sum_{i=1}^{n}w_i(x_0)\,\Bigl[y_i-\sum_{m=0}^{p}\beta_m(x_i-x_0)^m\Bigr]^2,\qquad p=1,2$$
其中 $w_i(x_0)$ 同上,为基于最近邻距离标准化的 tricube 权重。
【还有个加权修正的过程,这里略,详见原书或者PPT】 (5)Friedman supersmoother
symmetric k-NN, using local linear fit,
varying span, which is determined by local CV, not robust to outliers, fast to compute supsmu( ) in R 三、模型选择
需要选择的内容:(1)窗宽the span;(2)多项式的度the degree of polynomial for the local regression models;(3)权重函数the weight functions。 【其他略】 四、R语言部分
library(foreign) library(SemiPar) library(mgcv) jacob <- read.table(\############################################################################### #第一部分,简单的光滑估计 #1、Kernel Density Estimation #Illustration of Kernel Concepts #Defining the Window Width attach(jacob) x0 <- sort(perotvote)[75] diffs <- abs(perotvote - x0) which.diff <- sort(diffs)[120] #Applying the Tricube Weight #...Tricube function tricube <- function(z) { ifelse (abs(z) < 1, (1 - (abs(z))^3)^3, 0) } #... a <- seq(0,1, by=.1) tricube(a) #Figure 2.5 plot(range(perotvote), c(0,1), xlab=\abline(v=c(x0-which.diff, x0+which.diff), lty=2) abline(v=x0) xwts <- seq(x0-which.diff, x0+which.diff, len=250) lines(xwts, tricube((xwts-x0)/which.diff), lty=1, lwd=1) points(x.n, tricube((x.n - x0)/which.diff), cex=1) ########################################################################### #2、Kernel Smoothing ########################################################################### Figure 2.6 par(mfrow=c(3,1)) plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, bandwidth=\plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, kernel=\plot(perotvote, chal.vote, pch=\xlab=\main=\lines(ksmooth(perotvote, chal.vote, bandwidth=\#******* Kernel smoothing中选取box和normal核函数的比较,带宽相等 plot(perotvote, chal.vote, pch=\Share (%)\lines(ksmooth(perotvote, chal.vote, kernel=\lines(ksmooth(perotvote, chal.vote, kernel=\################################################################################## #第二部分,LPR模型 #Data Prep For Local Average Regression Step-by-Step cong <- as.data.frame(jacob[,2:3]) cong <- cong[order(cong$perotvote),1:2] y <- as.matrix(cong$chal.vote) x <- as.matrix(cong$perotvote) n <- length(y) #... tricube <- function(z) { ifelse (abs(z) < 1, (1 - (abs(z))^3)^3, 0) } #... 
x0 <- x[75] diffs <- abs(x - x0) which.diff <- sort(diffs)[120] x.n <- x[diffs<= which.diff] y.n <- y[diffs <= which.diff] weigh=tricube((x.n-x0)/which.diff) mod <- lm(y.n ~ x.n, weights=weigh) #Figure 2.7 plot(x, y, type=\bty=\abline(v=c(x0 - which.diff, x0 + which.diff), lty = 2) abline(v=x0) points(x[diffs > which.diff], y[diffs > which.diff], pch=16, cex=1, col=gray(.80)) points(x[diffs <= which.diff], y[diffs <= which.diff], cex=.85) abline(mod, lwd=2, col=1) text(27.5, 50, expression(paste(\x[0]))) #这里expression的用法比较有意思 arrows(25, 47, 15, 37, code =2, length = .10) ################################################################################# #2、Now Putting It Together For Local Regression Demonstration. #OLS Fit for Comparison ols <- lm(chal.vote ~ perotvote, data=jacob) #The loess fit model.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5) #*** 默认设置 degree=2,family=gauss, tricube加权 *** n <- length(chal.vote) x.loess <- seq(min(perotvote), max(perotvote), length=n) y.loess <- predict(model.loess, data.frame(perotvote=x.loess)) #得到预测值便于比较 #The lowess fit model.lowess <- lowess(chal.vote ~ perotvote, data=jacob, f = 0.5) #*** 默认设置 robust linear tricube加权 *** n <- length(chal.vote) x.lowess <- seq(min(perotvote), max(perotvote), length=n) y.lowess <- predict(model.lowess, data.frame(perotvote=x.lowess)) #得到预测值便于比较 #Figure 2.8 plot(perotvote, chal.vote, pch=\ylab=\lines(x.loess, y.loess) lines(x.lowess, y.lowess) abline(ols) legend(15,20, c(\################################################################################# #3、lowess中不同robust的比较 m1.lowess <- lowess(perotvote, chal.vote, f = 0.5, iter=0) #*** 没有进行第二步的robust加权估计 *** m2.lowess <- lowess(perotvote, chal.vote, f = 0.5) #*** 默认 iter=3,要进行3次robust加权估计 *** m0.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1, family=\iterations=1) #** no robust m1.loess <- loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1) #*** 没有进行第二步的robust加权估计 *** m2.loess <- 
loess(chal.vote ~ perotvote, data=jacob, span = 0.5, degree=1, family=\#*** 进行3次robust加权估计 *** plot(perotvote, chal.vote, pch=\(%)\lines(m1.lowess) lines(sort(perotvote), m1.loess$fit[order(perotvote)], lty=3, col=\lines(sort(perotvote), m0.loess$fit[order(perotvote)], lty=9,col=18) lines(m2.lowess, lty=2, col=\lines(sort(perotvote), m2.loess$fit[order(perotvote)], lty=4, col=\ ---------------------------------------------------------------------------- 第四章 样条估计spline 一、基本思想
按照x将样本分成多个区间,对每个区间分别进行估计。不同于核估计,这里不用移动计算,从而减小了计算量。 二、最简单的形式
Linear Spline with k knots:
$$f(x)=\beta_0+\beta_1 x+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+$$
其中,$(x-\kappa_j)_+=\max(0,\,x-\kappa_j)$,$\kappa_1<\kappa_2<\cdots<\kappa_k$ 为结点(knots)。
三、其他样条模型 1、p次样条估计
——二次样条Quadratic Spline (basis functions with k knots)
$$f(x)=\beta_0+\beta_1 x+\beta_2 x^2+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^2$$
——三次样条Cubic Spline (with k knots, use cubic truncated-power basis functions)
$$f(x)=\beta_0+\beta_1 x+\beta_2 x^2+\beta_3 x^3+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^3$$
——p-order spline (with k knots)
$$f(x)=\sum_{m=0}^{p}\beta_m x^m+\sum_{j=1}^{k}b_j\,(x-\kappa_j)_+^p$$
2、B-splines (with k knots, cubic B-spline basis)
$$f(x)=\sum_{j}\beta_j B_{j,3}(x)$$
其中,$B_{j,3}(x)$ 为三次 B 样条基函数,由 Cox–de Boor 递推公式定义:
$$B_{j,0}(x)=\mathbf{1}\{\kappa_j\le x<\kappa_{j+1}\},\qquad B_{j,d}(x)=\frac{x-\kappa_j}{\kappa_{j+d}-\kappa_j}B_{j,d-1}(x)+\frac{\kappa_{j+d+1}-x}{\kappa_{j+d+1}-\kappa_{j+1}}B_{j+1,d-1}(x)$$
3、Natural Splines
以上估计方法对结点(knots)之间的估计比较准确,但对边界的拟合效果较差。自然样条的思想是,在自变量最小值和最大值处各增加一个结点,用线性模型拟合边界位置的样本点。
4、k的选择和模型比较 采用AIC准则
四、光滑样条smoothing spline 基于如下目标函数(惩罚残差平方和)得到参数估计值:
$$\min_{f}\ \sum_{i=1}^{n}\bigl[y_i-f(x_i)\bigr]^2+\lambda\int\bigl[f''(x)\bigr]^2\,dx$$
其中 $\lambda\ge 0$ 为光滑参数,控制拟合优度与光滑度之间的权衡。
五、模型比较的F检验
六、R语言部分
library(foreign) jacob <- read.dta(\attach(jacob)