# 例:
# Backward stepwise selection with MASS::stepAIC().
# NOTE(review): `states` is not defined in this part of the notes; presumably
# it is a data frame built earlier — confirm.
library(MASS)
fit1 <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = states)
# stepAIC() must receive the model object fitted on the previous line.
stepAIC(fit1, direction = "backward")

# 2、regsubsets():leaps包:全子集回归
# 例:
library(leaps)
leaps <- regsubsets(Murder ~ Population + Illiteracy + Income + Frost,
  data = states, nbest = 4)
# Rank the candidate subsets by adjusted R-squared.
plot(leaps, scale = "adjr2")

# 交叉验证
# 1、crossval() 函数:bootstrap 包:实现 k 重交叉验证
# 例:
# shrinkage(): compare the in-sample R-square of a fitted model with its
# k-fold cross-validated R-square.
#
# Arguments:
#   fit  a fitted model object; `fit$model` is read with column 1 as the
#        response and every remaining column as a predictor, and
#        `fit$fitted.values` supplies the in-sample predictions.
#   k    number of cross-validation folds (default 10).
#
# Prints the original R-square, the cross-validated R-square and their
# difference. Invisibly returns those three numbers as a named vector.
shrinkage <- function(fit, k = 10) {
  # Fail loudly when the package is missing instead of erroring later on an
  # unknown `crossval`.
  if (!requireNamespace("bootstrap", quietly = TRUE)) {
    stop("package 'bootstrap' is required by shrinkage()", call. = FALSE)
  }

  # Fitting and prediction callbacks handed to crossval().
  theta.fit <- function(x, y) {
    lsfit(x, y)
  }
  theta.predict <- function(fit, x) {
    cbind(1, x) %*% fit$coef
  }

  # Matrix of predictors: every column except the response.
  x <- fit$model[, -1, drop = FALSE]
  # Vector of observed responses.
  y <- fit$model[, 1]

  results <- bootstrap::crossval(x, y, theta.fit, theta.predict, ngroup = k)
  r2 <- cor(y, fit$fitted.values)^2
  r2cv <- cor(y, results$cv.fit)^2

  cat("Original R-square =", r2, "\n")
  cat(k, "Fold Cross-Validated R-square =", r2cv, "\n")
  cat("Change =", r2 - r2cv, "\n")

  invisible(c(r2 = r2, r2cv = r2cv, change = r2 - r2cv))
}
# 2、shrinkage():交叉验证;R平方减少得越少,预测则越精确。
# 例:
fit <- lm(Murder ~ Population + Income + Illiteracy + Frost, data = states)
shrinkage(fit)

# 相对重要性
# 1、scale():将数据标准化为均值为0、标准差为1的数据集,这样用R回归即可获得标准化的回归系数。
# 注意, scale()函数返回的是一个矩阵,而lm()函数要求一个数据框
# 例:
# scale() returns a matrix, so convert back to a data frame before lm().
zstates <- as.data.frame(scale(states))
zfit <- lm(Murder ~ Population + Income + Illiteracy + Frost, data = zstates)
coef(zfit)
2、relweights() :相对权重
# 例:
# relweights(): relative-weight analysis of the predictors of a fitted model.
#
# Arguments:
#   fit  a fitted model object; `fit$model` is read with column 1 as the
#        response and the remaining columns as numeric predictors.
#   ...  further arguments forwarded to barplot().
#
# Draws a bar plot of the weights and returns a one-column matrix named
# "Weights" (one row per predictor) holding each predictor's share of
# R-square in percent.
relweights <- function(fit, ...) {
  R <- cor(fit$model)
  nvar <- ncol(R)
  stopifnot(nvar >= 2)

  # drop = FALSE keeps a 1 x 1 matrix when there is a single predictor.
  rxx <- R[2:nvar, 2:nvar, drop = FALSE]
  rxy <- R[2:nvar, 1]

  eig <- eigen(rxx)
  evec <- eig$vectors
  ev <- eig$values
  # nrow is explicit because diag(x) with a length-one x would build an
  # identity matrix of size x instead of a 1 x 1 diagonal matrix.
  delta <- diag(sqrt(ev), nrow = length(ev))

  # correlations between original predictors and new orthogonal variables
  lambda <- evec %*% delta %*% t(evec)
  lambdasq <- lambda^2

  # regression coefficients of Y on orthogonal variables
  beta <- solve(lambda) %*% rxy
  rsquare <- colSums(beta^2)

  rawwgt <- lambdasq %*% beta^2
  import <- (rawwgt / rsquare) * 100
  lbls <- names(fit$model[2:nvar])
  rownames(import) <- lbls
  colnames(import) <- "Weights"

  # plot results
  barplot(t(import), names.arg = lbls, ylab = "% of R-Square",
    xlab = "Predictor Variables",
    main = "Relative Importance of Predictor Variables",
    sub = paste("R-Square = ", round(rsquare, digits = 3)), ...)

  import
}
# using relweights()
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data = states)
# `col` is forwarded through `...` to barplot().
relweights(fit, col = "lightgrey")
方差分析
1、aov() =lm() 单因素方差分析
# 2、plotmeans():绘制带置信区间的图形
# 例:
# One-way ANOVA on the cholesterol data set shipped with multcomp. The data
# frame is passed via `data =` instead of attach()/detach(), so nothing is
# left on the search path if a step fails.
library(multcomp)
table(cholesterol$trt)
aggregate(response ~ trt, data = cholesterol, FUN = mean)
aggregate(response ~ trt, data = cholesterol, FUN = sd)
fit <- aov(response ~ trt, data = cholesterol)
summary(fit)
library(gplots)
plotmeans(response ~ trt, data = cholesterol, xlab = "Treatment",
  ylab = "Response", main = "Mean Plot\nwith 95% CI")

# 多重比较
# 1、TukeyHSD():对各组均值差异的成对检验
# 例:
# NOTE(review): `fit` is presumably the aov(response ~ trt) model fitted in
# the one-way ANOVA example — confirm.
# Save the graphics parameters first so that par(opar) has a state to restore.
opar <- par(no.readonly = TRUE)
TukeyHSD(fit)
par(las = 2)
par(mar = c(5, 8, 4, 2))
plot(TukeyHSD(fit))
par(opar)

# 2、glht():multcomp包:多重均值比较
# 例:
library(multcomp)
opar <- par(no.readonly = TRUE)
par(mar = c(5, 4, 6, 2))
tuk <- glht(fit, linfct = mcp(trt = "Tukey"))
plot(cld(tuk, level = 0.05), col = "lightgrey")
par(opar)
评估检验的假设条件
# 1、正态检验:
library(car)
qqPlot(lm(response ~ trt, data = cholesterol), simulate = TRUE,
  main = "Q-Q Plot", labels = FALSE)
# 2、方差齐性检验:
bartlett.test(response ~ trt, data = cholesterol)
# 3、检测离群点:outlierTest() car包
library(car)
# NOTE(review): `fit` is presumably the one-way aov model fitted earlier —
# confirm.
outlierTest(fit)

# 单因素协方差分析
# 例:
# One-way ANCOVA on the litter data set from multcomp. The data frame is
# passed via `data =` rather than attach(), so its `dose` column cannot mask
# a `dose` variable used by later examples.
data(litter, package = "multcomp")
table(litter$dose)
aggregate(weight ~ dose, data = litter, FUN = mean)
fit <- aov(weight ~ gesttime + dose, data = litter)
summary(fit)
# 1、effects() :effects包 :计算调整的均值
# 例:
library(effects)
effect("dose", fit)
# 2、ancova() :HH包 :绘制因变量、协变量和因子之间的关系图
# 例:
library(HH)
ancova(weight ~ gesttime + dose, data = litter)
# 3、interaction.plot() :函数来展示双因素方差分析的交互效应
# 例:
# NOTE(review): dose, supp and len are used as bare names; presumably the
# ToothGrowth data set has been attached earlier in the notes — confirm.
interaction.plot(dose, supp, len, type = "b", col = c("red", "blue"),
  pch = c(16, 18),
  main = "Interaction between Dose and Supplement Type")
# 4、plotmeans():gplots包 :展示交互效应
# 例:
library(gplots)
plotmeans(len ~ interaction(supp, dose, sep = " "),
  connect = list(c(1, 3, 5), c(2, 4, 6)), col = c("red", "darkgreen"),
  main = "Interaction Plot with 95% CIs",
  xlab = "Treatment and Dose Combination")
# 5、interaction2wt():HH包 :可视化结果
# 例:
library(HH)
interaction2wt(len ~ supp * dose)
# 6、colMeans():计算每列的平均值
7、nrow()/ncol():计算数组的行数和列数
8、mahalanobis():用协方差来计算两点之间距离的方法
稳健多元方差分析
Wilks.test():稳健单因素 MANOVA