#整理されなくてごめんよ→安部くん #散布図+ヒストグラム(Airquality) hist(airquality$Wind, breaks=seq(0,25,2.5)) layout2 <- function (n) { oldpar <- par(no.readonly = TRUE); on.exit(par(oldpar)) xhist <- hist(airquality$Wind, breaks=seq(0,25,2.5), plot=FALSE) yhist <- hist(airquality$Ozone, breaks=seq(0,175,17.5), plot=FALSE) top <- max(c(xhist$counts, yhist$counts)) xrange <- c(0,25) yrange <- c(0,175) nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) par(mar=c(3,3,1,1)) #番号1の副画面の余白指定 plot(airquality$Wind, airquality$Ozone, xlim=xrange, ylim=yrange, xlab="", ylab="") # 番号1の副画面にプロット par(mar=c(0,3,1,1)) #番号2の副画面の余白指定 barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0) # 番号2の副画面にプロット par(mar=c(3,0,1,1)) #番号3の副画面の余白指定 barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE) # 番号3の副画面にプロット } layout2() #散布図原因と結果の重要性 plot(airquality$Wind, airquality$Ozone, xlab = "Wind", ylab = "Ozone") result <- lm(airquality$Ozone~airquality$Wind) #普通に回帰させた場合 abline(result) result2 <- lm(airquality$Wind~airquality$Ozone) #逆に回帰させた場合 abline(-result2$coef[1]/result2$coef[2], 1/result2$coef[2], lty=2) #上の線の図示(点線) #補足説明(どの方向で最小化するか) lin1 <- function(x){y = result$coef[2]*x+result$coef[1]} lin3 <- function(y){x = result2$coef[2]*y+result2$coef[1]} for(i in 1:5){ arrows(airquality$Wind[i*3], airquality$Ozone[i*3], airquality$Wind[i*3], lin1(airquality$Wind[i*3]) , length=0.1, col="purple", lwd=3) } for(i in 8:13){ arrows(airquality$Wind[i*2], airquality$Ozone[i*2], lin3(airquality$Ozone[i*2]), airquality$Ozone[i*2] , length=0.1, col="green", lwd=3) } #標準化主成分回帰(SMA)を行なう関数を自作 SMA <- function(x, y) # 2 つの変数 { dt <- data.frame(x, y) dt2 <- na.omit(dt) x1 <- dt2$x y1 <- dt2$y slope <- sign(cor(x1, y1))*sqrt(var(y1)/var(x1)) # 傾きを求める intercept <- mean(y1)-slope*mean(x1) # 切片を求める return(list(Slope=slope, Intercept=intercept)) } result3 <- SMA(airquality$Wind, airquality$Ozone) #主成分回帰(SMA)の計算 result3 #結果の表示 abline(res3$Intercept, res3$Slope, col="red") #SMA補足 plot(airquality$Wind, airquality$Ozone, xlab = "Wind", ylab = "Ozone") abline(res3$Intercept, res3$Slope, col="red") lin3 <- function(x){y = res3$Slope*x+res3$Intercept} lin4 <- function(y){x = 1/res3$Slope*y-res3$Intercept/res3$Slope} for(i in 1:5){ arrows(airquality$Wind[i*3], airquality$Ozone[i*3], airquality$Wind[i*3], lin3(airquality$Wind[i*3]) , length=0.1, col="orange", lwd=3) } for(i in 1:5){ arrows(airquality$Wind[i*3], airquality$Ozone[i*3], lin4(airquality$Ozone[i*3]), airquality$Ozone[i*3] , length=0.1, col="orange", lwd=3) } #パッケージを使って求める方法(SMA(RMA)) library(smatr) line.cis(airquality$Ozone, airquality$Wind) res <- line.cis(airquality$Ozone, airquality$Wind) abline(res$coef[1], res$coef[2], col="red") slope.test(airquality$Ozone, airquality$Wind, test.value = -5.55) #重回帰に関する項目(3次元プロット) library(rgl) plot3d(airquality$Wind, airquality$Temp, airquality$Ozone, size=3, col="green") #回帰平面のプロット res <- lm(airquality$Ozone~airquality$Wind+airquality$Temp) f <- function(x,y) {res$coef[1]+res$coef[2]*x+res$coef[3]*y} x <- seq(0, 30, length= 30) y <- seq(40, 100, length= 30) z <- outer(x, y, f) persp3d(x, y, z, aspect=c(1, 1, 1), col = "lightblue", add = F, alpha=0.5) #回帰平面のプロット2 persp(x, y, z, theta = 30, phi = 30, expand = 0.5) res <- persp(x, y, z, theta = 30, phi = 30, expand = 0.5) points(trans3d(airquality$Wind, airquality$Temp, airquality$Ozone, pmat = res), col ="green", pch =16) #説明変数の相関が高い場合(というか二つとも同じ) points3d(airquality$Wind, airquality$Wind, airquality$Ozone, size=3, col="green")