library(readr)
library(car)
source("VIF.R")
source("ShowSubsets.R")
source("HistNormal.R")
library(corrplot)
library(leaps)
library(tidyverse)
library(olsrr)
# Read the training and test samples from the working directory.
Ames.Train <- read_csv(file = "AmesTrain16.csv")
Ames.Test <- read_csv(file = "AmesTest16.csv")

# PART 1:
head(Ames.Train)

# Keep the 26 columns (by position) used for the all-predictor model below:
# Price plus the 25 predictors that appear in mod1.
Ames.Train.nv <- Ames.Train %>%
  select(c(
    2, 3, 4, 7, 8, 9, 10, 17, 18, 19, 23, 24, 25,
    26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 40, 41, 42
  ))
head(Ames.Train.nv)

# Pairwise correlations among the selected columns, drawn as an upper triangle.
Ames.Train.nv.corr <- cor(x = Ames.Train.nv)
corrplot(Ames.Train.nv.corr, type = "upper")

# Full model: Price regressed on every other selected column.
mod1 <- lm(Price ~ ., data = Ames.Train.nv)
summary(object = mod1)
##
## Call:
## lm(formula = Price ~ ., data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.788 -16.602 -0.395 13.693 185.004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.282e+03 1.844e+02 -6.954 9.7e-12 ***
## LotFrontage 1.267e-01 3.874e-02 3.271 0.00114 **
## LotArea 8.973e-04 1.683e-04 5.332 1.4e-07 ***
## Quality 1.721e+01 1.730e+00 9.947 < 2e-16 ***
## Condition 2.775e+00 1.388e+00 2.000 0.04602 *
## YearBuilt 3.458e-01 8.204e-02 4.215 2.9e-05 ***
## YearRemodel 2.532e-01 9.103e-02 2.782 0.00558 **
## BasementFinSF 2.473e-02 8.212e-03 3.012 0.00271 **
## BasementUnFinSF -1.224e-03 8.326e-03 -0.147 0.88317
## BasementSF 2.792e-02 9.938e-03 2.810 0.00512 **
## FirstSF 9.300e-03 3.413e-02 0.272 0.78535
## SecondSF -2.105e-03 3.362e-02 -0.063 0.95011
## GroundSF 4.762e-02 3.410e-02 1.397 0.16306
## BasementFBath 1.170e+00 3.467e+00 0.338 0.73583
## BasementHBath -2.310e+00 5.207e+00 -0.444 0.65746
## FullBath -4.311e+00 3.736e+00 -1.154 0.24891
## HalfBath 4.539e+00 3.764e+00 1.206 0.22832
## Bedroom -5.697e+00 2.369e+00 -2.405 0.01651 *
## TotalRooms 3.562e+00 1.619e+00 2.200 0.02820 *
## Fireplaces 4.411e+00 2.327e+00 1.896 0.05845 .
## GarageCars -5.027e-01 3.830e+00 -0.131 0.89561
## GarageSF 3.131e-02 1.326e-02 2.362 0.01852 *
## WoodDeckSF 1.208e-02 1.020e-02 1.185 0.23665
## OpenPorchSF 8.583e-03 2.025e-02 0.424 0.67178
## EnclosedPorchSF 2.758e-02 1.883e-02 1.465 0.14347
## ScreenPorchSF 4.190e-02 2.442e-02 1.716 0.08669 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.45 on 574 degrees of freedom
## Multiple R-squared: 0.8656, Adjusted R-squared: 0.8598
## F-statistic: 147.9 on 25 and 574 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for the full model.
plot(x = mod1)

# Variance inflation factors for every predictor in the full model.
vif(mod1)
## LotFrontage LotArea Quality Condition YearBuilt
## 1.124055 1.211997 3.738469 1.500289 4.269855
## YearRemodel BasementFinSF BasementUnFinSF BasementSF FirstSF
## 2.372390 8.867104 8.955856 10.640480 97.844513
## SecondSF GroundSF BasementFBath BasementHBath FullBath
## 146.265317 181.674196 2.254359 1.194088 2.851072
## HalfBath Bedroom TotalRooms Fireplaces GarageCars
## 2.397305 2.580379 4.068803 1.590703 5.264011
## GarageSF WoodDeckSF OpenPorchSF EnclosedPorchSF ScreenPorchSF
## 5.181973 1.166073 1.246137 1.272542 1.102638

# Best-subsets search: the two best models of each size, up to 25 predictors.
all.predictors <- regsubsets(
  Price ~ .,
  data = Ames.Train.nv,
  nbest = 2,
  nvmax = 25
)
ShowSubsets(all.predictors)
# The top-ranked model with 15 predictor variables has the highest adjusted R-squared and the lowest Cp, so it is the best model.
# Fit the 15-predictor model chosen from the best-subsets table.
mod2 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition +
    YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
    FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
    ScreenPorchSF,
  data = Ames.Train.nv
)
summary(object = mod2)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.845 -16.864 -0.772 14.072 185.146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.309e+03 1.706e+02 -7.673 7.10e-14 ***
## LotFrontage 1.300e-01 3.790e-02 3.430 0.000646 ***
## LotArea 9.186e-04 1.656e-04 5.546 4.42e-08 ***
## Quality 1.706e+01 1.674e+00 10.187 < 2e-16 ***
## Condition 2.630e+00 1.363e+00 1.930 0.054091 .
## YearBuilt 3.512e-01 7.247e-02 4.846 1.62e-06 ***
## YearRemodel 2.642e-01 8.957e-02 2.950 0.003304 **
## BasementFinSF 2.640e-02 3.393e-03 7.782 3.26e-14 ***
## BasementSF 3.197e-02 4.298e-03 7.438 3.68e-13 ***
## GroundSF 5.233e-02 5.677e-03 9.219 < 2e-16 ***
## FullBath -6.531e+00 3.367e+00 -1.940 0.052869 .
## Bedroom -6.459e+00 2.317e+00 -2.787 0.005492 **
## TotalRooms 3.732e+00 1.594e+00 2.341 0.019565 *
## Fireplaces 4.941e+00 2.270e+00 2.177 0.029918 *
## GarageSF 3.171e-02 7.905e-03 4.011 6.82e-05 ***
## ScreenPorchSF 3.899e-02 2.375e-02 1.641 0.101239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8606
## F-statistic: 247.5 on 15 and 584 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for the best-subsets model.
plot(mod2)

# Backward elimination starting from the all-predictor model.
# Supplying the full model's MSE as `scale` makes step() rank models by
# Mallows' Cp instead of AIC.
Full <- lm(Price ~ ., data = Ames.Train.nv)
MSE <- summary(Full)$sigma^2

# `trace` must be numeric/logical; the string "false" only worked through
# implicit coercion. `direction` is spelled out to match the forward run.
step(Full, scale = MSE, direction = "backward", trace = FALSE)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Train.nv)
##
## Coefficients:
## (Intercept) LotFrontage LotArea Quality Condition
## -1.309e+03 1.300e-01 9.186e-04 1.706e+01 2.630e+00
## YearBuilt YearRemodel BasementFinSF BasementSF GroundSF
## 3.512e-01 2.642e-01 2.640e-02 3.197e-02 5.233e-02
## FullBath Bedroom TotalRooms Fireplaces GarageSF
## -6.531e+00 -6.459e+00 3.732e+00 4.941e+00 3.171e-02
## ScreenPorchSF
## 3.899e-02
# Backward elimination produces the same model as mod2.
# Forward selection: start from the intercept-only model and add terms,
# with the full model as the upper limit of the search.
None <- lm(Price ~ 1, data = Ames.Train.nv)
step(
  object = None,
  scope = list(upper = Full),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
##
## Call:
## lm(formula = Price ~ Quality + FirstSF + SecondSF + BasementFinSF +
## LotArea + YearBuilt + GarageSF + BasementSF + YearRemodel +
## LotFrontage + Fireplaces + HalfBath + Condition + Bedroom +
## TotalRooms + ScreenPorchSF + EnclosedPorchSF, data = Ames.Train.nv)
##
## Coefficients:
## (Intercept) Quality FirstSF SecondSF
## -1.226e+03 1.708e+01 5.491e-02 4.224e-02
## BasementFinSF LotArea YearBuilt GarageSF
## 2.706e-02 8.876e-04 3.100e-01 3.059e-02
## BasementSF YearRemodel LotFrontage Fireplaces
## 2.747e-02 2.589e-01 1.255e-01 4.608e+00
## HalfBath Condition Bedroom TotalRooms
## 6.323e+00 2.917e+00 -5.895e+00 3.575e+00
## ScreenPorchSF EnclosedPorchSF
## 4.023e-02 2.849e-02
# Forward selection gives a different model from the one chosen by the two previous methods.
# Refit the 17-predictor model returned by forward selection.
mod3 <- lm(
  Price ~ Quality + FirstSF + SecondSF + BasementFinSF + LotArea +
    YearBuilt + GarageSF + BasementSF + YearRemodel + LotFrontage +
    Fireplaces + HalfBath + Condition + Bedroom + TotalRooms +
    ScreenPorchSF + EnclosedPorchSF,
  data = Ames.Train.nv
)
summary(object = mod3)
##
## Call:
## lm(formula = Price ~ Quality + FirstSF + SecondSF + BasementFinSF +
## LotArea + YearBuilt + GarageSF + BasementSF + YearRemodel +
## LotFrontage + Fireplaces + HalfBath + Condition + Bedroom +
## TotalRooms + ScreenPorchSF + EnclosedPorchSF, data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -92.646 -16.832 -0.493 14.794 180.793
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.226e+03 1.664e+02 -7.367 6.00e-13 ***
## Quality 1.708e+01 1.699e+00 10.053 < 2e-16 ***
## FirstSF 5.491e-02 8.035e-03 6.833 2.10e-11 ***
## SecondSF 4.224e-02 6.072e-03 6.957 9.37e-12 ***
## BasementFinSF 2.706e-02 3.391e-03 7.980 7.81e-15 ***
## LotArea 8.876e-04 1.665e-04 5.332 1.39e-07 ***
## YearBuilt 3.100e-01 7.599e-02 4.080 5.14e-05 ***
## GarageSF 3.059e-02 8.023e-03 3.812 0.000152 ***
## BasementSF 2.747e-02 6.020e-03 4.563 6.16e-06 ***
## YearRemodel 2.589e-01 8.948e-02 2.893 0.003957 **
## LotFrontage 1.255e-01 3.840e-02 3.269 0.001145 **
## Fireplaces 4.608e+00 2.309e+00 1.996 0.046410 *
## HalfBath 6.323e+00 3.453e+00 1.831 0.067584 .
## Condition 2.917e+00 1.361e+00 2.143 0.032512 *
## Bedroom -5.895e+00 2.338e+00 -2.521 0.011955 *
## TotalRooms 3.575e+00 1.599e+00 2.236 0.025716 *
## ScreenPorchSF 4.023e-02 2.396e-02 1.679 0.093635 .
## EnclosedPorchSF 2.849e-02 1.870e-02 1.524 0.128086
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.39 on 582 degrees of freedom
## Multiple R-squared: 0.8643, Adjusted R-squared: 0.8603
## F-statistic: 218 on 17 and 582 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for the forward-selection model.
plot(x = mod3)

# Variance inflation factors for the two candidate models.
vif(mod2)
## LotFrontage LotArea Quality Condition YearBuilt
## 1.082105 1.181011 3.521544 1.455091 3.351422
## YearRemodel BasementFinSF BasementSF GroundSF FullBath
## 2.310357 1.522870 2.002441 5.066200 2.329781
## Bedroom TotalRooms Fireplaces GarageSF ScreenPorchSF
## 2.483329 3.966868 1.523651 1.854051 1.049800
vif(mod3)
## Quality FirstSF SecondSF BasementFinSF LotArea
## 3.618069 5.444095 4.788303 1.518124 1.190687
## YearBuilt GarageSF BasementSF YearRemodel LotFrontage
## 3.677444 1.905684 3.920093 2.301364 1.108694
## Fireplaces HalfBath Condition Bedroom TotalRooms
## 1.572568 2.025182 1.448649 2.522101 3.981090
## ScreenPorchSF EnclosedPorchSF
## 1.065707 1.259741

# Mallows' Cp of each candidate, measured against the full model mod1.
ols_mallows_cp(model = mod2, fullmodel = mod1)
## [1] 12.49027
ols_mallows_cp(model = mod3, fullmodel = mod1)
## [1] 15.71616
#PART 2:
plot(mod2)

# Ten largest absolute residuals of mod2, in decreasing order.
mod2.largest <- head(sort(abs(mod2$residuals), decreasing = TRUE), n = 10)
mod2.largest
## 343 222 109 319 78 380 292 382
## 185.14553 159.15927 130.33355 112.68010 111.43223 96.84544 94.61528 87.73272
## 588 351
## 87.51145 87.35665

# Row labels of those ten cases, taken from the result above rather than
# typed by hand, so every diagnostic below looks at the same observations.
mod2.top10 <- names(mod2.largest)

# Signed residuals for all ten cases (a bare head() would show only six).
mod2$residuals[mod2.top10]
## 343 222 109 319 78 380 292 382
## 185.14553 159.15927 130.33355 112.68010 111.43223 -96.84544 -94.61528 87.73272
## 588 351
## -87.51145 87.35665
rstandard(mod2)[mod2.top10]
## 343 222 109 319 78 380 292 382
## 6.560243 5.814848 4.476083 3.881726 3.848407 -3.381801 -3.292753 3.032637
## 588 351
## -3.023010 3.041522
rstudent(mod2)[mod2.top10]
## 343 222 109 319 78 380 292 382
## 6.810364 5.985732 4.550996 3.929424 3.894814 -3.412483 -3.320904 3.054184
## 588 351
## -3.044334 3.063275

# Leverage cut-offs 2(k + 1)/n and 3(k + 1)/n, computed from the fitted
# model (16 coefficients, 600 observations) instead of hard-coded counts.
2 * length(coef(mod2)) / nobs(mod2)
## [1] 0.05333333
3 * length(coef(mod2)) / nobs(mod2)
## [1] 0.08
hatvalues(mod2)[mod2.top10]
## 343 222 109 319 78 380 292
## 0.07603531 0.13092764 0.01647594 0.02250739 0.02741283 0.04867035 0.04220408
## 382 588 351
## 0.02915258 0.02788100 0.04307275

# Cook's distance for the same cases, largest first. `decreasing` belongs to
# sort(); it was previously passed to head(), which left the values ascending.
sort(cooks.distance(mod2)[mod2.top10], decreasing = TRUE)
## 222 343 380 292 78 351 319
## 0.31836999 0.22135015 0.03656870 0.02985931 0.02608959 0.02602469 0.02168410
## 109 382 588
## 0.02097690 0.01726025 0.01638129
# Residuals-vs-leverage plot for mod2.
plot(mod2, which = 5)

# Price plotted against each predictor of mod2.
plot(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)
plot(mod2)

# mod4: log-transformed predictors (log(x + 1) for all but the two year
# columns), untransformed response.
mod4 <- lm(
  Price ~ log(LotFrontage + 1) + log(LotArea + 1) + log(Quality + 1) +
    log(Condition + 1) + log(YearBuilt) + log(YearRemodel) +
    log(BasementFinSF + 1) + log(BasementSF + 1) + log(GroundSF + 1) +
    log(FullBath + 1) + log(Bedroom + 1) + log(TotalRooms + 1) +
    log(Fireplaces + 1) + log(GarageSF + 1) + log(ScreenPorchSF + 1),
  data = Ames.Train.nv
)

# mod5: the same log-transformed predictors with a log-transformed response.
mod5 <- lm(
  log(Price) ~ log(LotFrontage + 1) + log(LotArea + 1) + log(Quality + 1) +
    log(Condition + 1) + log(YearBuilt) + log(YearRemodel) +
    log(BasementFinSF + 1) + log(BasementSF + 1) + log(GroundSF + 1) +
    log(FullBath + 1) + log(Bedroom + 1) + log(TotalRooms + 1) +
    log(Fireplaces + 1) + log(GarageSF + 1) + log(ScreenPorchSF + 1),
  data = Ames.Train.nv
)

# mod6: log-transformed response with the original predictors.
mod6 <- lm(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)

# Diagnostic plots for each transformed model, then the mod4 summary.
plot(x = mod4)
plot(x = mod5)
plot(x = mod6)
summary(object = mod4)
##
## Call:
## lm(formula = Price ~ log(LotFrontage + 1) + log(LotArea + 1) +
## log(Quality + 1) + log(Condition + 1) + log(YearBuilt) +
## log(YearRemodel) + log(BasementFinSF + 1) + log(BasementSF +
## 1) + log(GroundSF + 1) + log(FullBath + 1) + log(Bedroom +
## 1) + log(TotalRooms + 1) + log(Fireplaces + 1) + log(GarageSF +
## 1) + log(ScreenPorchSF + 1), data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.407 -21.287 -4.512 16.746 251.717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.296e+04 1.631e+03 -7.945 1.00e-14 ***
## log(LotFrontage + 1) 2.869e+00 9.385e-01 3.057 0.00234 **
## log(LotArea + 1) 2.921e+01 3.309e+00 8.828 < 2e-16 ***
## log(Quality + 1) 1.395e+02 1.421e+01 9.817 < 2e-16 ***
## log(Condition + 1) 5.337e+00 1.142e+01 0.467 0.64042
## log(YearBuilt) 8.726e+02 1.764e+02 4.947 9.87e-07 ***
## log(YearRemodel) 7.122e+02 2.163e+02 3.293 0.00105 **
## log(BasementFinSF + 1) 2.978e+00 5.779e-01 5.152 3.52e-07 ***
## log(BasementSF + 1) 2.471e+00 1.465e+00 1.687 0.09214 .
## log(GroundSF + 1) 6.898e+01 1.076e+01 6.411 2.99e-10 ***
## log(FullBath + 1) -3.446e+00 1.073e+01 -0.321 0.74818
## log(Bedroom + 1) -4.580e+01 9.632e+00 -4.755 2.51e-06 ***
## log(TotalRooms + 1) 4.519e+01 1.495e+01 3.022 0.00262 **
## log(Fireplaces + 1) 1.506e+01 4.647e+00 3.242 0.00126 **
## log(GarageSF + 1) -6.569e-01 1.414e+00 -0.465 0.64231
## log(ScreenPorchSF + 1) 7.647e-01 1.039e+00 0.736 0.46187
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.5 on 584 degrees of freedom
## Multiple R-squared: 0.7899, Adjusted R-squared: 0.7845
## F-statistic: 146.4 on 15 and 584 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the log-log model.
summary(object = mod5)
##
## Call:
## lm(formula = log(Price) ~ log(LotFrontage + 1) + log(LotArea +
## 1) + log(Quality + 1) + log(Condition + 1) + log(YearBuilt) +
## log(YearRemodel) + log(BasementFinSF + 1) + log(BasementSF +
## 1) + log(GroundSF + 1) + log(FullBath + 1) + log(Bedroom +
## 1) + log(TotalRooms + 1) + log(Fireplaces + 1) + log(GarageSF +
## 1) + log(ScreenPorchSF + 1), data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.27266 -0.07681 -0.00287 0.08203 0.45149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -75.525367 6.699460 -11.273 < 2e-16 ***
## log(LotFrontage + 1) 0.006982 0.003854 1.811 0.070580 .
## log(LotArea + 1) 0.137063 0.013588 10.087 < 2e-16 ***
## log(Quality + 1) 0.700683 0.058357 12.007 < 2e-16 ***
## log(Condition + 1) 0.309571 0.046895 6.601 9.16e-11 ***
## log(YearBuilt) 7.488680 0.724335 10.339 < 2e-16 ***
## log(YearRemodel) 2.247819 0.888113 2.531 0.011635 *
## log(BasementFinSF + 1) 0.014182 0.002373 5.976 3.98e-09 ***
## log(BasementSF + 1) 0.020765 0.006015 3.452 0.000597 ***
## log(GroundSF + 1) 0.460841 0.044182 10.430 < 2e-16 ***
## log(FullBath + 1) -0.028716 0.044052 -0.652 0.514743
## log(Bedroom + 1) -0.142349 0.039554 -3.599 0.000347 ***
## log(TotalRooms + 1) 0.086737 0.061411 1.412 0.158368
## log(Fireplaces + 1) 0.052469 0.019081 2.750 0.006148 **
## log(GarageSF + 1) 0.003200 0.005805 0.551 0.581645
## log(ScreenPorchSF + 1) 0.007193 0.004265 1.686 0.092263 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1499 on 584 degrees of freedom
## Multiple R-squared: 0.8718, Adjusted R-squared: 0.8685
## F-statistic: 264.7 on 15 and 584 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the log(Price) model.
summary(object = mod6)
##
## Call:
## lm(formula = log(Price) ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.57157 -0.07269 0.00404 0.07659 0.42543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.824e+00 8.537e-01 -6.823 2.24e-11 ***
## LotFrontage 3.747e-04 1.897e-04 1.975 0.04874 *
## LotArea 3.950e-06 8.290e-07 4.765 2.38e-06 ***
## Quality 8.533e-02 8.380e-03 10.182 < 2e-16 ***
## Condition 5.491e-02 6.822e-03 8.050 4.67e-15 ***
## YearBuilt 3.903e-03 3.627e-04 10.761 < 2e-16 ***
## YearRemodel 8.417e-04 4.483e-04 1.878 0.06094 .
## BasementFinSF 9.540e-05 1.698e-05 5.618 3.00e-08 ***
## BasementSF 1.511e-04 2.151e-05 7.021 6.12e-12 ***
## GroundSF 3.052e-04 2.841e-05 10.742 < 2e-16 ***
## FullBath -2.526e-02 1.685e-02 -1.499 0.13442
## Bedroom -8.224e-03 1.160e-02 -0.709 0.47862
## TotalRooms 3.921e-03 7.979e-03 0.491 0.62333
## Fireplaces 2.992e-02 1.136e-02 2.633 0.00868 **
## GarageSF 9.212e-05 3.957e-05 2.328 0.02025 *
## ScreenPorchSF 2.854e-04 1.189e-04 2.400 0.01669 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.147 on 584 degrees of freedom
## Multiple R-squared: 0.8768, Adjusted R-squared: 0.8736
## F-statistic: 277 on 15 and 584 DF, p-value: < 2.2e-16
# Untransformed model printed again (presumably for comparison with mod4-mod6).
summary(object = mod2)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.845 -16.864 -0.772 14.072 185.146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.309e+03 1.706e+02 -7.673 7.10e-14 ***
## LotFrontage 1.300e-01 3.790e-02 3.430 0.000646 ***
## LotArea 9.186e-04 1.656e-04 5.546 4.42e-08 ***
## Quality 1.706e+01 1.674e+00 10.187 < 2e-16 ***
## Condition 2.630e+00 1.363e+00 1.930 0.054091 .
## YearBuilt 3.512e-01 7.247e-02 4.846 1.62e-06 ***
## YearRemodel 2.642e-01 8.957e-02 2.950 0.003304 **
## BasementFinSF 2.640e-02 3.393e-03 7.782 3.26e-14 ***
## BasementSF 3.197e-02 4.298e-03 7.438 3.68e-13 ***
## GroundSF 5.233e-02 5.677e-03 9.219 < 2e-16 ***
## FullBath -6.531e+00 3.367e+00 -1.940 0.052869 .
## Bedroom -6.459e+00 2.317e+00 -2.787 0.005492 **
## TotalRooms 3.732e+00 1.594e+00 2.341 0.019565 *
## Fireplaces 4.941e+00 2.270e+00 2.177 0.029918 *
## GarageSF 3.171e-02 7.905e-03 4.011 6.82e-05 ***
## ScreenPorchSF 3.899e-02 2.375e-02 1.641 0.101239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8606
## F-statistic: 247.5 on 15 and 584 DF, p-value: < 2.2e-16
# Ten largest absolute residuals of mod6, in decreasing order.
mod6.largest <- head(sort(abs(mod6$residuals), decreasing = TRUE), n = 10)
mod6.largest
## 299 458 380 588 132 109 240 44
## 1.5715711 0.4926986 0.4677200 0.4467211 0.4298559 0.4254321 0.4217658 0.4165398
## 317 179
## 0.3934745 0.3860867

# Row labels of those ten cases, taken from the result above rather than
# typed by hand, so every diagnostic below looks at the same observations.
mod6.top10 <- names(mod6.largest)

# Signed residuals for all ten cases (a bare head() would show only six).
mod6$residuals[mod6.top10]
## 299 458 380 588 132 109 240
## -1.5715711 -0.4926986 -0.4677200 -0.4467211 -0.4298559 0.4254321 0.4217658
## 44 317 179
## 0.4165398 -0.3934745 -0.3860867
rstandard(mod6)[mod6.top10]
## 299 458 380 588 132 109 240
## -11.239166 -3.383835 -3.263067 -3.083062 -2.943474 2.919062 2.890237
## 44 317 179
## 2.871125 -2.703059 -4.419756
rstudent(mod6)[mod6.top10]
## 299 458 380 588 132 109 240
## -12.684894 -3.414576 -3.290405 -3.105800 -2.963015 2.938074 2.908639
## 44 317 179
## 2.889129 -2.717799 -4.491731

# Leverage cut-offs 2(k + 1)/n and 3(k + 1)/n, computed from the fitted
# model (16 coefficients, 600 observations) instead of hard-coded counts.
2 * length(coef(mod6)) / nobs(mod6)
## [1] 0.05333333
3 * length(coef(mod6)) / nobs(mod6)
## [1] 0.08
hatvalues(mod6)[mod6.top10]
## 299 458 380 588 132 109 240
## 0.09466155 0.01835268 0.04867035 0.02788100 0.01250162 0.01647594 0.01397735
## 44 317 179
## 0.02541472 0.01885488 0.64666695

# Cook's distance for the same cases, largest first. `decreasing` belongs to
# sort(); it was previously passed to head(), which left the values ascending.
sort(cooks.distance(mod6)[mod6.top10], decreasing = TRUE)
## 179 299 380 588 44 458
## 2.234462090 0.825488145 0.034045940 0.017038583 0.013435370 0.013379574
## 109 317 240 132
## 0.008921374 0.008775699 0.007400903 0.006855362

# Residuals-vs-leverage plot for mod6.
plot(mod6, which = 5)

# Best single subset of each size (1 to 15 predictors) for log(Price).
mod6CP <- regsubsets(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv,
  nbest = 1,
  nvmax = 15
)
ShowSubsets(mod6CP)
#Part 5
# One-row data frame describing the house whose price is to be predicted.
House <- data.frame(
  YearBuilt = 1995,
  YearRemodel = 2003,
  Quality = 7,
  Condition = 5,
  ExteriorQ = "Gd",
  ExteriorC = "Gd",
  LotFrontage = 90,
  LotArea = 11060,
  LotConfig = "Corner",
  HouseStyle = "2Story",
  Foundation = "PConc",
  BasementHt = "Ex",
  BasementSF = 1150,
  BasementFinSF = 0,
  BasementFin = "Unf",
  Heating = "GasA",
  HeatingQC = "Ex",
  CentralAir = "Y",
  FirstSF = 1164,
  SecondSF = 1150,
  GroundSF = 2314,
  BasementFBath = 0,
  BasementHBath = 0,
  FullBath = 2,
  HalfBath = 1,
  Bedroom = 3,
  TotalRooms = 9,
  Fireplaces = 1,
  GarageType = "Attchd",
  ScreenPorchSF = 0,
  GarageCars = 2,
  GarageSF = 502,
  GarageQ = "TA",
  GarageC = "TA",
  OpenPorchSF = 274
)

# 95% prediction interval from the log(Price) model, back-transformed with
# exp() to the original Price scale.
exp(predict(object = mod6, newdata = House, interval = "prediction", level = 0.95))
## fit lwr upr
## 1 248.2351 185.452 332.2728

# 95% prediction interval from the untransformed model.
predict(object = mod2, newdata = House, interval = "prediction", level = 0.95)
## fit lwr upr
## 1 255.5706 197.3161 313.8251
#Part 6
head(Ames.Train)

# Column positions of Price and the 25 predictors used by the models.
# Defined once so the training and test subsets always contain the same
# columns; all_of() marks the vector as an explicit external selection.
nv.cols <- c(
  2, 3, 4, 7, 8, 9, 10, 17, 18, 19, 23, 24, 25,
  26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 40, 41, 42
)
Ames.Train.nv <- select(Ames.Train, all_of(nv.cols))
head(Ames.Train.nv)
Ames.Test.nv <- select(Ames.Test, all_of(nv.cols))
Ames.Test.nv

#model from part 1 and 2 of HW 3
# Same 15-predictor formula as mod2, fitted to the training sample.
mod1.Train <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)
summary(mod1.Train)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Train.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.845 -16.864 -0.772 14.072 185.146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.309e+03 1.706e+02 -7.673 7.10e-14 ***
## LotFrontage 1.300e-01 3.790e-02 3.430 0.000646 ***
## LotArea 9.186e-04 1.656e-04 5.546 4.42e-08 ***
## Quality 1.706e+01 1.674e+00 10.187 < 2e-16 ***
## Condition 2.630e+00 1.363e+00 1.930 0.054091 .
## YearBuilt 3.512e-01 7.247e-02 4.846 1.62e-06 ***
## YearRemodel 2.642e-01 8.957e-02 2.950 0.003304 **
## BasementFinSF 2.640e-02 3.393e-03 7.782 3.26e-14 ***
## BasementSF 3.197e-02 4.298e-03 7.438 3.68e-13 ***
## GroundSF 5.233e-02 5.677e-03 9.219 < 2e-16 ***
## FullBath -6.531e+00 3.367e+00 -1.940 0.052869 .
## Bedroom -6.459e+00 2.317e+00 -2.787 0.005492 **
## TotalRooms 3.732e+00 1.594e+00 2.341 0.019565 *
## Fireplaces 4.941e+00 2.270e+00 2.177 0.029918 *
## GarageSF 3.171e-02 7.905e-03 4.011 6.82e-05 ***
## ScreenPorchSF 3.899e-02 2.375e-02 1.641 0.101239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8606
## F-statistic: 247.5 on 15 and 584 DF, p-value: < 2.2e-16
# Diagnostic plots for the training-sample fit.
plot(x = mod1.Train)

# Refit the same 15-predictor formula on the test sample.
mod1.Test <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Test.nv
)
summary(object = mod1.Test)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF, data = Ames.Test.nv)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.477 -14.029 -3.168 11.916 142.003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.091e+03 2.720e+02 -4.010 8.81e-05 ***
## LotFrontage 1.655e-01 6.219e-02 2.661 0.008488 **
## LotArea 2.604e-03 6.193e-04 4.205 4.08e-05 ***
## Quality 1.711e+01 2.390e+00 7.161 1.85e-11 ***
## Condition 3.659e+00 2.283e+00 1.603 0.110712
## YearBuilt 4.191e-01 1.234e-01 3.397 0.000835 ***
## YearRemodel 8.560e-02 1.428e-01 0.599 0.549685
## BasementFinSF 1.872e-02 4.945e-03 3.785 0.000208 ***
## BasementSF 2.162e-02 6.128e-03 3.528 0.000529 ***
## GroundSF 6.995e-02 9.063e-03 7.718 7.26e-13 ***
## FullBath -5.835e+00 5.330e+00 -1.095 0.275088
## Bedroom -9.484e+00 3.655e+00 -2.595 0.010217 *
## TotalRooms 7.236e-01 2.641e+00 0.274 0.784422
## Fireplaces 4.253e+00 3.530e+00 1.205 0.229823
## GarageSF 2.436e-02 1.155e-02 2.110 0.036243 *
## ScreenPorchSF -1.340e-02 3.858e-02 -0.347 0.728727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.24 on 184 degrees of freedom
## Multiple R-squared: 0.8938, Adjusted R-squared: 0.8851
## F-statistic: 103.2 on 15 and 184 DF, p-value: < 2.2e-16
# Diagnostic plots for the test-sample fit.
plot(x = mod1.Test)

# Predict the test-sample prices with the training-sample model, then take
# actual minus predicted as the holdout residuals.
fitTrain <- predict(object = mod1.Train, newdata = Ames.Test.nv)
holdoutresid <- Ames.Test.nv[["Price"]] - fitTrain
holdoutresid
## 1 2 3 4 5 6
## -9.20411050 -5.02367620 31.74368642 -4.25073104 -8.75487126 -11.55908468
## 7 8 9 10 11 12
## -1.33061174 170.50099741 24.55734461 3.67066072 -19.30159654 -18.37303723
## 13 14 15 16 17 18
## 24.16169913 -12.79080707 19.22064033 -8.31881750 49.76167620 6.66377209
## 19 20 21 22 23 24
## -9.30808088 -0.24915939 -4.71009649 -52.61632634 -3.61945163 -47.38050332
## 25 26 27 28 29 30
## -21.44815176 47.41985052 -18.72502297 18.15441086 -29.17456901 -5.58388970
## 31 32 33 34 35 36
## -11.47238771 36.48664160 -10.60614539 -16.10127061 -30.38194007 16.02236605
## 37 38 39 40 41 42
## -29.96106302 -29.93314058 22.30656014 27.51336123 -3.48022226 4.29144034
## 43 44 45 46 47 48
## 15.20515248 -23.34212713 2.64343296 14.38217541 17.09284171 5.79978714
## 49 50 51 52 53 54
## 42.96780499 -6.52034822 1.04972824 39.06119689 -6.25753891 0.15766421
## 55 56 57 58 59 60
## -16.91218082 6.84745152 -28.04008509 -0.39721573 2.86421722 -20.60349054
## 61 62 63 64 65 66
## -37.19345445 32.96197500 -12.01427137 8.77887731 29.88810916 -16.52689133
## 67 68 69 70 71 72
## 2.48558041 6.52307306 -1.27495214 22.80176622 10.93616123 25.92347216
## 73 74 75 76 77 78
## 7.85233183 -40.11445491 31.13567252 -3.25598756 -5.50976421 11.08566128
## 79 80 81 82 83 84
## -11.37450075 -2.72956477 28.29120388 -20.57304482 0.70327102 -1.45059779
## 85 86 87 88 89 90
## -31.52133189 -28.03975374 2.39118681 31.00692065 22.06956637 7.43537260
## 91 92 93 94 95 96
## 10.73391950 -10.33907846 19.28095849 -8.18173972 -21.45794833 30.25202285
## 97 98 99 100 101 102
## -22.41706239 -9.85910903 55.31539851 67.22080764 -16.07738746 -9.70051739
## 103 104 105 106 107 108
## 27.40923773 -12.89337519 1.28213364 13.80966386 -4.50509617 -15.81370341
## 109 110 111 112 113 114
## -7.71387682 8.19548787 7.97609826 3.31615567 -12.75072451 31.76355246
## 115 116 117 118 119 120
## 8.11569019 -15.75953173 -53.74284456 -10.10920249 3.83544916 -16.77717941
## 121 122 123 124 125 126
## -4.56356401 -0.61855755 -2.70649779 -19.44587292 0.28648004 47.20963472
## 127 128 129 130 131 132
## 14.26871272 16.85402590 -25.99973266 -47.22192383 -17.07032620 30.33749155
## 133 134 135 136 137 138
## -9.92642180 1.97459756 -4.42990254 42.47947152 74.89413363 10.49494164
## 139 140 141 142 143 144
## -18.48355746 11.16700283 -11.74944314 18.13537271 -20.31623769 9.79730224
## 145 146 147 148 149 150
## 3.88602016 16.61523124 -29.90857947 -16.43783903 -7.90178489 4.14637009
## 151 152 153 154 155 156
## 3.64981553 0.70961471 19.84081455 -15.82685245 53.34737792 8.54396199
## 157 158 159 160 161 162
## 0.34356412 66.97363264 -4.02097476 2.85314704 -26.90312110 -33.63186638
## 163 164 165 166 167 168
## 52.05849788 14.91328449 -13.61834041 -31.05474091 92.49861827 23.78913537
## 169 170 171 172 173 174
## -53.05687272 7.49010200 0.26166653 -23.48703150 -34.13707346 -0.03510799
## 175 176 177 178 179 180
## 7.81276200 18.07515907 -15.28302809 -11.22573619 32.88275452 -7.46957299
## 181 182 183 184 185 186
## 12.48267019 12.34906499 -14.63485985 27.38254351 -6.30354307 40.44897984
## 187 188 189 190 191 192
## 26.83835761 -7.09625497 2.70036965 6.28982841 -18.57032529 35.51913356
## 193 194 195 196 197 198
## 12.34906499 -13.10191075 -11.04634779 -53.14722599 -12.86906564 18.27642958
## 199 200
## -12.52894892 1.97933787
# Centre and spread of the holdout prediction errors.
mean(holdoutresid)
## [1] 2.42646
sd(holdoutresid)
## [1] 26.75778
hist(holdoutresid)

# Cross-validation correlation: actual test prices vs. predictions made by
# the training-sample model.
crosscorr <- cor(Ames.Test.nv$Price, fitTrain)
crosscorr^2
## [1] 0.8808207

# Shrinkage = training-sample R-squared minus the squared cross-validation
# correlation. The R-squared must come from mod1.Train (the model that made
# the predictions), not from the model refit on the test data.
# The value below is approximate (0.8641 - 0.8808207, from the printed
# summaries); re-knit to refresh it.
shrinkage <- summary(mod1.Train)$r.squared - crosscorr^2
shrinkage
## [1] -0.0167
#Part 7
Ames.Train

# Extend the 15-predictor model with the categorical columns, each entered
# as a factor. NOTE: this overwrites the earlier mod2.
mod2 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF +
    factor(LotConfig) + factor(HouseStyle) + factor(ExteriorQ) +
    factor(ExteriorC) + factor(Foundation) + factor(BasementHt) +
    factor(BasementC) + factor(BasementFin) + factor(Heating) +
    factor(HeatingQC) + factor(CentralAir) + factor(KitchenQ) +
    factor(GarageType) + factor(GarageQ) + factor(GarageC),
  data = Ames.Train
)
summary(object = mod2)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF + factor(LotConfig) + factor(HouseStyle) +
## factor(ExteriorQ) + factor(ExteriorC) + factor(Foundation) +
## factor(BasementHt) + factor(BasementC) + factor(BasementFin) +
## factor(Heating) + factor(HeatingQC) + factor(CentralAir) +
## factor(KitchenQ) + factor(GarageType) + factor(GarageQ) +
## factor(GarageC), data = Ames.Train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -92.741 -12.846 -0.235 12.665 147.618
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.858e+02 2.357e+02 -3.333 0.000918 ***
## LotFrontage 9.786e-02 3.621e-02 2.702 0.007109 **
## LotArea 7.568e-04 1.556e-04 4.865 1.51e-06 ***
## Quality 1.014e+01 1.726e+00 5.874 7.53e-09 ***
## Condition 5.486e+00 1.508e+00 3.638 0.000302 ***
## YearBuilt 3.349e-01 9.734e-02 3.441 0.000626 ***
## YearRemodel 6.324e-02 9.180e-02 0.689 0.491242
## BasementFinSF 2.413e-02 4.086e-03 5.906 6.31e-09 ***
## BasementSF 1.604e-02 6.715e-03 2.388 0.017298 *
## GroundSF 5.938e-02 6.439e-03 9.223 < 2e-16 ***
## FullBath -8.318e+00 3.161e+00 -2.632 0.008747 **
## Bedroom -1.461e+00 2.203e+00 -0.663 0.507443
## TotalRooms 1.666e+00 1.466e+00 1.136 0.256333
## Fireplaces 6.855e+00 2.084e+00 3.290 0.001069 **
## GarageSF 3.242e-02 8.057e-03 4.024 6.56e-05 ***
## ScreenPorchSF 2.362e-02 2.103e-02 1.123 0.261984
## factor(LotConfig)CulDSac 9.288e+00 5.576e+00 1.666 0.096369 .
## factor(LotConfig)FR2 -3.357e+00 6.754e+00 -0.497 0.619389
## factor(LotConfig)FR3 -8.846e+00 1.501e+01 -0.589 0.555920
## factor(LotConfig)Inside 3.445e+00 2.948e+00 1.169 0.243076
## factor(HouseStyle)1.5Unf 8.654e+00 1.230e+01 0.704 0.481834
## factor(HouseStyle)1Story 3.895e+00 5.029e+00 0.775 0.438960
## factor(HouseStyle)2.5Fin -1.521e+01 2.784e+01 -0.547 0.584942
## factor(HouseStyle)2.5Unf -1.768e+01 1.286e+01 -1.374 0.169970
## factor(HouseStyle)2Story -4.711e+00 4.515e+00 -1.043 0.297321
## factor(HouseStyle)SFoyer -3.543e+00 8.147e+00 -0.435 0.663868
## factor(HouseStyle)SLvl 6.956e-01 7.347e+00 0.095 0.924602
## factor(ExteriorQ)Fa -4.588e+01 1.760e+01 -2.607 0.009396 **
## factor(ExteriorQ)Gd -5.448e+01 7.278e+00 -7.486 3.01e-13 ***
## factor(ExteriorQ)TA -5.922e+01 8.193e+00 -7.229 1.73e-12 ***
## factor(ExteriorC)Fa -2.826e+01 1.830e+01 -1.545 0.123037
## factor(ExteriorC)Gd -1.391e+00 1.566e+01 -0.089 0.929255
## factor(ExteriorC)Po -2.000e+01 3.280e+01 -0.610 0.542228
## factor(ExteriorC)TA -5.497e+00 1.534e+01 -0.358 0.720168
## factor(Foundation)CBlock -5.733e+00 4.944e+00 -1.160 0.246740
## factor(Foundation)PConc 4.134e+00 6.074e+00 0.681 0.496433
## factor(Foundation)Slab -3.819e+01 1.608e+01 -2.374 0.017941 *
## factor(Foundation)Stone 1.737e+01 2.042e+01 0.851 0.395330
## factor(Foundation)Wood -2.629e+01 2.008e+01 -1.309 0.191030
## factor(BasementHt)Fa -1.913e+01 9.475e+00 -2.019 0.044002 *
## factor(BasementHt)Gd -2.355e+01 5.003e+00 -4.706 3.23e-06 ***
## factor(BasementHt)None 8.791e+00 1.675e+01 0.525 0.599950
## factor(BasementHt)TA -2.792e+01 6.596e+00 -4.233 2.73e-05 ***
## factor(BasementC)Gd 1.117e+01 9.047e+00 1.235 0.217356
## factor(BasementC)None NA NA NA NA
## factor(BasementC)TA 1.279e+01 6.969e+00 1.835 0.067066 .
## factor(BasementFin)BLQ 3.554e+00 5.056e+00 0.703 0.482439
## factor(BasementFin)GLQ 1.418e+00 3.867e+00 0.367 0.714048
## factor(BasementFin)LwQ -2.980e+00 5.503e+00 -0.542 0.588357
## factor(BasementFin)None NA NA NA NA
## factor(BasementFin)Rec -8.300e-01 4.643e+00 -0.179 0.858196
## factor(BasementFin)Unf 3.018e+00 4.258e+00 0.709 0.478878
## factor(Heating)GasW -9.581e+00 1.747e+01 -0.549 0.583566
## factor(Heating)Grav 2.939e+01 2.413e+01 1.218 0.223718
## factor(Heating)OthW -2.012e+01 3.127e+01 -0.643 0.520220
## factor(Heating)Wall 5.806e+01 2.265e+01 2.564 0.010628 *
## factor(HeatingQC)Fa -9.919e+00 7.262e+00 -1.366 0.172558
## factor(HeatingQC)Gd -5.630e+00 3.435e+00 -1.639 0.101790
## factor(HeatingQC)Po -6.030e+00 2.931e+01 -0.206 0.837071
## factor(HeatingQC)TA -3.433e+00 3.399e+00 -1.010 0.312955
## factor(CentralAir)Y -1.426e+00 6.510e+00 -0.219 0.826694
## factor(KitchenQ)Fa 1.024e+00 9.993e+00 0.102 0.918455
## factor(KitchenQ)Gd -4.861e+00 5.543e+00 -0.877 0.380878
## factor(KitchenQ)TA -7.978e+00 6.190e+00 -1.289 0.198012
## factor(GarageType)Attchd 1.807e+01 1.384e+01 1.306 0.192089
## factor(GarageType)Basment 1.701e+01 1.742e+01 0.976 0.329341
## factor(GarageType)BuiltIn 1.921e+01 1.456e+01 1.320 0.187430
## factor(GarageType)CarPort -2.918e+01 2.373e+01 -1.230 0.219294
## factor(GarageType)Detchd 1.506e+01 1.388e+01 1.085 0.278406
## factor(GarageType)None 4.100e+01 1.760e+01 2.329 0.020223 *
## factor(GarageQ)Gd 9.769e+00 1.332e+01 0.734 0.463518
## factor(GarageQ)None NA NA NA NA
## factor(GarageQ)Po -2.368e+01 3.260e+01 -0.727 0.467846
## factor(GarageQ)TA -1.885e+00 7.491e+00 -0.252 0.801462
## factor(GarageC)Gd 1.745e+00 1.763e+01 0.099 0.921175
## factor(GarageC)None NA NA NA NA
## factor(GarageC)Po 6.586e+00 2.059e+01 0.320 0.749159
## factor(GarageC)TA 7.065e+00 1.025e+01 0.689 0.490849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.17 on 526 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.8975
## F-statistic: 72.88 on 73 and 526 DF, p-value: < 2.2e-16
# Backward elimination starting from the full model (mod2).
# MSE is the full model's residual variance; passing it as `scale` makes
# step() score candidate models with the known-scale (Mallows' Cp) criterion.
MSE <- summary(mod2)$sigma^2
# `trace` is documented as numeric/logical. The string "false" used before
# only silenced the log because if() coerces it to FALSE.
step(mod2, scale = MSE, direction = "backward", trace = FALSE)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + BasementFinSF + BasementSF + GroundSF + FullBath +
## Fireplaces + GarageSF + factor(ExteriorQ) + factor(Foundation) +
## factor(BasementHt) + factor(Heating) + factor(GarageType),
## data = Ames.Train)
##
## Coefficients:
## (Intercept) LotFrontage
## -8.080e+02 8.152e-02
## LotArea Quality
## 8.114e-04 1.019e+01
## Condition YearBuilt
## 7.630e+00 4.163e-01
## BasementFinSF BasementSF
## 2.161e-02 2.656e-02
## GroundSF FullBath
## 5.705e-02 -9.810e+00
## Fireplaces GarageSF
## 7.302e+00 3.169e-02
## factor(ExteriorQ)Fa factor(ExteriorQ)Gd
## -6.564e+01 -5.878e+01
## factor(ExteriorQ)TA factor(Foundation)CBlock
## -6.771e+01 -7.804e+00
## factor(Foundation)PConc factor(Foundation)Slab
## 3.748e+00 -4.812e+01
## factor(Foundation)Stone factor(Foundation)Wood
## -3.068e+00 -2.646e+01
## factor(BasementHt)Fa factor(BasementHt)Gd
## -2.071e+01 -2.493e+01
## factor(BasementHt)None factor(BasementHt)TA
## 1.202e+01 -2.836e+01
## factor(Heating)GasW factor(Heating)Grav
## -1.247e+01 1.363e+01
## factor(Heating)OthW factor(Heating)Wall
## -4.215e+01 5.347e+01
## factor(GarageType)Attchd factor(GarageType)Basment
## 1.206e+01 1.040e+01
## factor(GarageType)BuiltIn factor(GarageType)CarPort
## 1.466e+01 -2.739e+01
## factor(GarageType)Detchd factor(GarageType)None
## 8.353e+00 2.639e+01
# mod3: refit of the formula selected by backward elimination
mod3 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF +
    factor(ExteriorQ) + factor(Foundation) + factor(BasementHt) +
    factor(Heating) + factor(GarageType),
  data = Ames.Train
)
# Forward selection: begin with the intercept-only model (None) and let
# step() add terms drawn from mod2's formula, scored with scale = MSE.
None <- lm(Price ~ 1, data = Ames.Train)
step(
  None,
  scope = list(upper = mod2),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
##
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
## factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
## factor(HeatingQC) + Fireplaces + BasementSF + Condition +
## YearBuilt + LotFrontage + factor(Foundation) + FullBath +
## factor(GarageType) + factor(Heating), data = Ames.Train)
##
## Coefficients:
## (Intercept) Quality
## -7.088e+02 1.060e+01
## GroundSF BasementFinSF
## 6.089e-02 2.245e-02
## factor(ExteriorQ)Fa factor(ExteriorQ)Gd
## -6.252e+01 -5.858e+01
## factor(ExteriorQ)TA factor(HouseStyle)1.5Unf
## -6.536e+01 8.700e+00
## factor(HouseStyle)1Story factor(HouseStyle)2.5Fin
## 3.855e+00 -1.128e+01
## factor(HouseStyle)2.5Unf factor(HouseStyle)2Story
## -1.954e+01 -2.159e+00
## factor(HouseStyle)SFoyer factor(HouseStyle)SLvl
## -8.635e-01 2.966e+00
## LotArea factor(BasementHt)Fa
## 8.050e-04 -2.176e+01
## factor(BasementHt)Gd factor(BasementHt)None
## -2.403e+01 6.051e+00
## factor(BasementHt)TA GarageSF
## -2.845e+01 3.092e-02
## factor(HeatingQC)Fa factor(HeatingQC)Gd
## -1.310e+01 -6.606e+00
## factor(HeatingQC)Po factor(HeatingQC)TA
## -2.861e+00 -4.945e+00
## Fireplaces BasementSF
## 7.307e+00 2.095e-02
## Condition YearBuilt
## 7.146e+00 3.650e-01
## LotFrontage factor(Foundation)CBlock
## 8.541e-02 -5.861e+00
## factor(Foundation)PConc factor(Foundation)Slab
## 4.706e+00 -4.428e+01
## factor(Foundation)Stone factor(Foundation)Wood
## -2.640e+00 -2.441e+01
## FullBath factor(GarageType)Attchd
## -9.179e+00 1.163e+01
## factor(GarageType)Basment factor(GarageType)BuiltIn
## 8.515e+00 1.358e+01
## factor(GarageType)CarPort factor(GarageType)Detchd
## -2.986e+01 8.297e+00
## factor(GarageType)None factor(Heating)GasW
## 2.701e+01 -6.868e+00
## factor(Heating)Grav factor(Heating)OthW
## 1.990e+01 -1.826e+01
## factor(Heating)Wall
## 5.886e+01
# mod4: refit of the formula selected by forward selection
mod4 <- lm(
  Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
    factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
    factor(HeatingQC) + Fireplaces + BasementSF + Condition +
    YearBuilt + LotFrontage + factor(Foundation) + FullBath +
    factor(GarageType) + factor(Heating),
  data = Ames.Train
)
# Stepwise selection: same start (None) and upper scope (mod2) as forward
# selection, but terms may be both added and dropped at each step.
step(
  None,
  scope = list(upper = mod2),
  scale = MSE,
  direction = "both",
  trace = FALSE
)
##
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
## LotArea + factor(BasementHt) + GarageSF + Fireplaces + BasementSF +
## Condition + YearBuilt + LotFrontage + FullBath + factor(Foundation) +
## factor(Heating) + factor(GarageType), data = Ames.Train)
##
## Coefficients:
## (Intercept) Quality
## -8.080e+02 1.019e+01
## GroundSF BasementFinSF
## 5.705e-02 2.161e-02
## factor(ExteriorQ)Fa factor(ExteriorQ)Gd
## -6.564e+01 -5.878e+01
## factor(ExteriorQ)TA LotArea
## -6.771e+01 8.114e-04
## factor(BasementHt)Fa factor(BasementHt)Gd
## -2.071e+01 -2.493e+01
## factor(BasementHt)None factor(BasementHt)TA
## 1.202e+01 -2.836e+01
## GarageSF Fireplaces
## 3.169e-02 7.302e+00
## BasementSF Condition
## 2.656e-02 7.630e+00
## YearBuilt LotFrontage
## 4.163e-01 8.152e-02
## FullBath factor(Foundation)CBlock
## -9.810e+00 -7.804e+00
## factor(Foundation)PConc factor(Foundation)Slab
## 3.748e+00 -4.812e+01
## factor(Foundation)Stone factor(Foundation)Wood
## -3.068e+00 -2.646e+01
## factor(Heating)GasW factor(Heating)Grav
## -1.247e+01 1.363e+01
## factor(Heating)OthW factor(Heating)Wall
## -4.215e+01 5.347e+01
## factor(GarageType)Attchd factor(GarageType)Basment
## 1.206e+01 1.040e+01
## factor(GarageType)BuiltIn factor(GarageType)CarPort
## 1.466e+01 -2.739e+01
## factor(GarageType)Detchd factor(GarageType)None
## 8.353e+00 2.639e+01
# Stepwise regression selects the same model as backward elimination (mod3),
# so only mod3 and mod4 need to be compared with the full model (mod2).
# Nested comparison of each selected model against the full model, followed
# by the full model's summary.
anova(mod3, mod2)
anova(mod4, mod2)
summary(mod2)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF +
## FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF +
## ScreenPorchSF + factor(LotConfig) + factor(HouseStyle) +
## factor(ExteriorQ) + factor(ExteriorC) + factor(Foundation) +
## factor(BasementHt) + factor(BasementC) + factor(BasementFin) +
## factor(Heating) + factor(HeatingQC) + factor(CentralAir) +
## factor(KitchenQ) + factor(GarageType) + factor(GarageQ) +
## factor(GarageC), data = Ames.Train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -92.741 -12.846 -0.235 12.665 147.618
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.858e+02 2.357e+02 -3.333 0.000918 ***
## LotFrontage 9.786e-02 3.621e-02 2.702 0.007109 **
## LotArea 7.568e-04 1.556e-04 4.865 1.51e-06 ***
## Quality 1.014e+01 1.726e+00 5.874 7.53e-09 ***
## Condition 5.486e+00 1.508e+00 3.638 0.000302 ***
## YearBuilt 3.349e-01 9.734e-02 3.441 0.000626 ***
## YearRemodel 6.324e-02 9.180e-02 0.689 0.491242
## BasementFinSF 2.413e-02 4.086e-03 5.906 6.31e-09 ***
## BasementSF 1.604e-02 6.715e-03 2.388 0.017298 *
## GroundSF 5.938e-02 6.439e-03 9.223 < 2e-16 ***
## FullBath -8.318e+00 3.161e+00 -2.632 0.008747 **
## Bedroom -1.461e+00 2.203e+00 -0.663 0.507443
## TotalRooms 1.666e+00 1.466e+00 1.136 0.256333
## Fireplaces 6.855e+00 2.084e+00 3.290 0.001069 **
## GarageSF 3.242e-02 8.057e-03 4.024 6.56e-05 ***
## ScreenPorchSF 2.362e-02 2.103e-02 1.123 0.261984
## factor(LotConfig)CulDSac 9.288e+00 5.576e+00 1.666 0.096369 .
## factor(LotConfig)FR2 -3.357e+00 6.754e+00 -0.497 0.619389
## factor(LotConfig)FR3 -8.846e+00 1.501e+01 -0.589 0.555920
## factor(LotConfig)Inside 3.445e+00 2.948e+00 1.169 0.243076
## factor(HouseStyle)1.5Unf 8.654e+00 1.230e+01 0.704 0.481834
## factor(HouseStyle)1Story 3.895e+00 5.029e+00 0.775 0.438960
## factor(HouseStyle)2.5Fin -1.521e+01 2.784e+01 -0.547 0.584942
## factor(HouseStyle)2.5Unf -1.768e+01 1.286e+01 -1.374 0.169970
## factor(HouseStyle)2Story -4.711e+00 4.515e+00 -1.043 0.297321
## factor(HouseStyle)SFoyer -3.543e+00 8.147e+00 -0.435 0.663868
## factor(HouseStyle)SLvl 6.956e-01 7.347e+00 0.095 0.924602
## factor(ExteriorQ)Fa -4.588e+01 1.760e+01 -2.607 0.009396 **
## factor(ExteriorQ)Gd -5.448e+01 7.278e+00 -7.486 3.01e-13 ***
## factor(ExteriorQ)TA -5.922e+01 8.193e+00 -7.229 1.73e-12 ***
## factor(ExteriorC)Fa -2.826e+01 1.830e+01 -1.545 0.123037
## factor(ExteriorC)Gd -1.391e+00 1.566e+01 -0.089 0.929255
## factor(ExteriorC)Po -2.000e+01 3.280e+01 -0.610 0.542228
## factor(ExteriorC)TA -5.497e+00 1.534e+01 -0.358 0.720168
## factor(Foundation)CBlock -5.733e+00 4.944e+00 -1.160 0.246740
## factor(Foundation)PConc 4.134e+00 6.074e+00 0.681 0.496433
## factor(Foundation)Slab -3.819e+01 1.608e+01 -2.374 0.017941 *
## factor(Foundation)Stone 1.737e+01 2.042e+01 0.851 0.395330
## factor(Foundation)Wood -2.629e+01 2.008e+01 -1.309 0.191030
## factor(BasementHt)Fa -1.913e+01 9.475e+00 -2.019 0.044002 *
## factor(BasementHt)Gd -2.355e+01 5.003e+00 -4.706 3.23e-06 ***
## factor(BasementHt)None 8.791e+00 1.675e+01 0.525 0.599950
## factor(BasementHt)TA -2.792e+01 6.596e+00 -4.233 2.73e-05 ***
## factor(BasementC)Gd 1.117e+01 9.047e+00 1.235 0.217356
## factor(BasementC)None NA NA NA NA
## factor(BasementC)TA 1.279e+01 6.969e+00 1.835 0.067066 .
## factor(BasementFin)BLQ 3.554e+00 5.056e+00 0.703 0.482439
## factor(BasementFin)GLQ 1.418e+00 3.867e+00 0.367 0.714048
## factor(BasementFin)LwQ -2.980e+00 5.503e+00 -0.542 0.588357
## factor(BasementFin)None NA NA NA NA
## factor(BasementFin)Rec -8.300e-01 4.643e+00 -0.179 0.858196
## factor(BasementFin)Unf 3.018e+00 4.258e+00 0.709 0.478878
## factor(Heating)GasW -9.581e+00 1.747e+01 -0.549 0.583566
## factor(Heating)Grav 2.939e+01 2.413e+01 1.218 0.223718
## factor(Heating)OthW -2.012e+01 3.127e+01 -0.643 0.520220
## factor(Heating)Wall 5.806e+01 2.265e+01 2.564 0.010628 *
## factor(HeatingQC)Fa -9.919e+00 7.262e+00 -1.366 0.172558
## factor(HeatingQC)Gd -5.630e+00 3.435e+00 -1.639 0.101790
## factor(HeatingQC)Po -6.030e+00 2.931e+01 -0.206 0.837071
## factor(HeatingQC)TA -3.433e+00 3.399e+00 -1.010 0.312955
## factor(CentralAir)Y -1.426e+00 6.510e+00 -0.219 0.826694
## factor(KitchenQ)Fa 1.024e+00 9.993e+00 0.102 0.918455
## factor(KitchenQ)Gd -4.861e+00 5.543e+00 -0.877 0.380878
## factor(KitchenQ)TA -7.978e+00 6.190e+00 -1.289 0.198012
## factor(GarageType)Attchd 1.807e+01 1.384e+01 1.306 0.192089
## factor(GarageType)Basment 1.701e+01 1.742e+01 0.976 0.329341
## factor(GarageType)BuiltIn 1.921e+01 1.456e+01 1.320 0.187430
## factor(GarageType)CarPort -2.918e+01 2.373e+01 -1.230 0.219294
## factor(GarageType)Detchd 1.506e+01 1.388e+01 1.085 0.278406
## factor(GarageType)None 4.100e+01 1.760e+01 2.329 0.020223 *
## factor(GarageQ)Gd 9.769e+00 1.332e+01 0.734 0.463518
## factor(GarageQ)None NA NA NA NA
## factor(GarageQ)Po -2.368e+01 3.260e+01 -0.727 0.467846
## factor(GarageQ)TA -1.885e+00 7.491e+00 -0.252 0.801462
## factor(GarageC)Gd 1.745e+00 1.763e+01 0.099 0.921175
## factor(GarageC)None NA NA NA NA
## factor(GarageC)Po 6.586e+00 2.059e+01 0.320 0.749159
## factor(GarageC)TA 7.065e+00 1.025e+01 0.689 0.490849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.17 on 526 degrees of freedom
## Multiple R-squared: 0.91, Adjusted R-squared: 0.8975
## F-statistic: 72.88 on 73 and 526 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for mod3 (the backward-elimination /
# stepwise model).
summary(mod3)
##
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition +
## YearBuilt + BasementFinSF + BasementSF + GroundSF + FullBath +
## Fireplaces + GarageSF + factor(ExteriorQ) + factor(Foundation) +
## factor(BasementHt) + factor(Heating) + factor(GarageType),
## data = Ames.Train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -84.101 -14.493 -0.509 11.652 153.947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.080e+02 1.603e+02 -5.042 6.21e-07 ***
## LotFrontage 8.152e-02 3.315e-02 2.459 0.014235 *
## LotArea 8.114e-04 1.436e-04 5.649 2.56e-08 ***
## Quality 1.019e+01 1.610e+00 6.331 4.97e-10 ***
## Condition 7.630e+00 1.141e+00 6.685 5.55e-11 ***
## YearBuilt 4.163e-01 8.115e-02 5.130 3.98e-07 ***
## BasementFinSF 2.161e-02 2.956e-03 7.312 9.03e-13 ***
## BasementSF 2.656e-02 4.385e-03 6.057 2.54e-09 ***
## GroundSF 5.705e-02 3.775e-03 15.115 < 2e-16 ***
## FullBath -9.810e+00 2.914e+00 -3.367 0.000812 ***
## Fireplaces 7.302e+00 1.996e+00 3.659 0.000277 ***
## GarageSF 3.169e-02 7.670e-03 4.132 4.14e-05 ***
## factor(ExteriorQ)Fa -6.564e+01 1.593e+01 -4.120 4.35e-05 ***
## factor(ExteriorQ)Gd -5.878e+01 6.618e+00 -8.882 < 2e-16 ***
## factor(ExteriorQ)TA -6.771e+01 7.458e+00 -9.078 < 2e-16 ***
## factor(Foundation)CBlock -7.804e+00 4.535e+00 -1.721 0.085857 .
## factor(Foundation)PConc 3.748e+00 5.643e+00 0.664 0.506885
## factor(Foundation)Slab -4.812e+01 1.548e+01 -3.108 0.001980 **
## factor(Foundation)Stone -3.068e+00 1.622e+01 -0.189 0.850054
## factor(Foundation)Wood -2.646e+01 1.958e+01 -1.351 0.177109
## factor(BasementHt)Fa -2.071e+01 9.004e+00 -2.301 0.021780 *
## factor(BasementHt)Gd -2.493e+01 4.784e+00 -5.211 2.63e-07 ***
## factor(BasementHt)None 1.202e+01 1.387e+01 0.867 0.386223
## factor(BasementHt)TA -2.836e+01 6.143e+00 -4.616 4.85e-06 ***
## factor(Heating)GasW -1.247e+01 1.592e+01 -0.784 0.433599
## factor(Heating)Grav 1.363e+01 1.837e+01 0.742 0.458566
## factor(Heating)OthW -4.215e+01 2.582e+01 -1.632 0.103162
## factor(Heating)Wall 5.347e+01 2.036e+01 2.626 0.008876 **
## factor(GarageType)Attchd 1.206e+01 1.332e+01 0.905 0.365631
## factor(GarageType)Basment 1.040e+01 1.677e+01 0.620 0.535515
## factor(GarageType)BuiltIn 1.466e+01 1.400e+01 1.047 0.295668
## factor(GarageType)CarPort -2.739e+01 2.242e+01 -1.222 0.222366
## factor(GarageType)Detchd 8.353e+00 1.340e+01 0.623 0.533270
## factor(GarageType)None 2.639e+01 1.487e+01 1.775 0.076430 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.09 on 566 degrees of freedom
## Multiple R-squared: 0.9038, Adjusted R-squared: 0.8982
## F-statistic: 161.2 on 33 and 566 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for mod4 (the forward-selection model).
summary(mod4)
##
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
## factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
## factor(HeatingQC) + Fireplaces + BasementSF + Condition +
## YearBuilt + LotFrontage + factor(Foundation) + FullBath +
## factor(GarageType) + factor(Heating), data = Ames.Train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -87.347 -14.123 -0.541 11.567 153.249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.088e+02 1.700e+02 -4.169 3.55e-05 ***
## Quality 1.060e+01 1.649e+00 6.432 2.72e-10 ***
## GroundSF 6.089e-02 5.162e-03 11.794 < 2e-16 ***
## BasementFinSF 2.245e-02 2.994e-03 7.499 2.57e-13 ***
## factor(ExteriorQ)Fa -6.252e+01 1.607e+01 -3.891 0.000112 ***
## factor(ExteriorQ)Gd -5.858e+01 6.624e+00 -8.843 < 2e-16 ***
## factor(ExteriorQ)TA -6.536e+01 7.503e+00 -8.710 < 2e-16 ***
## factor(HouseStyle)1.5Unf 8.700e+00 1.149e+01 0.757 0.449221
## factor(HouseStyle)1Story 3.855e+00 4.781e+00 0.806 0.420363
## factor(HouseStyle)2.5Fin -1.128e+01 2.692e+01 -0.419 0.675400
## factor(HouseStyle)2.5Unf -1.954e+01 1.226e+01 -1.594 0.111556
## factor(HouseStyle)2Story -2.159e+00 4.319e+00 -0.500 0.617248
## factor(HouseStyle)SFoyer -8.635e-01 7.831e+00 -0.110 0.912237
## factor(HouseStyle)SLvl 2.966e+00 7.063e+00 0.420 0.674673
## LotArea 8.050e-04 1.447e-04 5.563 4.12e-08 ***
## factor(BasementHt)Fa -2.176e+01 9.101e+00 -2.391 0.017131 *
## factor(BasementHt)Gd -2.403e+01 4.799e+00 -5.008 7.42e-07 ***
## factor(BasementHt)None 6.051e+00 1.463e+01 0.414 0.679255
## factor(BasementHt)TA -2.845e+01 6.264e+00 -4.542 6.85e-06 ***
## GarageSF 3.092e-02 7.820e-03 3.954 8.67e-05 ***
## factor(HeatingQC)Fa -1.310e+01 6.907e+00 -1.896 0.058476 .
## factor(HeatingQC)Gd -6.606e+00 3.308e+00 -1.997 0.046300 *
## factor(HeatingQC)Po -2.861e+00 2.748e+01 -0.104 0.917125
## factor(HeatingQC)TA -4.945e+00 3.141e+00 -1.574 0.115971
## Fireplaces 7.307e+00 2.013e+00 3.630 0.000310 ***
## BasementSF 2.095e-02 6.319e-03 3.315 0.000978 ***
## Condition 7.146e+00 1.199e+00 5.963 4.41e-09 ***
## YearBuilt 3.650e-01 8.644e-02 4.223 2.82e-05 ***
## LotFrontage 8.541e-02 3.340e-02 2.557 0.010816 *
## factor(Foundation)CBlock -5.861e+00 4.673e+00 -1.254 0.210261
## factor(Foundation)PConc 4.706e+00 5.763e+00 0.817 0.414452
## factor(Foundation)Slab -4.428e+01 1.568e+01 -2.823 0.004929 **
## factor(Foundation)Stone -2.640e+00 1.665e+01 -0.159 0.874083
## factor(Foundation)Wood -2.441e+01 1.975e+01 -1.236 0.217136
## FullBath -9.179e+00 2.956e+00 -3.105 0.002000 **
## factor(GarageType)Attchd 1.163e+01 1.337e+01 0.870 0.384655
## factor(GarageType)Basment 8.515e+00 1.688e+01 0.504 0.614116
## factor(GarageType)BuiltIn 1.358e+01 1.409e+01 0.964 0.335537
## factor(GarageType)CarPort -2.986e+01 2.244e+01 -1.330 0.183925
## factor(GarageType)Detchd 8.297e+00 1.345e+01 0.617 0.537523
## factor(GarageType)None 2.701e+01 1.498e+01 1.803 0.071926 .
## factor(Heating)GasW -6.868e+00 1.643e+01 -0.418 0.676161
## factor(Heating)Grav 1.990e+01 2.015e+01 0.987 0.324010
## factor(Heating)OthW -1.826e+01 2.919e+01 -0.625 0.531996
## factor(Heating)Wall 5.886e+01 2.123e+01 2.772 0.005751 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.07 on 555 degrees of freedom
## Multiple R-squared: 0.9058, Adjusted R-squared: 0.8984
## F-statistic: 121.3 on 44 and 555 DF, p-value: < 2.2e-16
# mod5: regression of Price on the numeric and categorical predictors listed
# below; categorical predictors are wrapped in factor().
mod5 <- lm(
  Price ~ LotFrontage + LotArea + factor(LotConfig) + factor(HouseStyle) +
    Quality + Condition + YearBuilt + YearRemodel +
    factor(ExteriorQ) + factor(ExteriorC) + factor(Foundation) +
    factor(BasementHt) + factor(BasementC) + factor(BasementFin) +
    BasementFinSF + BasementUnFinSF + BasementSF +
    factor(Heating) + factor(HeatingQC) + factor(CentralAir) +
    FirstSF + SecondSF + GroundSF +
    BasementFBath + BasementHBath + FullBath + HalfBath + Bedroom +
    factor(KitchenQ) + TotalRooms + Fireplaces +
    factor(GarageType) + GarageCars + GarageSF +
    factor(GarageQ) + factor(GarageC) +
    WoodDeckSF + OpenPorchSF + EnclosedPorchSF + ScreenPorchSF,
  data = Ames.Train
)
# Best-subsets search over mod3's predictors: one best model per size
# (nbest = 1), up to 50 variables, tabulated with ShowSubsets().
all.predictors1 <- regsubsets(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF +
    factor(ExteriorQ) + factor(Foundation) + factor(BasementHt) +
    factor(Heating) + factor(GarageType),
  data = Ames.Train,
  nbest = 1,
  nvmax = 50
)
ShowSubsets(all.predictors1)
# Best-subsets search over mod4's predictors: one best model per size
# (nbest = 1), up to 50 variables, tabulated with ShowSubsets().
all.predictors2 <- regsubsets(
  Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
    factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
    factor(HeatingQC) + Fireplaces + BasementSF + Condition +
    YearBuilt + LotFrontage + factor(Foundation) + FullBath +
    factor(GarageType) + factor(Heating),
  data = Ames.Train,
  nbest = 1,
  nvmax = 50
)
ShowSubsets(all.predictors2)
# Mallows' Cp of each selected model, judged against the full model (mod2).
#
# The values previously pasted here from ols_mallows_cp() were -3.842899
# (mod3) and -11.41771 (mod4). Those are what SSE / MSE_full - (n - 2p) gives
# with p = 17 and p = 19, i.e. one parameter per model term, whereas the
# summaries above show 34 and 45 estimated coefficients once the factor()
# terms are expanded into dummy variables. Cp is therefore computed directly
# so that p counts every estimated coefficient.
# NOTE(review): the p = 17 / 19 reading is inferred from the pasted output,
# not from the olsrr source -- confirm against the installed olsrr version.
MallowsCp <- function(model, fullmodel) {
  n <- length(residuals(model))
  p <- model$rank                          # estimated (non-aliased) coefficients
  sse <- sum(residuals(model)^2)
  mse.full <- summary(fullmodel)$sigma^2   # residual variance of the full model
  sse / mse.full - (n - 2 * p)
}
MallowsCp(mod3, mod2)
## (stale: -3.842899 came from ols_mallows_cp(mod3, mod2); rerun to refresh)
MallowsCp(mod4, mod2)
## (stale: -11.41771 came from ols_mallows_cp(mod4, mod2); rerun to refresh)
# Transformations
# mod6: mod3's predictors with a log-transformed response; plot() draws the
# standard lm diagnostic plots.
mod6 <- lm(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF +
    factor(ExteriorQ) + factor(Foundation) + factor(BasementHt) +
    factor(Heating) + factor(GarageType),
  data = Ames.Train
)
plot(mod6)
## Warning: not plotting observations with leverage one:
## 427
# Box-Cox profile for mod1; lambda is the grid value (BC$x) at which the
# profiled criterion (BC$y) is largest.
# NOTE(review): lambda is estimated from mod1, while mod7 applies it to a
# model built on mod3's predictors -- confirm this is intended.
BC <- boxCox(mod1)
lambda <- with(BC, x[which.max(y)])
lambda
## [1] 0.3030303
# mod7: mod3's predictors with the response raised to the Box-Cox power
# lambda; plot() draws the standard lm diagnostic plots.
mod7 <- lm(
  Price^lambda ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF +
    factor(ExteriorQ) + factor(Foundation) + factor(BasementHt) +
    factor(Heating) + factor(GarageType),
  data = Ames.Train
)
plot(mod7)
## Warning: not plotting observations with leverage one:
## 427
# mod8: log(Price) regressed on square-root-transformed numeric predictors
# plus the categorical predictors as factors; plot() draws the diagnostics.
mod8 <- lm(
  log(Price) ~ sqrt(Quality) + sqrt(GroundSF) + factor(ExteriorQ) +
    sqrt(BasementFinSF) + sqrt(LotArea) + factor(BasementHt) +
    sqrt(YearBuilt) + sqrt(Condition) + sqrt(BasementSF) +
    sqrt(GarageSF) + sqrt(Fireplaces) + factor(Foundation) +
    sqrt(LotFrontage) + sqrt(FullBath) + factor(Heating) +
    sqrt(EnclosedPorchSF) + factor(GarageType),
  data = Ames.Train
)
plot(mod8)
## Warning: not plotting observations with leverage one:
## 427
# mod9: same predictors as mod8, but with sqrt(Price) as the response;
# plot() draws the diagnostics.
mod9 <- lm(
  sqrt(Price) ~ sqrt(Quality) + sqrt(GroundSF) + factor(ExteriorQ) +
    sqrt(BasementFinSF) + sqrt(LotArea) + factor(BasementHt) +
    sqrt(YearBuilt) + sqrt(Condition) + sqrt(BasementSF) +
    sqrt(GarageSF) + sqrt(Fireplaces) + factor(Foundation) +
    sqrt(LotFrontage) + sqrt(FullBath) + factor(Heating) +
    sqrt(EnclosedPorchSF) + factor(GarageType),
  data = Ames.Train
)
plot(mod9)
## Warning: not plotting observations with leverage one:
## 427
# Part 8: cross-validation of mod8 on the holdout data (Ames.Test)
# (The heading was bare text before, which is a parse error in an R script.)
#
# mod8 models log(Price), so predict() returns fitted values on the log scale.
# Holdout residuals and the cross-validation correlation are therefore taken
# against log(Price), which keeps them comparable with mod8's training
# residuals and R-squared.
fitTrain.2 <- predict(mod8, newdata = Ames.Test)
holdoutresid.2 <- log(Ames.Test$Price) - fitTrain.2
mean(holdoutresid.2)
## (stale: 177.1431 was raw Price minus the log-scale fit; rerun to refresh)
sd(holdoutresid.2)
## (stale: 77.05929 was raw Price minus the log-scale fit; rerun to refresh)
sd(mod8$residuals)
## [1] 0.1311035
hist(holdoutresid.2)
crosscorr2 <- cor(log(Ames.Test$Price), fitTrain.2)
# Print the correlation just computed. The earlier code printed `crosscorr`,
# a different object that is not defined in this part.
crosscorr2^2
## (stale: 0.8808207 was the square of `crosscorr`; rerun to refresh)
shrinkage <- summary(mod8)$r.squared - crosscorr2^2
shrinkage
## (stale: 0.0615858 used a raw-Price vs log-fit correlation; rerun to refresh)
# Part 9: price prediction for a single new home
# (The heading was bare text before, which is a parse error in an R script.)
NewHome <- data.frame(
  YearBuilt = 1995, YearRemodel = 2003, Quality = 7, Condition = 5,
  ExteriorQ = "Gd", ExteriorC = "Gd", LotFrontage = 90, LotArea = 11060,
  LotConfig = "Corner", HouseStyle = "2Story", Foundation = "PConc",
  BasementHt = "Ex", BasementSF = 1150, BasementFinSF = 0,
  BasementFin = "Unf", Heating = "GasA", HeatingQC = "Ex", CentralAir = "Y",
  FirstSF = 1164, SecondSF = 1150, GroundSF = 2314,
  BasementFBath = 0, BasementHBath = 0, FullBath = 2, HalfBath = 1,
  Bedroom = 3, TotalRooms = 9, Fireplaces = 1, GarageType = "Attchd",
  ScreenPorchSF = 0, GarageCars = 2, GarageSF = 502,
  GarageQ = "TA", GarageC = "TA", OpenPorchSF = 274, EnclosedPorchSF = 0
)
# mod8 predicts log(Price); exp() maps the fit and the 95% prediction bounds
# back to the Price scale.
exp(predict(mod8, newdata = NewHome, interval = "prediction", level = 0.95))
## fit lwr upr
## 1 259.6453 197.9445 340.5787