library(readr)
library(car)
source("VIF.R")
source("ShowSubsets.R")
source("HistNormal.R")
library(corrplot) 
library(leaps)
library(tidyverse)
library(olsrr)
# Read the training and hold-out samples of the Ames housing data.
Ames.Train <- read_csv(file = "AmesTrain16.csv")
Ames.Test <- read_csv(file = "AmesTest16.csv")

# PART 1:
# Preview the raw training data, then keep 26 columns chosen by position.
# NOTE(review): these are presumably the numeric variables (cor() is applied
# to them below) -- confirm against the CSV header.
Ames.Train %>% head()
Ames.Train.nv <- Ames.Train %>%
  select(c(
    2, 3, 4, 7, 8, 9, 10, 17, 18, 19, 23, 24, 25,
    26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 40, 41, 42
  ))
Ames.Train.nv %>% head()

# Pairwise correlations of the retained columns, drawn as an upper triangle.
Ames.Train.nv.corr <- cor(Ames.Train.nv)
corrplot(Ames.Train.nv.corr, type = "upper")

# Full model: Price regressed on every other retained column.
mod1 <- lm(Price ~ ., data = Ames.Train.nv)
summary(mod1)
## 
## Call:
## lm(formula = Price ~ ., data = Ames.Train.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -93.788 -16.602  -0.395  13.693 185.004 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.282e+03  1.844e+02  -6.954  9.7e-12 ***
## LotFrontage      1.267e-01  3.874e-02   3.271  0.00114 ** 
## LotArea          8.973e-04  1.683e-04   5.332  1.4e-07 ***
## Quality          1.721e+01  1.730e+00   9.947  < 2e-16 ***
## Condition        2.775e+00  1.388e+00   2.000  0.04602 *  
## YearBuilt        3.458e-01  8.204e-02   4.215  2.9e-05 ***
## YearRemodel      2.532e-01  9.103e-02   2.782  0.00558 ** 
## BasementFinSF    2.473e-02  8.212e-03   3.012  0.00271 ** 
## BasementUnFinSF -1.224e-03  8.326e-03  -0.147  0.88317    
## BasementSF       2.792e-02  9.938e-03   2.810  0.00512 ** 
## FirstSF          9.300e-03  3.413e-02   0.272  0.78535    
## SecondSF        -2.105e-03  3.362e-02  -0.063  0.95011    
## GroundSF         4.762e-02  3.410e-02   1.397  0.16306    
## BasementFBath    1.170e+00  3.467e+00   0.338  0.73583    
## BasementHBath   -2.310e+00  5.207e+00  -0.444  0.65746    
## FullBath        -4.311e+00  3.736e+00  -1.154  0.24891    
## HalfBath         4.539e+00  3.764e+00   1.206  0.22832    
## Bedroom         -5.697e+00  2.369e+00  -2.405  0.01651 *  
## TotalRooms       3.562e+00  1.619e+00   2.200  0.02820 *  
## Fireplaces       4.411e+00  2.327e+00   1.896  0.05845 .  
## GarageCars      -5.027e-01  3.830e+00  -0.131  0.89561    
## GarageSF         3.131e-02  1.326e-02   2.362  0.01852 *  
## WoodDeckSF       1.208e-02  1.020e-02   1.185  0.23665    
## OpenPorchSF      8.583e-03  2.025e-02   0.424  0.67178    
## EnclosedPorchSF  2.758e-02  1.883e-02   1.465  0.14347    
## ScreenPorchSF    4.190e-02  2.442e-02   1.716  0.08669 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.45 on 574 degrees of freedom
## Multiple R-squared:  0.8656, Adjusted R-squared:  0.8598 
## F-statistic: 147.9 on 25 and 574 DF,  p-value: < 2.2e-16
# Diagnostic plots for the full model.
plot(mod1)

# Variance inflation factor of every predictor in the full model.
vif(mod1)
##     LotFrontage         LotArea         Quality       Condition       YearBuilt 
##        1.124055        1.211997        3.738469        1.500289        4.269855 
##     YearRemodel   BasementFinSF BasementUnFinSF      BasementSF         FirstSF 
##        2.372390        8.867104        8.955856       10.640480       97.844513 
##        SecondSF        GroundSF   BasementFBath   BasementHBath        FullBath 
##      146.265317      181.674196        2.254359        1.194088        2.851072 
##        HalfBath         Bedroom      TotalRooms      Fireplaces      GarageCars 
##        2.397305        2.580379        4.068803        1.590703        5.264011 
##        GarageSF      WoodDeckSF     OpenPorchSF EnclosedPorchSF   ScreenPorchSF 
##        5.181973        1.166073        1.246137        1.272542        1.102638
# Subset search over all predictors of Price: keep the two best subsets of
# each size, for sizes up to 25, and display them with ShowSubsets().
all.predictors <- regsubsets(
  Price ~ .,
  data = Ames.Train.nv,
  nbest = 2,
  nvmax = 25
)
ShowSubsets(all.predictors)

Since the #1-ranked model with 15 predictor variables has the highest adjusted R-squared and the lowest Cp, it is the best model.

# Fit and summarise the 15-predictor model.
mod2 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)
summary(mod2)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Train.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.845 -16.864  -0.772  14.072 185.146 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.309e+03  1.706e+02  -7.673 7.10e-14 ***
## LotFrontage    1.300e-01  3.790e-02   3.430 0.000646 ***
## LotArea        9.186e-04  1.656e-04   5.546 4.42e-08 ***
## Quality        1.706e+01  1.674e+00  10.187  < 2e-16 ***
## Condition      2.630e+00  1.363e+00   1.930 0.054091 .  
## YearBuilt      3.512e-01  7.247e-02   4.846 1.62e-06 ***
## YearRemodel    2.642e-01  8.957e-02   2.950 0.003304 ** 
## BasementFinSF  2.640e-02  3.393e-03   7.782 3.26e-14 ***
## BasementSF     3.197e-02  4.298e-03   7.438 3.68e-13 ***
## GroundSF       5.233e-02  5.677e-03   9.219  < 2e-16 ***
## FullBath      -6.531e+00  3.367e+00  -1.940 0.052869 .  
## Bedroom       -6.459e+00  2.317e+00  -2.787 0.005492 ** 
## TotalRooms     3.732e+00  1.594e+00   2.341 0.019565 *  
## Fireplaces     4.941e+00  2.270e+00   2.177 0.029918 *  
## GarageSF       3.171e-02  7.905e-03   4.011 6.82e-05 ***
## ScreenPorchSF  3.899e-02  2.375e-02   1.641 0.101239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared:  0.8641, Adjusted R-squared:  0.8606 
## F-statistic: 247.5 on 15 and 584 DF,  p-value: < 2.2e-16
# Diagnostic plots for the 15-predictor model.
plot(mod2)

# Backward elimination starting from the model that contains every predictor.
# The full model's residual variance is passed as `scale`, which makes step()
# compare models with a Cp-type criterion instead of plain AIC.
Full <- lm(Price ~ ., data = Ames.Train.nv)
MSE <- summary(Full)$sigma^2
# `trace` must be logical or numeric. The string "false" only worked through
# implicit coercion; FALSE also matches the forward-selection call further down.
step(Full, scale = MSE, trace = FALSE)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Train.nv)
## 
## Coefficients:
##   (Intercept)    LotFrontage        LotArea        Quality      Condition  
##    -1.309e+03      1.300e-01      9.186e-04      1.706e+01      2.630e+00  
##     YearBuilt    YearRemodel  BasementFinSF     BasementSF       GroundSF  
##     3.512e-01      2.642e-01      2.640e-02      3.197e-02      5.233e-02  
##      FullBath        Bedroom     TotalRooms     Fireplaces       GarageSF  
##    -6.531e+00     -6.459e+00      3.732e+00      4.941e+00      3.171e-02  
## ScreenPorchSF  
##     3.899e-02

Backward elimination produces the same model as mod2.

# Forward selection: start from the intercept-only model and add predictors,
# with the full model as the upper limit of the search.
None <- lm(Price ~ 1, data = Ames.Train.nv)
step(
  None,
  scope = list(upper = Full),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
## 
## Call:
## lm(formula = Price ~ Quality + FirstSF + SecondSF + BasementFinSF + 
##     LotArea + YearBuilt + GarageSF + BasementSF + YearRemodel + 
##     LotFrontage + Fireplaces + HalfBath + Condition + Bedroom + 
##     TotalRooms + ScreenPorchSF + EnclosedPorchSF, data = Ames.Train.nv)
## 
## Coefficients:
##     (Intercept)          Quality          FirstSF         SecondSF  
##      -1.226e+03        1.708e+01        5.491e-02        4.224e-02  
##   BasementFinSF          LotArea        YearBuilt         GarageSF  
##       2.706e-02        8.876e-04        3.100e-01        3.059e-02  
##      BasementSF      YearRemodel      LotFrontage       Fireplaces  
##       2.747e-02        2.589e-01        1.255e-01        4.608e+00  
##        HalfBath        Condition          Bedroom       TotalRooms  
##       6.323e+00        2.917e+00       -5.895e+00        3.575e+00  
##   ScreenPorchSF  EnclosedPorchSF  
##       4.023e-02        2.849e-02

Forward selection gives a different model from the one chosen by the two previous methods.

# Refit the 17-predictor model returned by forward selection and summarise it.
mod3 <- lm(
  formula = Price ~ Quality + FirstSF + SecondSF + BasementFinSF + LotArea +
    YearBuilt + GarageSF + BasementSF + YearRemodel + LotFrontage +
    Fireplaces + HalfBath + Condition + Bedroom + TotalRooms +
    ScreenPorchSF + EnclosedPorchSF,
  data = Ames.Train.nv
)
summary(mod3)
## 
## Call:
## lm(formula = Price ~ Quality + FirstSF + SecondSF + BasementFinSF + 
##     LotArea + YearBuilt + GarageSF + BasementSF + YearRemodel + 
##     LotFrontage + Fireplaces + HalfBath + Condition + Bedroom + 
##     TotalRooms + ScreenPorchSF + EnclosedPorchSF, data = Ames.Train.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -92.646 -16.832  -0.493  14.794 180.793 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.226e+03  1.664e+02  -7.367 6.00e-13 ***
## Quality          1.708e+01  1.699e+00  10.053  < 2e-16 ***
## FirstSF          5.491e-02  8.035e-03   6.833 2.10e-11 ***
## SecondSF         4.224e-02  6.072e-03   6.957 9.37e-12 ***
## BasementFinSF    2.706e-02  3.391e-03   7.980 7.81e-15 ***
## LotArea          8.876e-04  1.665e-04   5.332 1.39e-07 ***
## YearBuilt        3.100e-01  7.599e-02   4.080 5.14e-05 ***
## GarageSF         3.059e-02  8.023e-03   3.812 0.000152 ***
## BasementSF       2.747e-02  6.020e-03   4.563 6.16e-06 ***
## YearRemodel      2.589e-01  8.948e-02   2.893 0.003957 ** 
## LotFrontage      1.255e-01  3.840e-02   3.269 0.001145 ** 
## Fireplaces       4.608e+00  2.309e+00   1.996 0.046410 *  
## HalfBath         6.323e+00  3.453e+00   1.831 0.067584 .  
## Condition        2.917e+00  1.361e+00   2.143 0.032512 *  
## Bedroom         -5.895e+00  2.338e+00  -2.521 0.011955 *  
## TotalRooms       3.575e+00  1.599e+00   2.236 0.025716 *  
## ScreenPorchSF    4.023e-02  2.396e-02   1.679 0.093635 .  
## EnclosedPorchSF  2.849e-02  1.870e-02   1.524 0.128086    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.39 on 582 degrees of freedom
## Multiple R-squared:  0.8643, Adjusted R-squared:  0.8603 
## F-statistic:   218 on 17 and 582 DF,  p-value: < 2.2e-16
# Diagnostic plots for the forward-selection model.
plot(mod3)

# Variance inflation factors of the two candidate models.
vif(mod2)
##   LotFrontage       LotArea       Quality     Condition     YearBuilt 
##      1.082105      1.181011      3.521544      1.455091      3.351422 
##   YearRemodel BasementFinSF    BasementSF      GroundSF      FullBath 
##      2.310357      1.522870      2.002441      5.066200      2.329781 
##       Bedroom    TotalRooms    Fireplaces      GarageSF ScreenPorchSF 
##      2.483329      3.966868      1.523651      1.854051      1.049800
vif(mod3)
##         Quality         FirstSF        SecondSF   BasementFinSF         LotArea 
##        3.618069        5.444095        4.788303        1.518124        1.190687 
##       YearBuilt        GarageSF      BasementSF     YearRemodel     LotFrontage 
##        3.677444        1.905684        3.920093        2.301364        1.108694 
##      Fireplaces        HalfBath       Condition         Bedroom      TotalRooms 
##        1.572568        2.025182        1.448649        2.522101        3.981090 
##   ScreenPorchSF EnclosedPorchSF 
##        1.065707        1.259741
# Mallows' Cp of each candidate, measured against the full model mod1.
ols_mallows_cp(model = mod2, fullmodel = mod1)
## [1] 12.49027
ols_mallows_cp(model = mod3, fullmodel = mod1)
## [1] 15.71616
#PART 2:
# Diagnostic plots for the selected model (mod2).
plot(mod2)

# The ten largest residuals in absolute value. The names are the row labels
# of the corresponding observations.
mod2.abs.resid <- sort(abs(mod2$residuals), decreasing = TRUE)
head(mod2.abs.resid, n = 10)
##       343       222       109       319        78       380       292       382 
## 185.14553 159.15927 130.33355 112.68010 111.43223  96.84544  94.61528  87.73272 
##       588       351 
##  87.51145  87.35665
# Take the row labels from the sort above instead of retyping them, so the
# list cannot go stale if the data or the model changes.
mod2.top10 <- names(head(mod2.abs.resid, n = 10))

# Signed residuals of all ten observations. The earlier head() wrapper used
# its default n = 6 and silently dropped the last four.
mod2$residuals[mod2.top10]
##       343       222       109       319        78       380       292       382 
## 185.14553 159.15927 130.33355 112.68010 111.43223 -96.84544 -94.61528  87.73272 
##       588       351 
## -87.51145  87.35665
# Standardized and studentized residuals of the same observations.
rstandard(mod2)[mod2.top10]
##       343       222       109       319        78       380       292       382 
##  6.560243  5.814848  4.476083  3.881726  3.848407 -3.381801 -3.292753  3.032637 
##       588       351 
## -3.023010  3.041522
rstudent(mod2)[mod2.top10]
##       343       222       109       319        78       380       292       382 
##  6.810364  5.985732  4.550996  3.929424  3.894814 -3.412483 -3.320904  3.054184 
##       588       351 
## -3.044334  3.063275
# Leverage cut-offs 2(k+1)/n and 3(k+1)/n, computed from the fitted model
# (16 coefficients, 600 observations) rather than hard-coded.
2 * length(coef(mod2)) / nobs(mod2)
## [1] 0.05333333
3 * length(coef(mod2)) / nobs(mod2)
## [1] 0.08
hatvalues(mod2)[mod2.top10]
##        343        222        109        319         78        380        292 
## 0.07603531 0.13092764 0.01647594 0.02250739 0.02741283 0.04867035 0.04220408 
##        382        588        351 
## 0.02915258 0.02788100 0.04307275
# Cook's distances, largest first. `decreasing = TRUE` belongs to sort();
# it was previously passed to head(), which left the values ascending.
head(sort(cooks.distance(mod2)[mod2.top10], decreasing = TRUE), n = 10)
##        222        343        380        292         78        351        319 
## 0.31836999 0.22135015 0.03656870 0.02985931 0.02608959 0.02602469 0.02168410 
##        109        382        588 
## 0.02097690 0.01726025 0.01638129
# Residuals-versus-leverage plot.
plot(mod2, which = 5)

# Price plotted against each predictor of mod2, one plot per predictor.
plot(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)

plot(mod2)

# mod4: raw Price on log-transformed predictors. One is added before taking
# the log for every predictor except the two year variables.
mod4 <- lm(
  Price ~ log(LotFrontage + 1) + log(LotArea + 1) + log(Quality + 1) +
    log(Condition + 1) + log(YearBuilt) + log(YearRemodel) +
    log(BasementFinSF + 1) + log(BasementSF + 1) + log(GroundSF + 1) +
    log(FullBath + 1) + log(Bedroom + 1) + log(TotalRooms + 1) +
    log(Fireplaces + 1) + log(GarageSF + 1) + log(ScreenPorchSF + 1),
  data = Ames.Train.nv
)
plot(mod4)

# mod5: log(Price) on the same log-transformed predictors.
mod5 <- lm(
  log(Price) ~ log(LotFrontage + 1) + log(LotArea + 1) + log(Quality + 1) +
    log(Condition + 1) + log(YearBuilt) + log(YearRemodel) +
    log(BasementFinSF + 1) + log(BasementSF + 1) + log(GroundSF + 1) +
    log(FullBath + 1) + log(Bedroom + 1) + log(TotalRooms + 1) +
    log(Fireplaces + 1) + log(GarageSF + 1) + log(ScreenPorchSF + 1),
  data = Ames.Train.nv
)
plot(mod5)

# mod6: log(Price) on the untransformed predictors of mod2.
mod6 <- lm(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)
plot(mod6)

# Coefficient table and fit statistics for mod4 (raw Price, logged predictors).
summary(mod4)
## 
## Call:
## lm(formula = Price ~ log(LotFrontage + 1) + log(LotArea + 1) + 
##     log(Quality + 1) + log(Condition + 1) + log(YearBuilt) + 
##     log(YearRemodel) + log(BasementFinSF + 1) + log(BasementSF + 
##     1) + log(GroundSF + 1) + log(FullBath + 1) + log(Bedroom + 
##     1) + log(TotalRooms + 1) + log(Fireplaces + 1) + log(GarageSF + 
##     1) + log(ScreenPorchSF + 1), data = Ames.Train.nv)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -102.407  -21.287   -4.512   16.746  251.717 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -1.296e+04  1.631e+03  -7.945 1.00e-14 ***
## log(LotFrontage + 1)    2.869e+00  9.385e-01   3.057  0.00234 ** 
## log(LotArea + 1)        2.921e+01  3.309e+00   8.828  < 2e-16 ***
## log(Quality + 1)        1.395e+02  1.421e+01   9.817  < 2e-16 ***
## log(Condition + 1)      5.337e+00  1.142e+01   0.467  0.64042    
## log(YearBuilt)          8.726e+02  1.764e+02   4.947 9.87e-07 ***
## log(YearRemodel)        7.122e+02  2.163e+02   3.293  0.00105 ** 
## log(BasementFinSF + 1)  2.978e+00  5.779e-01   5.152 3.52e-07 ***
## log(BasementSF + 1)     2.471e+00  1.465e+00   1.687  0.09214 .  
## log(GroundSF + 1)       6.898e+01  1.076e+01   6.411 2.99e-10 ***
## log(FullBath + 1)      -3.446e+00  1.073e+01  -0.321  0.74818    
## log(Bedroom + 1)       -4.580e+01  9.632e+00  -4.755 2.51e-06 ***
## log(TotalRooms + 1)     4.519e+01  1.495e+01   3.022  0.00262 ** 
## log(Fireplaces + 1)     1.506e+01  4.647e+00   3.242  0.00126 ** 
## log(GarageSF + 1)      -6.569e-01  1.414e+00  -0.465  0.64231    
## log(ScreenPorchSF + 1)  7.647e-01  1.039e+00   0.736  0.46187    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.5 on 584 degrees of freedom
## Multiple R-squared:  0.7899, Adjusted R-squared:  0.7845 
## F-statistic: 146.4 on 15 and 584 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for mod5 (log(Price), logged predictors).
summary(mod5)
## 
## Call:
## lm(formula = log(Price) ~ log(LotFrontage + 1) + log(LotArea + 
##     1) + log(Quality + 1) + log(Condition + 1) + log(YearBuilt) + 
##     log(YearRemodel) + log(BasementFinSF + 1) + log(BasementSF + 
##     1) + log(GroundSF + 1) + log(FullBath + 1) + log(Bedroom + 
##     1) + log(TotalRooms + 1) + log(Fireplaces + 1) + log(GarageSF + 
##     1) + log(ScreenPorchSF + 1), data = Ames.Train.nv)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.27266 -0.07681 -0.00287  0.08203  0.45149 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -75.525367   6.699460 -11.273  < 2e-16 ***
## log(LotFrontage + 1)     0.006982   0.003854   1.811 0.070580 .  
## log(LotArea + 1)         0.137063   0.013588  10.087  < 2e-16 ***
## log(Quality + 1)         0.700683   0.058357  12.007  < 2e-16 ***
## log(Condition + 1)       0.309571   0.046895   6.601 9.16e-11 ***
## log(YearBuilt)           7.488680   0.724335  10.339  < 2e-16 ***
## log(YearRemodel)         2.247819   0.888113   2.531 0.011635 *  
## log(BasementFinSF + 1)   0.014182   0.002373   5.976 3.98e-09 ***
## log(BasementSF + 1)      0.020765   0.006015   3.452 0.000597 ***
## log(GroundSF + 1)        0.460841   0.044182  10.430  < 2e-16 ***
## log(FullBath + 1)       -0.028716   0.044052  -0.652 0.514743    
## log(Bedroom + 1)        -0.142349   0.039554  -3.599 0.000347 ***
## log(TotalRooms + 1)      0.086737   0.061411   1.412 0.158368    
## log(Fireplaces + 1)      0.052469   0.019081   2.750 0.006148 ** 
## log(GarageSF + 1)        0.003200   0.005805   0.551 0.581645    
## log(ScreenPorchSF + 1)   0.007193   0.004265   1.686 0.092263 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1499 on 584 degrees of freedom
## Multiple R-squared:  0.8718, Adjusted R-squared:  0.8685 
## F-statistic: 264.7 on 15 and 584 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for mod6 (log(Price), raw predictors).
summary(mod6)
## 
## Call:
## lm(formula = log(Price) ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Train.nv)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.57157 -0.07269  0.00404  0.07659  0.42543 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.824e+00  8.537e-01  -6.823 2.24e-11 ***
## LotFrontage    3.747e-04  1.897e-04   1.975  0.04874 *  
## LotArea        3.950e-06  8.290e-07   4.765 2.38e-06 ***
## Quality        8.533e-02  8.380e-03  10.182  < 2e-16 ***
## Condition      5.491e-02  6.822e-03   8.050 4.67e-15 ***
## YearBuilt      3.903e-03  3.627e-04  10.761  < 2e-16 ***
## YearRemodel    8.417e-04  4.483e-04   1.878  0.06094 .  
## BasementFinSF  9.540e-05  1.698e-05   5.618 3.00e-08 ***
## BasementSF     1.511e-04  2.151e-05   7.021 6.12e-12 ***
## GroundSF       3.052e-04  2.841e-05  10.742  < 2e-16 ***
## FullBath      -2.526e-02  1.685e-02  -1.499  0.13442    
## Bedroom       -8.224e-03  1.160e-02  -0.709  0.47862    
## TotalRooms     3.921e-03  7.979e-03   0.491  0.62333    
## Fireplaces     2.992e-02  1.136e-02   2.633  0.00868 ** 
## GarageSF       9.212e-05  3.957e-05   2.328  0.02025 *  
## ScreenPorchSF  2.854e-04  1.189e-04   2.400  0.01669 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.147 on 584 degrees of freedom
## Multiple R-squared:  0.8768, Adjusted R-squared:  0.8736 
## F-statistic:   277 on 15 and 584 DF,  p-value: < 2.2e-16
# Untransformed model mod2 again -- presumably repeated here for comparison
# with the transformed models summarised above.
summary(mod2)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Train.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.845 -16.864  -0.772  14.072 185.146 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.309e+03  1.706e+02  -7.673 7.10e-14 ***
## LotFrontage    1.300e-01  3.790e-02   3.430 0.000646 ***
## LotArea        9.186e-04  1.656e-04   5.546 4.42e-08 ***
## Quality        1.706e+01  1.674e+00  10.187  < 2e-16 ***
## Condition      2.630e+00  1.363e+00   1.930 0.054091 .  
## YearBuilt      3.512e-01  7.247e-02   4.846 1.62e-06 ***
## YearRemodel    2.642e-01  8.957e-02   2.950 0.003304 ** 
## BasementFinSF  2.640e-02  3.393e-03   7.782 3.26e-14 ***
## BasementSF     3.197e-02  4.298e-03   7.438 3.68e-13 ***
## GroundSF       5.233e-02  5.677e-03   9.219  < 2e-16 ***
## FullBath      -6.531e+00  3.367e+00  -1.940 0.052869 .  
## Bedroom       -6.459e+00  2.317e+00  -2.787 0.005492 ** 
## TotalRooms     3.732e+00  1.594e+00   2.341 0.019565 *  
## Fireplaces     4.941e+00  2.270e+00   2.177 0.029918 *  
## GarageSF       3.171e-02  7.905e-03   4.011 6.82e-05 ***
## ScreenPorchSF  3.899e-02  2.375e-02   1.641 0.101239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared:  0.8641, Adjusted R-squared:  0.8606 
## F-statistic: 247.5 on 15 and 584 DF,  p-value: < 2.2e-16
# The ten largest residuals of mod6 in absolute value (log(Price) scale). The
# names are the row labels of the corresponding observations.
mod6.abs.resid <- sort(abs(mod6$residuals), decreasing = TRUE)
head(mod6.abs.resid, n = 10)
##       299       458       380       588       132       109       240        44 
## 1.5715711 0.4926986 0.4677200 0.4467211 0.4298559 0.4254321 0.4217658 0.4165398 
##       317       179 
## 0.3934745 0.3860867
# Take the row labels from the sort above instead of retyping them, so the
# list cannot go stale if the data or the model changes.
mod6.top10 <- names(head(mod6.abs.resid, n = 10))

# Signed residuals of all ten observations. The earlier head() wrapper used
# its default n = 6 and silently dropped the last four.
mod6$residuals[mod6.top10]
##        299        458        380        588        132        109        240 
## -1.5715711 -0.4926986 -0.4677200 -0.4467211 -0.4298559  0.4254321  0.4217658 
##         44        317        179 
##  0.4165398 -0.3934745 -0.3860867
# Standardized and studentized residuals of the same observations.
rstandard(mod6)[mod6.top10]
##        299        458        380        588        132        109        240 
## -11.239166  -3.383835  -3.263067  -3.083062  -2.943474   2.919062   2.890237 
##         44        317        179 
##   2.871125  -2.703059  -4.419756
rstudent(mod6)[mod6.top10]
##        299        458        380        588        132        109        240 
## -12.684894  -3.414576  -3.290405  -3.105800  -2.963015   2.938074   2.908639 
##         44        317        179 
##   2.889129  -2.717799  -4.491731
# Leverage cut-offs 2(k+1)/n and 3(k+1)/n, computed from the fitted model
# (16 coefficients, 600 observations) rather than hard-coded.
2 * length(coef(mod6)) / nobs(mod6)
## [1] 0.05333333
3 * length(coef(mod6)) / nobs(mod6)
## [1] 0.08
hatvalues(mod6)[mod6.top10]
##        299        458        380        588        132        109        240 
## 0.09466155 0.01835268 0.04867035 0.02788100 0.01250162 0.01647594 0.01397735 
##         44        317        179 
## 0.02541472 0.01885488 0.64666695
# Cook's distances, largest first. `decreasing = TRUE` belongs to sort();
# it was previously passed to head(), which left the values ascending.
head(sort(cooks.distance(mod6)[mod6.top10], decreasing = TRUE), n = 10)
##         179         299         380         588          44         458 
## 2.234462090 0.825488145 0.034045940 0.017038583 0.013435370 0.013379574 
##         109         317         240         132 
## 0.008921374 0.008775699 0.007400903 0.006855362
# Residuals-versus-leverage plot.
plot(mod6, which = 5)

# Subset search restricted to the 15 predictors of mod6, with log(Price) as
# the response: the single best subset of each size, up to all 15.
mod6CP <- regsubsets(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv,
  nbest = 1,
  nvmax = 15
)
ShowSubsets(mod6CP)
#Part 5
# A single house described by one value per variable. Only the columns named
# in a model's formula are used when predicting from that model.
House <- data.frame(
  YearBuilt = 1995,
  YearRemodel = 2003,
  Quality = 7,
  Condition = 5,
  ExteriorQ = "Gd",
  ExteriorC = "Gd",
  LotFrontage = 90,
  LotArea = 11060,
  LotConfig = "Corner",
  HouseStyle = "2Story",
  Foundation = "PConc",
  BasementHt = "Ex",
  BasementSF = 1150,
  BasementFinSF = 0,
  BasementFin = "Unf",
  Heating = "GasA",
  HeatingQC = "Ex",
  CentralAir = "Y",
  FirstSF = 1164,
  SecondSF = 1150,
  GroundSF = 2314,
  BasementFBath = 0,
  BasementHBath = 0,
  FullBath = 2,
  HalfBath = 1,
  Bedroom = 3,
  TotalRooms = 9,
  Fireplaces = 1,
  GarageType = "Attchd",
  ScreenPorchSF = 0,
  GarageCars = 2,
  GarageSF = 502,
  GarageQ = "TA",
  GarageC = "TA",
  OpenPorchSF = 274
)

# 95% prediction interval from mod6. The model is fitted to log(Price), so
# exp() maps the fit and both bounds back to the Price scale.
exp(predict(mod6, newdata = House, interval = "prediction", level = 0.95))
##        fit     lwr      upr
## 1 248.2351 185.452 332.2728
# The same interval from the untransformed model mod2.
predict(mod2, newdata = House, interval = "prediction", level = 0.95)
##        fit      lwr      upr
## 1 255.5706 197.3161 313.8251

Part 6

# Rebuild the 26-column training subset, then take the same column positions
# from the hold-out data so both samples carry the same variables.
Ames.Train %>% head()
Ames.Train.nv <- Ames.Train %>%
  select(c(
    2, 3, 4, 7, 8, 9, 10, 17, 18, 19, 23, 24, 25,
    26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 40, 41, 42
  ))
Ames.Train.nv %>% head()
Ames.Test.nv <- Ames.Test %>%
  select(c(
    2, 3, 4, 7, 8, 9, 10, 17, 18, 19, 23, 24, 25,
    26, 27, 28, 29, 30, 32, 33, 35, 36, 39, 40, 41, 42
  ))
Ames.Test.nv
# Model from parts 1 and 2 of HW 3, fitted to the training sample.
mod1.Train <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Train.nv
)
summary(mod1.Train)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Train.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -96.845 -16.864  -0.772  14.072 185.146 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.309e+03  1.706e+02  -7.673 7.10e-14 ***
## LotFrontage    1.300e-01  3.790e-02   3.430 0.000646 ***
## LotArea        9.186e-04  1.656e-04   5.546 4.42e-08 ***
## Quality        1.706e+01  1.674e+00  10.187  < 2e-16 ***
## Condition      2.630e+00  1.363e+00   1.930 0.054091 .  
## YearBuilt      3.512e-01  7.247e-02   4.846 1.62e-06 ***
## YearRemodel    2.642e-01  8.957e-02   2.950 0.003304 ** 
## BasementFinSF  2.640e-02  3.393e-03   7.782 3.26e-14 ***
## BasementSF     3.197e-02  4.298e-03   7.438 3.68e-13 ***
## GroundSF       5.233e-02  5.677e-03   9.219  < 2e-16 ***
## FullBath      -6.531e+00  3.367e+00  -1.940 0.052869 .  
## Bedroom       -6.459e+00  2.317e+00  -2.787 0.005492 ** 
## TotalRooms     3.732e+00  1.594e+00   2.341 0.019565 *  
## Fireplaces     4.941e+00  2.270e+00   2.177 0.029918 *  
## GarageSF       3.171e-02  7.905e-03   4.011 6.82e-05 ***
## ScreenPorchSF  3.899e-02  2.375e-02   1.641 0.101239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.36 on 584 degrees of freedom
## Multiple R-squared:  0.8641, Adjusted R-squared:  0.8606 
## F-statistic: 247.5 on 15 and 584 DF,  p-value: < 2.2e-16
# Diagnostic plots for the training-sample fit.
plot(mod1.Train)

# The same 15-predictor formula, refitted to the hold-out sample.
mod1.Test <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF,
  data = Ames.Test.nv
)
summary(mod1.Test)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF, data = Ames.Test.nv)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.477 -14.029  -3.168  11.916 142.003 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.091e+03  2.720e+02  -4.010 8.81e-05 ***
## LotFrontage    1.655e-01  6.219e-02   2.661 0.008488 ** 
## LotArea        2.604e-03  6.193e-04   4.205 4.08e-05 ***
## Quality        1.711e+01  2.390e+00   7.161 1.85e-11 ***
## Condition      3.659e+00  2.283e+00   1.603 0.110712    
## YearBuilt      4.191e-01  1.234e-01   3.397 0.000835 ***
## YearRemodel    8.560e-02  1.428e-01   0.599 0.549685    
## BasementFinSF  1.872e-02  4.945e-03   3.785 0.000208 ***
## BasementSF     2.162e-02  6.128e-03   3.528 0.000529 ***
## GroundSF       6.995e-02  9.063e-03   7.718 7.26e-13 ***
## FullBath      -5.835e+00  5.330e+00  -1.095 0.275088    
## Bedroom       -9.484e+00  3.655e+00  -2.595 0.010217 *  
## TotalRooms     7.236e-01  2.641e+00   0.274 0.784422    
## Fireplaces     4.253e+00  3.530e+00   1.205 0.229823    
## GarageSF       2.436e-02  1.155e-02   2.110 0.036243 *  
## ScreenPorchSF -1.340e-02  3.858e-02  -0.347 0.728727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.24 on 184 degrees of freedom
## Multiple R-squared:  0.8938, Adjusted R-squared:  0.8851 
## F-statistic: 103.2 on 15 and 184 DF,  p-value: < 2.2e-16
# Diagnostic plots for the hold-out-sample fit.
plot(mod1.Test)

# Predict the hold-out prices with the training-sample model; the hold-out
# residual is the observed price minus that prediction.
fitTrain <- predict(mod1.Train, newdata = Ames.Test.nv)
holdoutresid <- Ames.Test.nv[["Price"]] - fitTrain

holdoutresid
##            1            2            3            4            5            6 
##  -9.20411050  -5.02367620  31.74368642  -4.25073104  -8.75487126 -11.55908468 
##            7            8            9           10           11           12 
##  -1.33061174 170.50099741  24.55734461   3.67066072 -19.30159654 -18.37303723 
##           13           14           15           16           17           18 
##  24.16169913 -12.79080707  19.22064033  -8.31881750  49.76167620   6.66377209 
##           19           20           21           22           23           24 
##  -9.30808088  -0.24915939  -4.71009649 -52.61632634  -3.61945163 -47.38050332 
##           25           26           27           28           29           30 
## -21.44815176  47.41985052 -18.72502297  18.15441086 -29.17456901  -5.58388970 
##           31           32           33           34           35           36 
## -11.47238771  36.48664160 -10.60614539 -16.10127061 -30.38194007  16.02236605 
##           37           38           39           40           41           42 
## -29.96106302 -29.93314058  22.30656014  27.51336123  -3.48022226   4.29144034 
##           43           44           45           46           47           48 
##  15.20515248 -23.34212713   2.64343296  14.38217541  17.09284171   5.79978714 
##           49           50           51           52           53           54 
##  42.96780499  -6.52034822   1.04972824  39.06119689  -6.25753891   0.15766421 
##           55           56           57           58           59           60 
## -16.91218082   6.84745152 -28.04008509  -0.39721573   2.86421722 -20.60349054 
##           61           62           63           64           65           66 
## -37.19345445  32.96197500 -12.01427137   8.77887731  29.88810916 -16.52689133 
##           67           68           69           70           71           72 
##   2.48558041   6.52307306  -1.27495214  22.80176622  10.93616123  25.92347216 
##           73           74           75           76           77           78 
##   7.85233183 -40.11445491  31.13567252  -3.25598756  -5.50976421  11.08566128 
##           79           80           81           82           83           84 
## -11.37450075  -2.72956477  28.29120388 -20.57304482   0.70327102  -1.45059779 
##           85           86           87           88           89           90 
## -31.52133189 -28.03975374   2.39118681  31.00692065  22.06956637   7.43537260 
##           91           92           93           94           95           96 
##  10.73391950 -10.33907846  19.28095849  -8.18173972 -21.45794833  30.25202285 
##           97           98           99          100          101          102 
## -22.41706239  -9.85910903  55.31539851  67.22080764 -16.07738746  -9.70051739 
##          103          104          105          106          107          108 
##  27.40923773 -12.89337519   1.28213364  13.80966386  -4.50509617 -15.81370341 
##          109          110          111          112          113          114 
##  -7.71387682   8.19548787   7.97609826   3.31615567 -12.75072451  31.76355246 
##          115          116          117          118          119          120 
##   8.11569019 -15.75953173 -53.74284456 -10.10920249   3.83544916 -16.77717941 
##          121          122          123          124          125          126 
##  -4.56356401  -0.61855755  -2.70649779 -19.44587292   0.28648004  47.20963472 
##          127          128          129          130          131          132 
##  14.26871272  16.85402590 -25.99973266 -47.22192383 -17.07032620  30.33749155 
##          133          134          135          136          137          138 
##  -9.92642180   1.97459756  -4.42990254  42.47947152  74.89413363  10.49494164 
##          139          140          141          142          143          144 
## -18.48355746  11.16700283 -11.74944314  18.13537271 -20.31623769   9.79730224 
##          145          146          147          148          149          150 
##   3.88602016  16.61523124 -29.90857947 -16.43783903  -7.90178489   4.14637009 
##          151          152          153          154          155          156 
##   3.64981553   0.70961471  19.84081455 -15.82685245  53.34737792   8.54396199 
##          157          158          159          160          161          162 
##   0.34356412  66.97363264  -4.02097476   2.85314704 -26.90312110 -33.63186638 
##          163          164          165          166          167          168 
##  52.05849788  14.91328449 -13.61834041 -31.05474091  92.49861827  23.78913537 
##          169          170          171          172          173          174 
## -53.05687272   7.49010200   0.26166653 -23.48703150 -34.13707346  -0.03510799 
##          175          176          177          178          179          180 
##   7.81276200  18.07515907 -15.28302809 -11.22573619  32.88275452  -7.46957299 
##          181          182          183          184          185          186 
##  12.48267019  12.34906499 -14.63485985  27.38254351  -6.30354307  40.44897984 
##          187          188          189          190          191          192 
##  26.83835761  -7.09625497   2.70036965   6.28982841 -18.57032529  35.51913356 
##          193          194          195          196          197          198 
##  12.34906499 -13.10191075 -11.04634779 -53.14722599 -12.86906564  18.27642958 
##          199          200 
## -12.52894892   1.97933787
# Center, spread, and distribution shape of the holdout residuals.
mean(holdoutresid)
## [1] 2.42646
sd(holdoutresid)
## [1] 26.75778
hist(holdoutresid)

# Cross-validation correlation between the test-set Price and fitTrain,
# squared so it can be compared with an R-squared.
crosscorr <- cor(Ames.Test.nv$Price, fitTrain)
crosscorr^2
## [1] 0.8808207
# Shrinkage: R-squared of mod1.Test minus the squared cross-correlation.
shrinkage <- summary(mod1.Test)$r.squared - crosscorr^2
shrinkage
## [1] 0.01293275

#PART 7:

# Print the training data, then fit the full candidate model (mod2) that the
# selection procedures below start from or use as their upper scope:
# 15 numeric predictors plus 15 categorical predictors entered as factors.
Ames.Train
mod2 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    YearRemodel + BasementFinSF + BasementSF + GroundSF + FullBath +
    Bedroom + TotalRooms + Fireplaces + GarageSF + ScreenPorchSF +
    factor(LotConfig) + factor(HouseStyle) + factor(ExteriorQ) +
    factor(ExteriorC) + factor(Foundation) + factor(BasementHt) +
    factor(BasementC) + factor(BasementFin) + factor(Heating) +
    factor(HeatingQC) + factor(CentralAir) + factor(KitchenQ) +
    factor(GarageType) + factor(GarageQ) + factor(GarageC),
  data = Ames.Train
)
summary(mod2)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF + factor(LotConfig) + factor(HouseStyle) + 
##     factor(ExteriorQ) + factor(ExteriorC) + factor(Foundation) + 
##     factor(BasementHt) + factor(BasementC) + factor(BasementFin) + 
##     factor(Heating) + factor(HeatingQC) + factor(CentralAir) + 
##     factor(KitchenQ) + factor(GarageType) + factor(GarageQ) + 
##     factor(GarageC), data = Ames.Train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -92.741 -12.846  -0.235  12.665 147.618 
## 
## Coefficients: (4 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -7.858e+02  2.357e+02  -3.333 0.000918 ***
## LotFrontage                9.786e-02  3.621e-02   2.702 0.007109 ** 
## LotArea                    7.568e-04  1.556e-04   4.865 1.51e-06 ***
## Quality                    1.014e+01  1.726e+00   5.874 7.53e-09 ***
## Condition                  5.486e+00  1.508e+00   3.638 0.000302 ***
## YearBuilt                  3.349e-01  9.734e-02   3.441 0.000626 ***
## YearRemodel                6.324e-02  9.180e-02   0.689 0.491242    
## BasementFinSF              2.413e-02  4.086e-03   5.906 6.31e-09 ***
## BasementSF                 1.604e-02  6.715e-03   2.388 0.017298 *  
## GroundSF                   5.938e-02  6.439e-03   9.223  < 2e-16 ***
## FullBath                  -8.318e+00  3.161e+00  -2.632 0.008747 ** 
## Bedroom                   -1.461e+00  2.203e+00  -0.663 0.507443    
## TotalRooms                 1.666e+00  1.466e+00   1.136 0.256333    
## Fireplaces                 6.855e+00  2.084e+00   3.290 0.001069 ** 
## GarageSF                   3.242e-02  8.057e-03   4.024 6.56e-05 ***
## ScreenPorchSF              2.362e-02  2.103e-02   1.123 0.261984    
## factor(LotConfig)CulDSac   9.288e+00  5.576e+00   1.666 0.096369 .  
## factor(LotConfig)FR2      -3.357e+00  6.754e+00  -0.497 0.619389    
## factor(LotConfig)FR3      -8.846e+00  1.501e+01  -0.589 0.555920    
## factor(LotConfig)Inside    3.445e+00  2.948e+00   1.169 0.243076    
## factor(HouseStyle)1.5Unf   8.654e+00  1.230e+01   0.704 0.481834    
## factor(HouseStyle)1Story   3.895e+00  5.029e+00   0.775 0.438960    
## factor(HouseStyle)2.5Fin  -1.521e+01  2.784e+01  -0.547 0.584942    
## factor(HouseStyle)2.5Unf  -1.768e+01  1.286e+01  -1.374 0.169970    
## factor(HouseStyle)2Story  -4.711e+00  4.515e+00  -1.043 0.297321    
## factor(HouseStyle)SFoyer  -3.543e+00  8.147e+00  -0.435 0.663868    
## factor(HouseStyle)SLvl     6.956e-01  7.347e+00   0.095 0.924602    
## factor(ExteriorQ)Fa       -4.588e+01  1.760e+01  -2.607 0.009396 ** 
## factor(ExteriorQ)Gd       -5.448e+01  7.278e+00  -7.486 3.01e-13 ***
## factor(ExteriorQ)TA       -5.922e+01  8.193e+00  -7.229 1.73e-12 ***
## factor(ExteriorC)Fa       -2.826e+01  1.830e+01  -1.545 0.123037    
## factor(ExteriorC)Gd       -1.391e+00  1.566e+01  -0.089 0.929255    
## factor(ExteriorC)Po       -2.000e+01  3.280e+01  -0.610 0.542228    
## factor(ExteriorC)TA       -5.497e+00  1.534e+01  -0.358 0.720168    
## factor(Foundation)CBlock  -5.733e+00  4.944e+00  -1.160 0.246740    
## factor(Foundation)PConc    4.134e+00  6.074e+00   0.681 0.496433    
## factor(Foundation)Slab    -3.819e+01  1.608e+01  -2.374 0.017941 *  
## factor(Foundation)Stone    1.737e+01  2.042e+01   0.851 0.395330    
## factor(Foundation)Wood    -2.629e+01  2.008e+01  -1.309 0.191030    
## factor(BasementHt)Fa      -1.913e+01  9.475e+00  -2.019 0.044002 *  
## factor(BasementHt)Gd      -2.355e+01  5.003e+00  -4.706 3.23e-06 ***
## factor(BasementHt)None     8.791e+00  1.675e+01   0.525 0.599950    
## factor(BasementHt)TA      -2.792e+01  6.596e+00  -4.233 2.73e-05 ***
## factor(BasementC)Gd        1.117e+01  9.047e+00   1.235 0.217356    
## factor(BasementC)None             NA         NA      NA       NA    
## factor(BasementC)TA        1.279e+01  6.969e+00   1.835 0.067066 .  
## factor(BasementFin)BLQ     3.554e+00  5.056e+00   0.703 0.482439    
## factor(BasementFin)GLQ     1.418e+00  3.867e+00   0.367 0.714048    
## factor(BasementFin)LwQ    -2.980e+00  5.503e+00  -0.542 0.588357    
## factor(BasementFin)None           NA         NA      NA       NA    
## factor(BasementFin)Rec    -8.300e-01  4.643e+00  -0.179 0.858196    
## factor(BasementFin)Unf     3.018e+00  4.258e+00   0.709 0.478878    
## factor(Heating)GasW       -9.581e+00  1.747e+01  -0.549 0.583566    
## factor(Heating)Grav        2.939e+01  2.413e+01   1.218 0.223718    
## factor(Heating)OthW       -2.012e+01  3.127e+01  -0.643 0.520220    
## factor(Heating)Wall        5.806e+01  2.265e+01   2.564 0.010628 *  
## factor(HeatingQC)Fa       -9.919e+00  7.262e+00  -1.366 0.172558    
## factor(HeatingQC)Gd       -5.630e+00  3.435e+00  -1.639 0.101790    
## factor(HeatingQC)Po       -6.030e+00  2.931e+01  -0.206 0.837071    
## factor(HeatingQC)TA       -3.433e+00  3.399e+00  -1.010 0.312955    
## factor(CentralAir)Y       -1.426e+00  6.510e+00  -0.219 0.826694    
## factor(KitchenQ)Fa         1.024e+00  9.993e+00   0.102 0.918455    
## factor(KitchenQ)Gd        -4.861e+00  5.543e+00  -0.877 0.380878    
## factor(KitchenQ)TA        -7.978e+00  6.190e+00  -1.289 0.198012    
## factor(GarageType)Attchd   1.807e+01  1.384e+01   1.306 0.192089    
## factor(GarageType)Basment  1.701e+01  1.742e+01   0.976 0.329341    
## factor(GarageType)BuiltIn  1.921e+01  1.456e+01   1.320 0.187430    
## factor(GarageType)CarPort -2.918e+01  2.373e+01  -1.230 0.219294    
## factor(GarageType)Detchd   1.506e+01  1.388e+01   1.085 0.278406    
## factor(GarageType)None     4.100e+01  1.760e+01   2.329 0.020223 *  
## factor(GarageQ)Gd          9.769e+00  1.332e+01   0.734 0.463518    
## factor(GarageQ)None               NA         NA      NA       NA    
## factor(GarageQ)Po         -2.368e+01  3.260e+01  -0.727 0.467846    
## factor(GarageQ)TA         -1.885e+00  7.491e+00  -0.252 0.801462    
## factor(GarageC)Gd          1.745e+00  1.763e+01   0.099 0.921175    
## factor(GarageC)None               NA         NA      NA       NA    
## factor(GarageC)Po          6.586e+00  2.059e+01   0.320 0.749159    
## factor(GarageC)TA          7.065e+00  1.025e+01   0.689 0.490849    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.17 on 526 degrees of freedom
## Multiple R-squared:   0.91,  Adjusted R-squared:  0.8975 
## F-statistic: 72.88 on 73 and 526 DF,  p-value: < 2.2e-16
# Backward elimination, starting from the full model (mod2).
# No `scope` is supplied, so step() only considers dropping terms.
# MSE is the full model's residual variance; passing it as `scale` makes
# step() rank models by Mallows' Cp instead of AIC. It is reused by the
# forward and stepwise searches below.
MSE <- (summary(mod2)$sigma)^2
# `trace` is a numeric/logical flag; use FALSE (not the string "false") so
# the call matches the other step() calls in this file.
step(mod2, scale = MSE, trace = FALSE)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + BasementFinSF + BasementSF + GroundSF + FullBath + 
##     Fireplaces + GarageSF + factor(ExteriorQ) + factor(Foundation) + 
##     factor(BasementHt) + factor(Heating) + factor(GarageType), 
##     data = Ames.Train)
## 
## Coefficients:
##               (Intercept)                LotFrontage  
##                -8.080e+02                  8.152e-02  
##                   LotArea                    Quality  
##                 8.114e-04                  1.019e+01  
##                 Condition                  YearBuilt  
##                 7.630e+00                  4.163e-01  
##             BasementFinSF                 BasementSF  
##                 2.161e-02                  2.656e-02  
##                  GroundSF                   FullBath  
##                 5.705e-02                 -9.810e+00  
##                Fireplaces                   GarageSF  
##                 7.302e+00                  3.169e-02  
##       factor(ExteriorQ)Fa        factor(ExteriorQ)Gd  
##                -6.564e+01                 -5.878e+01  
##       factor(ExteriorQ)TA   factor(Foundation)CBlock  
##                -6.771e+01                 -7.804e+00  
##   factor(Foundation)PConc     factor(Foundation)Slab  
##                 3.748e+00                 -4.812e+01  
##   factor(Foundation)Stone     factor(Foundation)Wood  
##                -3.068e+00                 -2.646e+01  
##      factor(BasementHt)Fa       factor(BasementHt)Gd  
##                -2.071e+01                 -2.493e+01  
##    factor(BasementHt)None       factor(BasementHt)TA  
##                 1.202e+01                 -2.836e+01  
##       factor(Heating)GasW        factor(Heating)Grav  
##                -1.247e+01                  1.363e+01  
##       factor(Heating)OthW        factor(Heating)Wall  
##                -4.215e+01                  5.347e+01  
##  factor(GarageType)Attchd  factor(GarageType)Basment  
##                 1.206e+01                  1.040e+01  
## factor(GarageType)BuiltIn  factor(GarageType)CarPort  
##                 1.466e+01                 -2.739e+01  
##  factor(GarageType)Detchd     factor(GarageType)None  
##                 8.353e+00                  2.639e+01
# Refit the model chosen by backward elimination so it can be inspected and
# compared by name (mod3).
mod3 <- lm(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF + factor(ExteriorQ) + factor(Foundation) +
    factor(BasementHt) + factor(Heating) + factor(GarageType),
  data = Ames.Train
)
# Forward selection: begin with the intercept-only model and add terms drawn
# from the full model (mod2), using the full-model MSE as the scale.
None <- lm(Price ~ 1, data = Ames.Train)
step(
  None,
  scope = list(upper = mod2),
  scale = MSE,
  direction = "forward",
  trace = FALSE
)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) + 
##     factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF + 
##     factor(HeatingQC) + Fireplaces + BasementSF + Condition + 
##     YearBuilt + LotFrontage + factor(Foundation) + FullBath + 
##     factor(GarageType) + factor(Heating), data = Ames.Train)
## 
## Coefficients:
##               (Intercept)                    Quality  
##                -7.088e+02                  1.060e+01  
##                  GroundSF              BasementFinSF  
##                 6.089e-02                  2.245e-02  
##       factor(ExteriorQ)Fa        factor(ExteriorQ)Gd  
##                -6.252e+01                 -5.858e+01  
##       factor(ExteriorQ)TA   factor(HouseStyle)1.5Unf  
##                -6.536e+01                  8.700e+00  
##  factor(HouseStyle)1Story   factor(HouseStyle)2.5Fin  
##                 3.855e+00                 -1.128e+01  
##  factor(HouseStyle)2.5Unf   factor(HouseStyle)2Story  
##                -1.954e+01                 -2.159e+00  
##  factor(HouseStyle)SFoyer     factor(HouseStyle)SLvl  
##                -8.635e-01                  2.966e+00  
##                   LotArea       factor(BasementHt)Fa  
##                 8.050e-04                 -2.176e+01  
##      factor(BasementHt)Gd     factor(BasementHt)None  
##                -2.403e+01                  6.051e+00  
##      factor(BasementHt)TA                   GarageSF  
##                -2.845e+01                  3.092e-02  
##       factor(HeatingQC)Fa        factor(HeatingQC)Gd  
##                -1.310e+01                 -6.606e+00  
##       factor(HeatingQC)Po        factor(HeatingQC)TA  
##                -2.861e+00                 -4.945e+00  
##                Fireplaces                 BasementSF  
##                 7.307e+00                  2.095e-02  
##                 Condition                  YearBuilt  
##                 7.146e+00                  3.650e-01  
##               LotFrontage   factor(Foundation)CBlock  
##                 8.541e-02                 -5.861e+00  
##   factor(Foundation)PConc     factor(Foundation)Slab  
##                 4.706e+00                 -4.428e+01  
##   factor(Foundation)Stone     factor(Foundation)Wood  
##                -2.640e+00                 -2.441e+01  
##                  FullBath   factor(GarageType)Attchd  
##                -9.179e+00                  1.163e+01  
## factor(GarageType)Basment  factor(GarageType)BuiltIn  
##                 8.515e+00                  1.358e+01  
## factor(GarageType)CarPort   factor(GarageType)Detchd  
##                -2.986e+01                  8.297e+00  
##    factor(GarageType)None        factor(Heating)GasW  
##                 2.701e+01                 -6.868e+00  
##       factor(Heating)Grav        factor(Heating)OthW  
##                 1.990e+01                 -1.826e+01  
##       factor(Heating)Wall  
##                 5.886e+01
# Refit the model chosen by forward selection so it can be inspected and
# compared by name (mod4). Term order follows the order of entry.
mod4 <- lm(
  Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
    factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
    factor(HeatingQC) + Fireplaces + BasementSF + Condition + YearBuilt +
    LotFrontage + factor(Foundation) + FullBath + factor(GarageType) +
    factor(Heating),
  data = Ames.Train
)
# Stepwise search: start from the intercept-only model with mod2 as the upper
# scope; `direction` is left at its default, so terms may be added or dropped.
step(
  None,
  scope = list(upper = mod2),
  scale = MSE,
  trace = FALSE
)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) + 
##     LotArea + factor(BasementHt) + GarageSF + Fireplaces + BasementSF + 
##     Condition + YearBuilt + LotFrontage + FullBath + factor(Foundation) + 
##     factor(Heating) + factor(GarageType), data = Ames.Train)
## 
## Coefficients:
##               (Intercept)                    Quality  
##                -8.080e+02                  1.019e+01  
##                  GroundSF              BasementFinSF  
##                 5.705e-02                  2.161e-02  
##       factor(ExteriorQ)Fa        factor(ExteriorQ)Gd  
##                -6.564e+01                 -5.878e+01  
##       factor(ExteriorQ)TA                    LotArea  
##                -6.771e+01                  8.114e-04  
##      factor(BasementHt)Fa       factor(BasementHt)Gd  
##                -2.071e+01                 -2.493e+01  
##    factor(BasementHt)None       factor(BasementHt)TA  
##                 1.202e+01                 -2.836e+01  
##                  GarageSF                 Fireplaces  
##                 3.169e-02                  7.302e+00  
##                BasementSF                  Condition  
##                 2.656e-02                  7.630e+00  
##                 YearBuilt                LotFrontage  
##                 4.163e-01                  8.152e-02  
##                  FullBath   factor(Foundation)CBlock  
##                -9.810e+00                 -7.804e+00  
##   factor(Foundation)PConc     factor(Foundation)Slab  
##                 3.748e+00                 -4.812e+01  
##   factor(Foundation)Stone     factor(Foundation)Wood  
##                -3.068e+00                 -2.646e+01  
##       factor(Heating)GasW        factor(Heating)Grav  
##                -1.247e+01                  1.363e+01  
##       factor(Heating)OthW        factor(Heating)Wall  
##                -4.215e+01                  5.347e+01  
##  factor(GarageType)Attchd  factor(GarageType)Basment  
##                 1.206e+01                  1.040e+01  
## factor(GarageType)BuiltIn  factor(GarageType)CarPort  
##                 1.466e+01                 -2.739e+01  
##  factor(GarageType)Detchd     factor(GarageType)None  
##                 8.353e+00                  2.639e+01
# Stepwise regression arrives at the same model as backward elimination (mod3).
# Compare the full model (mod2) against the reduced models: mod3 (backward
# elimination / stepwise) and mod4 (forward selection).
anova(mod3, mod2)
anova(mod4, mod2)
summary(mod2)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + YearRemodel + BasementFinSF + BasementSF + GroundSF + 
##     FullBath + Bedroom + TotalRooms + Fireplaces + GarageSF + 
##     ScreenPorchSF + factor(LotConfig) + factor(HouseStyle) + 
##     factor(ExteriorQ) + factor(ExteriorC) + factor(Foundation) + 
##     factor(BasementHt) + factor(BasementC) + factor(BasementFin) + 
##     factor(Heating) + factor(HeatingQC) + factor(CentralAir) + 
##     factor(KitchenQ) + factor(GarageType) + factor(GarageQ) + 
##     factor(GarageC), data = Ames.Train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -92.741 -12.846  -0.235  12.665 147.618 
## 
## Coefficients: (4 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -7.858e+02  2.357e+02  -3.333 0.000918 ***
## LotFrontage                9.786e-02  3.621e-02   2.702 0.007109 ** 
## LotArea                    7.568e-04  1.556e-04   4.865 1.51e-06 ***
## Quality                    1.014e+01  1.726e+00   5.874 7.53e-09 ***
## Condition                  5.486e+00  1.508e+00   3.638 0.000302 ***
## YearBuilt                  3.349e-01  9.734e-02   3.441 0.000626 ***
## YearRemodel                6.324e-02  9.180e-02   0.689 0.491242    
## BasementFinSF              2.413e-02  4.086e-03   5.906 6.31e-09 ***
## BasementSF                 1.604e-02  6.715e-03   2.388 0.017298 *  
## GroundSF                   5.938e-02  6.439e-03   9.223  < 2e-16 ***
## FullBath                  -8.318e+00  3.161e+00  -2.632 0.008747 ** 
## Bedroom                   -1.461e+00  2.203e+00  -0.663 0.507443    
## TotalRooms                 1.666e+00  1.466e+00   1.136 0.256333    
## Fireplaces                 6.855e+00  2.084e+00   3.290 0.001069 ** 
## GarageSF                   3.242e-02  8.057e-03   4.024 6.56e-05 ***
## ScreenPorchSF              2.362e-02  2.103e-02   1.123 0.261984    
## factor(LotConfig)CulDSac   9.288e+00  5.576e+00   1.666 0.096369 .  
## factor(LotConfig)FR2      -3.357e+00  6.754e+00  -0.497 0.619389    
## factor(LotConfig)FR3      -8.846e+00  1.501e+01  -0.589 0.555920    
## factor(LotConfig)Inside    3.445e+00  2.948e+00   1.169 0.243076    
## factor(HouseStyle)1.5Unf   8.654e+00  1.230e+01   0.704 0.481834    
## factor(HouseStyle)1Story   3.895e+00  5.029e+00   0.775 0.438960    
## factor(HouseStyle)2.5Fin  -1.521e+01  2.784e+01  -0.547 0.584942    
## factor(HouseStyle)2.5Unf  -1.768e+01  1.286e+01  -1.374 0.169970    
## factor(HouseStyle)2Story  -4.711e+00  4.515e+00  -1.043 0.297321    
## factor(HouseStyle)SFoyer  -3.543e+00  8.147e+00  -0.435 0.663868    
## factor(HouseStyle)SLvl     6.956e-01  7.347e+00   0.095 0.924602    
## factor(ExteriorQ)Fa       -4.588e+01  1.760e+01  -2.607 0.009396 ** 
## factor(ExteriorQ)Gd       -5.448e+01  7.278e+00  -7.486 3.01e-13 ***
## factor(ExteriorQ)TA       -5.922e+01  8.193e+00  -7.229 1.73e-12 ***
## factor(ExteriorC)Fa       -2.826e+01  1.830e+01  -1.545 0.123037    
## factor(ExteriorC)Gd       -1.391e+00  1.566e+01  -0.089 0.929255    
## factor(ExteriorC)Po       -2.000e+01  3.280e+01  -0.610 0.542228    
## factor(ExteriorC)TA       -5.497e+00  1.534e+01  -0.358 0.720168    
## factor(Foundation)CBlock  -5.733e+00  4.944e+00  -1.160 0.246740    
## factor(Foundation)PConc    4.134e+00  6.074e+00   0.681 0.496433    
## factor(Foundation)Slab    -3.819e+01  1.608e+01  -2.374 0.017941 *  
## factor(Foundation)Stone    1.737e+01  2.042e+01   0.851 0.395330    
## factor(Foundation)Wood    -2.629e+01  2.008e+01  -1.309 0.191030    
## factor(BasementHt)Fa      -1.913e+01  9.475e+00  -2.019 0.044002 *  
## factor(BasementHt)Gd      -2.355e+01  5.003e+00  -4.706 3.23e-06 ***
## factor(BasementHt)None     8.791e+00  1.675e+01   0.525 0.599950    
## factor(BasementHt)TA      -2.792e+01  6.596e+00  -4.233 2.73e-05 ***
## factor(BasementC)Gd        1.117e+01  9.047e+00   1.235 0.217356    
## factor(BasementC)None             NA         NA      NA       NA    
## factor(BasementC)TA        1.279e+01  6.969e+00   1.835 0.067066 .  
## factor(BasementFin)BLQ     3.554e+00  5.056e+00   0.703 0.482439    
## factor(BasementFin)GLQ     1.418e+00  3.867e+00   0.367 0.714048    
## factor(BasementFin)LwQ    -2.980e+00  5.503e+00  -0.542 0.588357    
## factor(BasementFin)None           NA         NA      NA       NA    
## factor(BasementFin)Rec    -8.300e-01  4.643e+00  -0.179 0.858196    
## factor(BasementFin)Unf     3.018e+00  4.258e+00   0.709 0.478878    
## factor(Heating)GasW       -9.581e+00  1.747e+01  -0.549 0.583566    
## factor(Heating)Grav        2.939e+01  2.413e+01   1.218 0.223718    
## factor(Heating)OthW       -2.012e+01  3.127e+01  -0.643 0.520220    
## factor(Heating)Wall        5.806e+01  2.265e+01   2.564 0.010628 *  
## factor(HeatingQC)Fa       -9.919e+00  7.262e+00  -1.366 0.172558    
## factor(HeatingQC)Gd       -5.630e+00  3.435e+00  -1.639 0.101790    
## factor(HeatingQC)Po       -6.030e+00  2.931e+01  -0.206 0.837071    
## factor(HeatingQC)TA       -3.433e+00  3.399e+00  -1.010 0.312955    
## factor(CentralAir)Y       -1.426e+00  6.510e+00  -0.219 0.826694    
## factor(KitchenQ)Fa         1.024e+00  9.993e+00   0.102 0.918455    
## factor(KitchenQ)Gd        -4.861e+00  5.543e+00  -0.877 0.380878    
## factor(KitchenQ)TA        -7.978e+00  6.190e+00  -1.289 0.198012    
## factor(GarageType)Attchd   1.807e+01  1.384e+01   1.306 0.192089    
## factor(GarageType)Basment  1.701e+01  1.742e+01   0.976 0.329341    
## factor(GarageType)BuiltIn  1.921e+01  1.456e+01   1.320 0.187430    
## factor(GarageType)CarPort -2.918e+01  2.373e+01  -1.230 0.219294    
## factor(GarageType)Detchd   1.506e+01  1.388e+01   1.085 0.278406    
## factor(GarageType)None     4.100e+01  1.760e+01   2.329 0.020223 *  
## factor(GarageQ)Gd          9.769e+00  1.332e+01   0.734 0.463518    
## factor(GarageQ)None               NA         NA      NA       NA    
## factor(GarageQ)Po         -2.368e+01  3.260e+01  -0.727 0.467846    
## factor(GarageQ)TA         -1.885e+00  7.491e+00  -0.252 0.801462    
## factor(GarageC)Gd          1.745e+00  1.763e+01   0.099 0.921175    
## factor(GarageC)None               NA         NA      NA       NA    
## factor(GarageC)Po          6.586e+00  2.059e+01   0.320 0.749159    
## factor(GarageC)TA          7.065e+00  1.025e+01   0.689 0.490849    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.17 on 526 degrees of freedom
## Multiple R-squared:   0.91,  Adjusted R-squared:  0.8975 
## F-statistic: 72.88 on 73 and 526 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for mod3 (backward-elimination model).
summary(mod3)
## 
## Call:
## lm(formula = Price ~ LotFrontage + LotArea + Quality + Condition + 
##     YearBuilt + BasementFinSF + BasementSF + GroundSF + FullBath + 
##     Fireplaces + GarageSF + factor(ExteriorQ) + factor(Foundation) + 
##     factor(BasementHt) + factor(Heating) + factor(GarageType), 
##     data = Ames.Train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -84.101 -14.493  -0.509  11.652 153.947 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -8.080e+02  1.603e+02  -5.042 6.21e-07 ***
## LotFrontage                8.152e-02  3.315e-02   2.459 0.014235 *  
## LotArea                    8.114e-04  1.436e-04   5.649 2.56e-08 ***
## Quality                    1.019e+01  1.610e+00   6.331 4.97e-10 ***
## Condition                  7.630e+00  1.141e+00   6.685 5.55e-11 ***
## YearBuilt                  4.163e-01  8.115e-02   5.130 3.98e-07 ***
## BasementFinSF              2.161e-02  2.956e-03   7.312 9.03e-13 ***
## BasementSF                 2.656e-02  4.385e-03   6.057 2.54e-09 ***
## GroundSF                   5.705e-02  3.775e-03  15.115  < 2e-16 ***
## FullBath                  -9.810e+00  2.914e+00  -3.367 0.000812 ***
## Fireplaces                 7.302e+00  1.996e+00   3.659 0.000277 ***
## GarageSF                   3.169e-02  7.670e-03   4.132 4.14e-05 ***
## factor(ExteriorQ)Fa       -6.564e+01  1.593e+01  -4.120 4.35e-05 ***
## factor(ExteriorQ)Gd       -5.878e+01  6.618e+00  -8.882  < 2e-16 ***
## factor(ExteriorQ)TA       -6.771e+01  7.458e+00  -9.078  < 2e-16 ***
## factor(Foundation)CBlock  -7.804e+00  4.535e+00  -1.721 0.085857 .  
## factor(Foundation)PConc    3.748e+00  5.643e+00   0.664 0.506885    
## factor(Foundation)Slab    -4.812e+01  1.548e+01  -3.108 0.001980 ** 
## factor(Foundation)Stone   -3.068e+00  1.622e+01  -0.189 0.850054    
## factor(Foundation)Wood    -2.646e+01  1.958e+01  -1.351 0.177109    
## factor(BasementHt)Fa      -2.071e+01  9.004e+00  -2.301 0.021780 *  
## factor(BasementHt)Gd      -2.493e+01  4.784e+00  -5.211 2.63e-07 ***
## factor(BasementHt)None     1.202e+01  1.387e+01   0.867 0.386223    
## factor(BasementHt)TA      -2.836e+01  6.143e+00  -4.616 4.85e-06 ***
## factor(Heating)GasW       -1.247e+01  1.592e+01  -0.784 0.433599    
## factor(Heating)Grav        1.363e+01  1.837e+01   0.742 0.458566    
## factor(Heating)OthW       -4.215e+01  2.582e+01  -1.632 0.103162    
## factor(Heating)Wall        5.347e+01  2.036e+01   2.626 0.008876 ** 
## factor(GarageType)Attchd   1.206e+01  1.332e+01   0.905 0.365631    
## factor(GarageType)Basment  1.040e+01  1.677e+01   0.620 0.535515    
## factor(GarageType)BuiltIn  1.466e+01  1.400e+01   1.047 0.295668    
## factor(GarageType)CarPort -2.739e+01  2.242e+01  -1.222 0.222366    
## factor(GarageType)Detchd   8.353e+00  1.340e+01   0.623 0.533270    
## factor(GarageType)None     2.639e+01  1.487e+01   1.775 0.076430 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.09 on 566 degrees of freedom
## Multiple R-squared:  0.9038, Adjusted R-squared:  0.8982 
## F-statistic: 161.2 on 33 and 566 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for mod4 (forward-selection model).
summary(mod4)
## 
## Call:
## lm(formula = Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) + 
##     factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF + 
##     factor(HeatingQC) + Fireplaces + BasementSF + Condition + 
##     YearBuilt + LotFrontage + factor(Foundation) + FullBath + 
##     factor(GarageType) + factor(Heating), data = Ames.Train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -87.347 -14.123  -0.541  11.567 153.249 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -7.088e+02  1.700e+02  -4.169 3.55e-05 ***
## Quality                    1.060e+01  1.649e+00   6.432 2.72e-10 ***
## GroundSF                   6.089e-02  5.162e-03  11.794  < 2e-16 ***
## BasementFinSF              2.245e-02  2.994e-03   7.499 2.57e-13 ***
## factor(ExteriorQ)Fa       -6.252e+01  1.607e+01  -3.891 0.000112 ***
## factor(ExteriorQ)Gd       -5.858e+01  6.624e+00  -8.843  < 2e-16 ***
## factor(ExteriorQ)TA       -6.536e+01  7.503e+00  -8.710  < 2e-16 ***
## factor(HouseStyle)1.5Unf   8.700e+00  1.149e+01   0.757 0.449221    
## factor(HouseStyle)1Story   3.855e+00  4.781e+00   0.806 0.420363    
## factor(HouseStyle)2.5Fin  -1.128e+01  2.692e+01  -0.419 0.675400    
## factor(HouseStyle)2.5Unf  -1.954e+01  1.226e+01  -1.594 0.111556    
## factor(HouseStyle)2Story  -2.159e+00  4.319e+00  -0.500 0.617248    
## factor(HouseStyle)SFoyer  -8.635e-01  7.831e+00  -0.110 0.912237    
## factor(HouseStyle)SLvl     2.966e+00  7.063e+00   0.420 0.674673    
## LotArea                    8.050e-04  1.447e-04   5.563 4.12e-08 ***
## factor(BasementHt)Fa      -2.176e+01  9.101e+00  -2.391 0.017131 *  
## factor(BasementHt)Gd      -2.403e+01  4.799e+00  -5.008 7.42e-07 ***
## factor(BasementHt)None     6.051e+00  1.463e+01   0.414 0.679255    
## factor(BasementHt)TA      -2.845e+01  6.264e+00  -4.542 6.85e-06 ***
## GarageSF                   3.092e-02  7.820e-03   3.954 8.67e-05 ***
## factor(HeatingQC)Fa       -1.310e+01  6.907e+00  -1.896 0.058476 .  
## factor(HeatingQC)Gd       -6.606e+00  3.308e+00  -1.997 0.046300 *  
## factor(HeatingQC)Po       -2.861e+00  2.748e+01  -0.104 0.917125    
## factor(HeatingQC)TA       -4.945e+00  3.141e+00  -1.574 0.115971    
## Fireplaces                 7.307e+00  2.013e+00   3.630 0.000310 ***
## BasementSF                 2.095e-02  6.319e-03   3.315 0.000978 ***
## Condition                  7.146e+00  1.199e+00   5.963 4.41e-09 ***
## YearBuilt                  3.650e-01  8.644e-02   4.223 2.82e-05 ***
## LotFrontage                8.541e-02  3.340e-02   2.557 0.010816 *  
## factor(Foundation)CBlock  -5.861e+00  4.673e+00  -1.254 0.210261    
## factor(Foundation)PConc    4.706e+00  5.763e+00   0.817 0.414452    
## factor(Foundation)Slab    -4.428e+01  1.568e+01  -2.823 0.004929 ** 
## factor(Foundation)Stone   -2.640e+00  1.665e+01  -0.159 0.874083    
## factor(Foundation)Wood    -2.441e+01  1.975e+01  -1.236 0.217136    
## FullBath                  -9.179e+00  2.956e+00  -3.105 0.002000 ** 
## factor(GarageType)Attchd   1.163e+01  1.337e+01   0.870 0.384655    
## factor(GarageType)Basment  8.515e+00  1.688e+01   0.504 0.614116    
## factor(GarageType)BuiltIn  1.358e+01  1.409e+01   0.964 0.335537    
## factor(GarageType)CarPort -2.986e+01  2.244e+01  -1.330 0.183925    
## factor(GarageType)Detchd   8.297e+00  1.345e+01   0.617 0.537523    
## factor(GarageType)None     2.701e+01  1.498e+01   1.803 0.071926 .  
## factor(Heating)GasW       -6.868e+00  1.643e+01  -0.418 0.676161    
## factor(Heating)Grav        1.990e+01  2.015e+01   0.987 0.324010    
## factor(Heating)OthW       -1.826e+01  2.919e+01  -0.625 0.531996    
## factor(Heating)Wall        5.886e+01  2.123e+01   2.772 0.005751 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.07 on 555 degrees of freedom
## Multiple R-squared:  0.9058, Adjusted R-squared:  0.8984 
## F-statistic: 121.3 on 44 and 555 DF,  p-value: < 2.2e-16
# Larger candidate model: every mod2 predictor plus BasementUnFinSF, FirstSF,
# SecondSF, BasementFBath, BasementHBath, HalfBath, GarageCars, WoodDeckSF,
# OpenPorchSF and EnclosedPorchSF.
mod5 <- lm(
  Price ~ LotFrontage + LotArea + factor(LotConfig) + factor(HouseStyle) +
    Quality + Condition + YearBuilt + YearRemodel + factor(ExteriorQ) +
    factor(ExteriorC) + factor(Foundation) + factor(BasementHt) +
    factor(BasementC) + factor(BasementFin) + BasementFinSF +
    BasementUnFinSF + BasementSF + factor(Heating) + factor(HeatingQC) +
    factor(CentralAir) + FirstSF + SecondSF + GroundSF + BasementFBath +
    BasementHBath + FullBath + HalfBath + Bedroom + factor(KitchenQ) +
    TotalRooms + Fireplaces + factor(GarageType) + GarageCars + GarageSF +
    factor(GarageQ) + factor(GarageC) + WoodDeckSF + OpenPorchSF +
    EnclosedPorchSF + ScreenPorchSF,
  data = Ames.Train
)
# Subset search over the mod3 (backward-elimination) predictors: keep the
# single best model of each size, up to 50 variables, then display the result
# with the ShowSubsets() helper sourced from ShowSubsets.R.
all.predictors1 <- regsubsets(
  Price ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF + factor(ExteriorQ) + factor(Foundation) +
    factor(BasementHt) + factor(Heating) + factor(GarageType),
  data = Ames.Train,
  nbest = 1,
  nvmax = 50
)
ShowSubsets(all.predictors1)
# Same subset search, this time over the mod4 (forward-selection) predictors.
all.predictors2 <- regsubsets(
  Price ~ Quality + GroundSF + BasementFinSF + factor(ExteriorQ) +
    factor(HouseStyle) + LotArea + factor(BasementHt) + GarageSF +
    factor(HeatingQC) + Fireplaces + BasementSF + Condition + YearBuilt +
    LotFrontage + factor(Foundation) + FullBath + factor(GarageType) +
    factor(Heating),
  data = Ames.Train,
  nbest = 1,
  nvmax = 50
)
ShowSubsets(all.predictors2)
# Mallows' Cp of each reduced model, measured against the full model (mod2).
ols_mallows_cp(mod3, mod2)
## [1] -3.842899
ols_mallows_cp(mod4, mod2)
## [1] -11.41771
# Transformations ----
# mod6: log-transformed response, Price itself is replaced by log(Price);
# the predictors enter untransformed.
mod6 <- lm(
  log(Price) ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF + factor(ExteriorQ) + factor(Foundation) +
    factor(BasementHt) + factor(Heating) + factor(GarageType),
  data = Ames.Train
)
# Standard lm diagnostic plots for the log-response fit
plot(mod6)
## Warning: not plotting observations with leverage one:
##   427

# Box-Cox profile computed on mod1; lambda is the grid value BC$x at which the
# profiled value BC$y is largest.
# NOTE(review): lambda is estimated from mod1 but is used further down as the
# power on Price in a model with a different predictor set - confirm intended.
BC <- boxCox(mod1)
lambda <- BC$x[which.max(BC$y)]
lambda
## [1] 0.3030303
# mod7: power-transformed response, Price raised to `lambda`; the predictors
# enter untransformed.
mod7 <- lm(
  Price^lambda ~ LotFrontage + LotArea + Quality + Condition + YearBuilt +
    BasementFinSF + BasementSF + GroundSF + FullBath + Fireplaces +
    GarageSF + factor(ExteriorQ) + factor(Foundation) +
    factor(BasementHt) + factor(Heating) + factor(GarageType),
  data = Ames.Train
)
# Standard lm diagnostic plots for the power-transformed fit
plot(mod7)
## Warning: not plotting observations with leverage one:
##   427

# mod8: log(Price) response with every numeric predictor square-rooted;
# categorical predictors stay as factor() terms.
mod8 <- lm(
  log(Price) ~ sqrt(Quality) + sqrt(GroundSF) + factor(ExteriorQ) +
    sqrt(BasementFinSF) + sqrt(LotArea) + factor(BasementHt) +
    sqrt(YearBuilt) + sqrt(Condition) + sqrt(BasementSF) + sqrt(GarageSF) +
    sqrt(Fireplaces) + factor(Foundation) + sqrt(LotFrontage) +
    sqrt(FullBath) + factor(Heating) + sqrt(EnclosedPorchSF) +
    factor(GarageType),
  data = Ames.Train
)
# Standard lm diagnostic plots for mod8
plot(mod8)
## Warning: not plotting observations with leverage one:
##   427

# mod9: same right-hand side as the log(Price) model with square-rooted numeric
# predictors, but with sqrt(Price) as the response instead.
mod9 <- lm(
  sqrt(Price) ~ sqrt(Quality) + sqrt(GroundSF) + factor(ExteriorQ) +
    sqrt(BasementFinSF) + sqrt(LotArea) + factor(BasementHt) +
    sqrt(YearBuilt) + sqrt(Condition) + sqrt(BasementSF) + sqrt(GarageSF) +
    sqrt(Fireplaces) + factor(Foundation) + sqrt(LotFrontage) +
    sqrt(FullBath) + factor(Heating) + sqrt(EnclosedPorchSF) +
    factor(GarageType),
  data = Ames.Train
)
# Standard lm diagnostic plots for mod9
plot(mod9)
## Warning: not plotting observations with leverage one:
##   427

#PART 8: cross-validation of mod8 on Ames.Test

# Cross-validate mod8 on the holdout sample. mod8 models log(Price), so
# predict() returns fitted values on the log scale; the observed test prices
# are logged as well so residuals and correlations use matching scales (the
# same scale as mod8's own residuals and R-squared).
fitTrain.2 <- predict(mod8, newdata = Ames.Test)
holdoutresid.2 <- log(Ames.Test$Price) - fitTrain.2

# NOTE: the outputs previously recorded for this section (mean 177.1431,
# sd 77.05929, squared correlation 0.8808207, shrinkage 0.0615858) came from
# subtracting log-scale fits from raw prices and from printing `crosscorr`
# rather than `crosscorr2`; re-run this section to record fresh values.
mean(holdoutresid.2)
sd(holdoutresid.2)
# Training residual spread, on the log scale, for comparison with the holdout
sd(mod8$residuals)
## [1] 0.1311035
hist(holdoutresid.2)

# Squared cross-validation correlation between observed and predicted
# log(Price) in the holdout sample
crosscorr2 <- cor(log(Ames.Test$Price), fitTrain.2)
crosscorr2^2
# Shrinkage: training R-squared minus the squared holdout correlation
shrinkage <- summary(mod8)$r.squared - crosscorr2^2
shrinkage

#PART 9: predicted price for a new home using mod8

# Single-row data frame describing the house to be priced.
NewHome <- data.frame(
  YearBuilt = 1995, YearRemodel = 2003,
  Quality = 7, Condition = 5,
  ExteriorQ = "Gd", ExteriorC = "Gd",
  LotFrontage = 90, LotArea = 11060,
  LotConfig = "Corner", HouseStyle = "2Story",
  Foundation = "PConc",
  BasementHt = "Ex", BasementSF = 1150, BasementFinSF = 0,
  BasementFin = "Unf",
  Heating = "GasA", HeatingQC = "Ex", CentralAir = "Y",
  FirstSF = 1164, SecondSF = 1150, GroundSF = 2314,
  BasementFBath = 0, BasementHBath = 0,
  FullBath = 2, HalfBath = 1,
  Bedroom = 3, TotalRooms = 9, Fireplaces = 1,
  GarageType = "Attchd",
  ScreenPorchSF = 0,
  GarageCars = 2, GarageSF = 502, GarageQ = "TA", GarageC = "TA",
  OpenPorchSF = 274, EnclosedPorchSF = 0
)
# mod8 predicts log(Price); exp() maps the point prediction and the 95%
# prediction-interval bounds back to the Price scale.
exp(predict(mod8, newdata = NewHome, interval = "prediction", level = 0.95))
##        fit      lwr      upr
## 1 259.6453 197.9445 340.5787