Saturday, October 31, 2020

Linear Regression Using R (Faraway's Book) - Part 2

Estimation

Chapter 2 (Estimation)

Data Estimation Using R

# Attach the faraway package and load its `gala` data set.
library("faraway")
data("gala", package = "faraway")
# Preview the first six rows, leaving out the second column.
head(gala[, -2], n = 6L)
##              Species  Area Elevation Nearest Scruz Adjacent
## Baltra            58 25.09       346     0.6   0.6     1.84
## Bartolome         31  1.24       109     0.6  26.3   572.33
## Caldwell           3  0.21       114     2.8  58.7     0.78
## Champion          25  0.10        46     1.9  47.4     0.18
## Coamano            2  0.05        77     1.9   1.9   903.82
## Daphne.Major      18  0.34       119     8.0   8.0     1.84
# Regress Species on the five predictors by ordinary least squares, then
# print the standard regression summary.
lmod <- lm(
  formula = Species ~ Area + Elevation + Nearest + Scruz + Adjacent,
  data = gala
)
summary(lmod)
## 
## Call:
## lm(formula = Species ~ Area + Elevation + Nearest + Scruz + Adjacent, 
##     data = gala)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -111.679  -34.898   -7.862   33.460  182.584 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.068221  19.154198   0.369 0.715351    
## Area        -0.023938   0.022422  -1.068 0.296318    
## Elevation    0.319465   0.053663   5.953 3.82e-06 ***
## Nearest      0.009144   1.054136   0.009 0.993151    
## Scruz       -0.240524   0.215402  -1.117 0.275208    
## Adjacent    -0.074805   0.017700  -4.226 0.000297 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared:  0.7658, Adjusted R-squared:  0.7171 
## F-statistic:  15.7 on 5 and 24 DF,  p-value: 6.838e-07
# sumary() comes from the faraway package and prints a condensed version of
# the summary() table. Attach the package with library(), which stops with an
# error when faraway is not installed; require() would only return FALSE and
# let the sumary() call below fail with a less helpful message.
library(faraway)
sumary(lmod)
##              Estimate Std. Error t value  Pr(>|t|)
## (Intercept)  7.068221  19.154198  0.3690 0.7153508
## Area        -0.023938   0.022422 -1.0676 0.2963180
## Elevation    0.319465   0.053663  5.9532 3.823e-06
## Nearest      0.009144   1.054136  0.0087 0.9931506
## Scruz       -0.240524   0.215402 -1.1166 0.2752082
## Adjacent    -0.074805   0.017700 -4.2262 0.0002971
## 
## n = 30, p = 6, Residual SE = 60.97519, R-Squared = 0.77

Estimation Process

# Build the design matrix X (intercept column plus the five predictors) and
# the response vector y.
x <- model.matrix(
  object = ~ Area + Elevation + Nearest + Scruz + Adjacent,
  data = gala
)
y <- gala[["Species"]]
# Least squares estimate through the explicit inverse: (X'X)^{-1} X'y.
xtxi <- solve(t(x) %*% x)
xtxi %*% t(x) %*% y
##                     [,1]
## (Intercept)  7.068220709
## Area        -0.023938338
## Elevation    0.319464761
## Nearest      0.009143961
## Scruz       -0.240524230
## Adjacent    -0.074804832
# Same estimate without forming the inverse: solve (X'X) b = X'y directly.
solve(a = crossprod(x, x), b = crossprod(x, y))
##                     [,1]
## (Intercept)  7.068220709
## Area        -0.023938338
## Elevation    0.319464761
## Nearest      0.009143961
## Scruz       -0.240524230
## Adjacent    -0.074804832
# Components stored on the fitted lm object.
names(lmod)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
# Components stored on the summary object.
lmodsum <- summary(lmod)
names(lmodsum)
##  [1] "call"          "terms"         "residuals"     "coefficients" 
##  [5] "aliased"       "sigma"         "df"            "r.squared"    
##  [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
# Residual standard error from its definition: sqrt(RSS / residual df).
sqrt(deviance(lmod) / df.residual(lmod))
## [1] 60.97519
# The same quantity as stored by summary().
lmodsum$sigma
## [1] 60.97519
# Standard errors of the coefficients: sigma-hat times the square roots of the
# diagonal of the unscaled covariance matrix (X'X)^{-1}. The stored sigma is
# used instead of a rounded literal, so the result does not go stale if the
# model changes and agrees with the summary table to full printed precision.
xtxi <- lmodsum$cov.unscaled
sqrt(diag(xtxi)) * lmodsum$sigma
## (Intercept)        Area   Elevation     Nearest       Scruz    Adjacent 
## 19.15419782  0.02242235  0.05366280  1.05413595  0.21540225  0.01770019
# Cross-check against the "Std. Error" column of the coefficient table. The
# component is spelled out in full rather than relying on `$` partial matching.
lmodsum$coefficients[, "Std. Error"]
## (Intercept)        Area   Elevation     Nearest       Scruz    Adjacent 
## 19.15419782  0.02242235  0.05366280  1.05413595  0.21540225  0.01770019
# QR route to the least squares estimate: factor X = QR, form f = Q'y, then
# solve the upper-triangular system R b = f.
qrx <- qr(x)
# Q has one row per observation and one column per model coefficient.
dim(qr.Q(qrx))
## [1] 30  6
f <- t(qr.Q(qrx)) %*% y
f
##             [,1]
## [1,] -466.842193
## [2,]  381.405574
## [3,]  256.250473
## [4,]    5.407646
## [5,] -119.498340
## [6,]  257.694369
# Back-substitution reproduces the coefficients obtained from lm().
backsolve(r = qr.R(qrx), x = f)
##              [,1]
## [1,]  7.068220709
## [2,] -0.023938338
## [3,]  0.319464761
## [4,]  0.009143961
## [5,] -0.240524230
## [6,] -0.074804832
# Identifiability demonstration: add a predictor that is an exact linear
# combination of two existing ones (Area - Adjacent) and refit the model.
gala[["Adiff"]] <- gala[["Area"]] - gala[["Adjacent"]]
lmod <- lm(
  formula = Species ~ Area + Elevation + Nearest + Scruz + Adjacent + Adiff,
  data = gala
)
sumary(lmod)
## 
## Coefficients: (1 not defined because of singularities)
##              Estimate Std. Error t value  Pr(>|t|)
## (Intercept)  7.068221  19.154198  0.3690 0.7153508
## Area        -0.023938   0.022422 -1.0676 0.2963180
## Elevation    0.319465   0.053663  5.9532 3.823e-06
## Nearest      0.009144   1.054136  0.0087 0.9931506
## Scruz       -0.240524   0.215402 -1.1166 0.2752082
## Adjacent    -0.074805   0.017700 -4.2262 0.0002971
## 
## n = 30, p = 6, Residual SE = 60.97519, R-Squared = 0.77
# Near-collinearity demonstration: perturb Adiff by a small uniform noise term
# in [-0.0005, 0.0005] so the design matrix is no longer exactly singular.
# The seed is fixed so the printed estimates are reproducible.
set.seed(123)
# One random draw per row of gala, rather than a hard-coded sample size.
Adiffe <- gala$Adiff + 0.001 * (runif(nrow(gala)) - 0.5)
# Adiffe is not a column of gala; lm() finds it in the calling environment.
lmod <- lm(
  Species ~ Area + Elevation + Nearest + Scruz + Adjacent + Adiffe,
  data = gala
)
sumary(lmod)
##                Estimate  Std. Error t value  Pr(>|t|)
## (Intercept)  3.2964e+00  1.9434e+01  0.1696    0.8668
## Area        -4.5123e+04  4.2583e+04 -1.0596    0.3003
## Elevation    3.1302e-01  5.3870e-02  5.8107 6.398e-06
## Nearest      3.8273e-01  1.1090e+00  0.3451    0.7331
## Scruz       -2.6199e-01  2.1581e-01 -1.2140    0.2371
## Adjacent     4.5123e+04  4.2583e+04  1.0596    0.3003
## Adiffe       4.5123e+04  4.2583e+04  1.0596    0.3003
## 
## n = 30, p = 7, Residual SE = 60.81975, R-Squared = 0.78
# Load the `odor` data set from faraway and display it in full.
data("odor", package = "faraway")
print(odor)
##    odor temp gas pack
## 1    66   -1  -1    0
## 2    39    1  -1    0
## 3    43   -1   1    0
## 4    49    1   1    0
## 5    58   -1   0   -1
## 6    17    1   0   -1
## 7    -5   -1   0    1
## 8   -40    1   0    1
## 9    65    0  -1   -1
## 10    7    0   1   -1
## 11   43    0  -1    1
## 12  -22    0   1    1
## 13  -31    0   0    0
## 14  -35    0   0    0
## 15  -26    0   0    0
# Covariance matrix of every column except the first; zero off-diagonal
# entries mean the remaining columns are mutually uncorrelated.
cov(x = odor[, -1])
##           temp       gas      pack
## temp 0.5714286 0.0000000 0.0000000
## gas  0.0000000 0.5714286 0.0000000
## pack 0.0000000 0.0000000 0.5714286
# Fit odor on all three predictors and also print the correlation matrix of
# the coefficient estimates. The argument is written out as
# `correlation = TRUE`: `cor` relied on partial argument matching, and `T` is
# an ordinary variable that can be reassigned, unlike the constant TRUE.
lmod <- lm(odor ~ temp + gas + pack, data = odor)
summary(lmod, correlation = TRUE)
## 
## Call:
## lm(formula = odor ~ temp + gas + pack, data = odor)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.200 -17.138   1.175  20.300  62.925 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   15.200      9.298   1.635    0.130
## temp         -12.125     12.732  -0.952    0.361
## gas          -17.000     12.732  -1.335    0.209
## pack         -21.375     12.732  -1.679    0.121
## 
## Residual standard error: 36.01 on 11 degrees of freedom
## Multiple R-squared:  0.3337, Adjusted R-squared:  0.1519 
## F-statistic: 1.836 on 3 and 11 DF,  p-value: 0.1989
## 
## Correlation of Coefficients:
##      (Intercept) temp gas 
## temp 0.00                 
## gas  0.00        0.00     
## pack 0.00        0.00 0.00
# Refit without temp. Compare the gas and pack estimates with those of the
# three-predictor fit to see the effect of dropping a predictor.
lmod <- lm(formula = odor ~ gas + pack, data = odor)
summary(object = lmod)
## 
## Call:
## lm(formula = odor ~ gas + pack, data = odor)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -50.200 -26.700   1.175  26.800  50.800 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   15.200      9.262   1.641    0.127
## gas          -17.000     12.683  -1.340    0.205
## pack         -21.375     12.683  -1.685    0.118
## 
## Residual standard error: 35.87 on 12 degrees of freedom
## Multiple R-squared:  0.2787, Adjusted R-squared:  0.1585 
## F-statistic: 2.319 on 2 and 12 DF,  p-value: 0.1408

References

Faraway, J. J. (2004). Linear Models with R. https://doi.org/10.4324/9780203507278

Faraway, J. (2009). Texts in Statistical Science: Linear Models with R. Taylor and Francis Group.

Faraway, J. (2014). Texts in Statistical Science: Linear Models with R. Chapman & Hall/CRC Press. 274 pages, ISBN-13: 9781439887332.

0 comments :

Post a Comment

Related Posts Plugin for WordPress, Blogger...

 
Design by Free WordPress Themes | Bloggerized by Lasantha - Premium Blogger Themes | Web Hosting Coupons