Chapter 25: Inferences for Regression

Section 25.1: The population and the sample

# Load the packages used by this chapter's examples.
library(mosaic)
library(readr)
# TODO: the path/URL of the body-fat data file is blank here; supply it
# before running, otherwise nothing is read.
BodyFat <- read_csv(file = "")
We can confirm the coefficients from the model on page 690.

# Fit the simple linear regression of PctBF on waist.
BodyFatmod <- lm(PctBF ~ waist, data = BodyFat)
# Print the fitted intercept and slope so they can be compared with the book.
coef(BodyFatmod)
## (Intercept)       waist 
##       -42.7         1.7

Section 25.2: Assumptions and conditions

We can regenerate the output and figures for the example on pages 692-696.

# Coefficient table, residual standard error, R-squared and F-statistic.
# NOTE(review): the call producing this output was missing; msummary() is
# inferred from the compact output format (no Call/Residuals sections) — confirm.
msummary(BodyFatmod)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -42.7341     2.7165   -15.7   <2e-16 ***
## waist         1.7000     0.0743    22.9   <2e-16 ***
## Residual standard error: 4.71 on 248 degrees of freedom
## Multiple R-squared:  0.678,  Adjusted R-squared:  0.677 
## F-statistic:  523 on 1 and 248 DF,  p-value: <2e-16
# R-squared on its own (same value as "Multiple R-squared" above).
# NOTE(review): call inferred from the single-number output — confirm.
rsquared(BodyFatmod)
## [1] 0.678
# 95% confidence intervals for the intercept and the slope.
confint(BodyFatmod)    # see page 700
##              2.5 % 97.5 %
## (Intercept) -48.08 -37.38
## waist         1.55   1.85
# Figure 25.4: scatterplot of PctBF against waist with points ("p"),
# the regression line ("r") and a smoother ("smooth"); see smoothers on p.92-93.
xyplot(
  PctBF ~ waist,
  data = BodyFat,
  type = c("p", "r", "smooth"),
  xlab = "Waist (in.)"
)

# Figure 25.5: residuals of BodyFatmod plotted against waist.
xyplot(
  resid(BodyFatmod) ~ waist,
  data = BodyFat,
  type = c("p", "r", "smooth"),
  xlab = "Waist (in.)"
)

# Equivalent of Figure 25.6 (residuals against fitted values). Note that
# Figure 25.6 in the book refers to the diamonds dataset, not this one.
xyplot(
  resid(BodyFatmod) ~ fitted(BodyFatmod),
  data = BodyFat,
  type = c("p", "r", "smooth"),
  xlab = "Predicted values"
)

# Figure on bottom of page 695: quantile plot of the residuals.
xqqmath(~ resid(BodyFatmod))

Section 25.6: Confidence intervals for predicted values

We can reproduce Figure 25.12 (page 707) using the panel.lmbands() function.

# Figure 25.12 (page 707): scatterplot of PctBF against waist, drawn with
# the panel.lmbands panel function; thicker lines and small points.
xyplot(
  PctBF ~ waist,
  data = BodyFat,
  panel = panel.lmbands,
  xlab = "Waist (in.)",
  lwd = 2,
  cex = 0.2
)

# TODO: the path/URL of the craters data file is blank here; supply it
# before running.
Craters <- read.csv("")
# Number of rows and columns in the raw data.
dim(Craters)
## [1] 168   4
# Add natural-log versions of the diameter and age columns.
# NOTE(review): the diameter column name was missing from this call (it read
# `log(,`, which does not parse). `Diam.km.` is reconstructed from the printed
# output below (the 8-character gap in the header; log of that column equals
# logDiam) — confirm with names(Craters).
Craters <- mutate(Craters,
                  logDiam = log(Diam.km.),
                  logAge = log(age..Ma.))
# Regress log diameter on log age.
Cratermod <- lm(logDiam ~ logAge, data = Craters)
favstats(~ logAge, data = Craters)   # note example in book has n=39
##    min   Q1 median   Q3  max mean   sd   n missing
##  -9.81 3.61   4.82 5.95 7.78 3.76 3.46 168       0
# Interval estimates at the observed logAge values: confidence intervals for
# the mean response, and prediction intervals for individual responses.
confpred <- predict(Cratermod, interval = "confidence")
intpred <- predict(Cratermod, interval = "prediction")
## Warning in predict.lm(Cratermod, interval = "prediction"): predictions on current data refer to _future_ responses
# First three rows of the data (without Name), then of each set of intervals.
select(Craters, -Name) %>% head(., 3)
##                              Location Diam.km. age..Ma. logDiam logAge
## 1                      Kansas, U.S.A.    0.015  1.0e-03   -4.20  -6.91
## 2 Western Australia,        Australia    0.024  2.7e-01   -3.73  -1.31
## 3                              Russia    0.027  5.5e-05   -3.61  -9.81
head(confpred, 3)
##       fit    lwr    upr
## 1 -2.1535 -2.766 -1.541
## 2 -0.0639 -0.399  0.271
## 3 -3.2362 -4.001 -2.471
head(intpred, 3)
##       fit   lwr    upr
## 1 -2.1535 -4.68  0.368
## 2 -0.0639 -2.53  2.405
## 3 -3.2362 -5.80 -0.673

Section 25.7: Logistic regression

The Pima Indian dataset example is given on pages 708-712.

# TODO: the path/URL of the Pima data file is blank here; supply it before
# running.
Pima <- read_csv("")
# Keep only rows with a positive BMI. The result is assigned back to Pima so
# that the plots and the model below actually use the filtered rows
# (previously it was stored in an unused object named Diabetes, so the
# filter had no effect).
Pima <- filter(Pima, BMI > 0)  # get rid of missing values for BMI
# Boxplots of BMI for each level of Diabetes.
bwplot(BMI ~ as.factor(Diabetes), data = Pima)

# Logistic regression of Diabetes on BMI.
pimamod <- glm(Diabetes ~ BMI, family = "binomial", data = Pima)
# Turn the fitted model into a function and overlay it on the scatterplot.
f2 <- makeFun(pimamod)
xyplot(Diabetes ~ BMI, data = Pima)
plotFun(f2, add = TRUE)

# NOTE(review): the call producing the output below was missing; msummary()
# is inferred from the output format — confirm.
# NOTE(review): the output shown was generated from the unfiltered data
# (767 null degrees of freedom); regenerate it now that the BMI filter is
# applied.
msummary(pimamod)
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.6864     0.4090   -9.01  < 2e-16 ***
## BMI           0.0935     0.0121    7.76  8.4e-15 ***
## (Dispersion parameter for binomial family taken to be 1)
##     Null deviance: 993.48  on 767  degrees of freedom
## Residual deviance: 920.71  on 766  degrees of freedom
## AIC: 924.7
## Number of Fisher Scoring iterations: 4