## ----echo=FALSE,eval=TRUE------------------------------------------------ options(continue=" ") ## ------------------------------------------------------------------------ options(digits=3) # read in Stata format library(foreign) ds = read.dta("http://www.amherst.edu/~nhorton/r2/datasets/help.dta", convert.underscore=FALSE) library(dplyr) ds = mutate(ds, sub=factor(substance, levels=c("heroin", "alcohol", "cocaine"))) newds = filter(ds, female==1) alcohol = filter(newds, substance=="alcohol") cocaine = filter(newds, substance=="cocaine") heroin = filter(newds, substance=="heroin") ## ----lowessr,echo=TRUE,eval=TRUE----------------------------------------- with(newds, plot(age, i1, ylim=c(0,40), type="n", cex.lab=1.2, cex.axis=1.2)) with(alcohol, points(age, i1, pch="a")) with(alcohol, lines(lowess(age, i1), lty=1, lwd=2)) with(cocaine, points(age, i1, pch="c")) with(cocaine, lines(lowess(age, i1), lty=2, lwd=2)) with(heroin, points(age, i1, pch="h")) with(heroin, lines(lowess(age, i1), lty=3, lwd=2)) legend(44, 38, legend=c("alcohol", "cocaine", "heroin"), lty=1:3, cex=1.4, lwd=2, pch=c("a", "c", "h")) ## ----latplot1------------------------------------------------------------ xyplot(i1 ~ age, groups=substance, type=c("p", "smooth"), auto.key=list(columns=3, lines=TRUE, points=FALSE), ylim=c(0, 40), data=newds) ## ----ggplot1------------------------------------------------------------- library(ggplot2) ggplot(data=newds, aes(x=age, y=i1)) + geom_point(aes(shape=substance)) + stat_smooth(method=loess, level=0.50, colour="black") + aes(linetype=substance) + coord_cartesian(ylim = c(0, 40)) + theme(legend.position="top") + labs(title="") ## ----comparemod---------------------------------------------------------- options(show.signif.stars=FALSE) lm1 = lm(i1 ~ sub * age, data=newds) lm2 = lm(i1 ~ sub + age, data=newds) ## ------------------------------------------------------------------------ anova(lm2, lm1) ## ------------------------------------------------------------------------ summary.aov(lm1) ## ------------------------------------------------------------------------ summary(lm1) ## ------------------------------------------------------------------------ confint(lm1) ## ----echo=TRUE,eval=FALSE------------------------------------------------ ## library(xtable) ## lmtab = xtable(lm1, digits=c(0,3,3,2,4), label="better", ## caption="Formatted results using the {\\tt xtable} package") ## print(lmtab) # output the LaTeX ## ----results='asis',echo=FALSE------------------------------------------- library(xtable) lmtab = xtable(lm1, digits=c(0,3,3,2,4), label="better", caption="Formatted results using the {\\tt xtable} package") print(lmtab) # output the LaTeX ## ------------------------------------------------------------------------ names(summary(lm1)) summary(lm1)$sigma ## ----names--------------------------------------------------------------- names(lm1) ## ------------------------------------------------------------------------ coef(lm1) ## ------------------------------------------------------------------------ vcov(lm1) ## ------------------------------------------------------------------------ mymodel = coef(summary(lm1)) mymodel mymodel[2,3] # alcohol t-value ## ----coefplot1----------------------------------------------------------- library(mosaic) mplot(lm2, which=7, rows=-1) ## ----resid--------------------------------------------------------------- library(dplyr) newds = mutate(newds, pred = fitted(lm1), resid = residuals(lm1)) with(newds, quantile(resid)) ## ----whichresid---------------------------------------------------------- library(dplyr) newds %>% select(id, age, i1, sub, pred, resid) %>% mutate(rstand = rstandard(lm1)) %>% filter(resid==max(resid)) ## ----rdxplots,echo=TRUE,eval=TRUE, tidy=FALSE---------------------------- oldpar = par(mfrow=c(2, 2), mar=c(4, 4, 2, 2) + .1) plot(lm1); par(oldpar) ## ----histr,echo=TRUE,eval=TRUE,message=FALSE----------------------------- library(MASS) std.res = rstandard(lm1) hist(std.res, breaks=seq(-2.5, 3.5, by=.5), main="", xlab="standardized residuals", col="gray80", freq=FALSE) lines(density(std.res), lwd=2) xvals = seq(from=min(std.res), to=max(std.res), length=100) lines(xvals, dnorm(xvals, mean(std.res), sd(std.res)), lty=2) ## ----echo=FALSE---------------------------------------------------------- detach(package:MASS) ## ----unique-------------------------------------------------------------- uniquevals = unique(newds$substance) numunique = length(uniquevals) formula = as.formula(i1 ~ age) p = length(coef(lm(formula, data=newds))) res = matrix(rep(0, numunique*p), p, numunique) for (i in 1:length(uniquevals)) { res[,i] = coef(lm(formula, data=subset(newds, substance==uniquevals[i]))) } rownames(res) = c("intercept","slope") colnames(res) = uniquevals res ## ------------------------------------------------------------------------ library(dplyr) ds = mutate(ds, genf = as.factor(ifelse(female, "F", "M"))) ## ----intplotr,echo=TRUE, eval=TRUE--------------------------------------- with(ds, interaction.plot(substance, genf, cesd, xlab="substance", las=1, lwd=2)) ## ----message=FALSE------------------------------------------------------- library(dplyr) library(memisc) ds = mutate(ds, subs = cases( "Alc" = substance=="alcohol", "Coc" = substance=="cocaine", "Her" = substance=="heroin")) ## ----boxplotr,echo=TRUE, eval=TRUE,message=FALSE------------------------- boxout = with(ds, boxplot(cesd ~ subs + genf, notch=TRUE, varwidth=TRUE, col="gray80")) boxmeans = with(ds, tapply(cesd, list(subs, genf), mean)) points(seq(boxout$n), boxmeans, pch=4, cex=2) ## ------------------------------------------------------------------------ aov1 = aov(cesd ~ sub * genf, data=ds) aov2 = aov(cesd ~ sub + genf, data=ds) anova(aov2, aov1) ## ------------------------------------------------------------------------ options(digits=8) logLik(aov1) logLik(aov2) lldiff = logLik(aov1)[1] - logLik(aov2)[1] lldiff 1 - pchisq(2*lldiff, df=2) options(digits=3) ## ------------------------------------------------------------------------ summary(aov2) ## ------------------------------------------------------------------------ AIC(aov1) AIC(aov2) ## ----mycont-------------------------------------------------------------- contrasts(ds$sub) = contr.SAS(3) aov3 = lm(cesd ~ sub + genf, data=ds) summary(aov3) ## ------------------------------------------------------------------------ mult = TukeyHSD(aov(cesd ~ sub, data=ds), "sub") mult ## ----pairwiseplot,echo=TRUE, eval=TRUE----------------------------------- require(mosaic) mplot(mult) ## ----message=FALSE------------------------------------------------------- library(dplyr) library(factorplot) newds = mutate(newds, cesdgrp = cut(cesd, breaks=c(-1, 10, 20, 30, 40, 50, 61), labels=c("0-10", "11-20", "21-30", "31-40", "41-50", "51-60"))) tally(~ cesdgrp, data=newds) mod = lm(pcs ~ age + cesdgrp, data=newds) fp = factorplot(mod, adjust.method="none", factor.variable="cesdgrp", pval=0.05, two.sided=TRUE, order="natural") ## ----pairwiseplot2,echo=TRUE, eval=TRUE,fig.height=5.3------------------- plot(fp, abbrev.char=100) ## ------------------------------------------------------------------------ library(gmodels) levels(ds$sub) fit.contrast(aov2, "sub", c(1,1,-2), conf.int=0.95 )