# Data-management walkthrough on the HELP baseline dataset.
#
# This script was extracted from a knitr document; the "## ----" lines are
# the chunk headers and are kept so chunks remain identifiable.  It reads
# the HELP data, keeps a subset of columns, inspects and exports it,
# rebuilds the cesd score from the f1a-f1t items with mean imputation,
# derives a drinking-status factor and a gender factor, then sorts and
# summarises by group.
#
# Side effects: writes "savedfile", "ds.csv", "file.dat" and "file.sas" to
# the working directory and changes several options().

## ----echo=FALSE,eval=TRUE------------------------------------------------
options(continue=" ")

## ------------------------------------------------------------------------
options(digits=3)
options(width=72)  # narrow output
ds = read.csv("http://www.amherst.edu/~nhorton/r2/datasets/help.csv")
library(dplyr)
newds = select(ds, cesd, female, i1, i2, id, treat, f1a, f1b, f1c, f1d,
  f1e, f1f, f1g, f1h, f1i, f1j, f1k, f1l, f1m, f1n, f1o, f1p, f1q, f1r,
  f1s, f1t)

## ------------------------------------------------------------------------
names(newds)
str(newds[,1:10])  # structure of the first 10 variables

## ------------------------------------------------------------------------
summary(newds[,1:10])  # summary of the first 10 variables

## ------------------------------------------------------------------------
head(newds, n=3)

## ------------------------------------------------------------------------
comment(newds) = "HELP baseline dataset"
comment(newds)
# NOTE(review): this saves the full `ds`, not the `newds` object that was
# just given a comment -- confirm that is intended.
save(ds, file="savedfile")

## ------------------------------------------------------------------------
write.csv(ds, file="ds.csv")

## ------------------------------------------------------------------------
library(foreign)
write.foreign(newds, "file.dat", "file.sas", package="SAS")

## ------------------------------------------------------------------------
with(newds, cesd[1:10])
with(newds, head(cesd, 10))

## ------------------------------------------------------------------------
with(newds, cesd[cesd > 56])

## ------------------------------------------------------------------------
library(dplyr)
filter(newds, cesd > 56) %>% select(id, cesd)

## ------------------------------------------------------------------------
with(newds, sort(cesd)[1:4])
with(newds, which.min(cesd))

## ------------------------------------------------------------------------
library(mosaic)
tally(~ is.na(f1g), data=newds)
favstats(~ f1g, data=newds)

## ------------------------------------------------------------------------
# reverse code f1d, f1h, f1l and f1p
# (3 - x assumes each item is scored 0-3 -- TODO confirm)
cesditems = with(newds, cbind(f1a, f1b, f1c, (3 - f1d), f1e, f1f, f1g,
  (3 - f1h), f1i, f1j, f1k, (3 - f1l), f1m, f1n, f1o, (3 - f1p), f1q,
  f1r, f1s, f1t))
# number of missing items per subject
nmisscesd = rowSums(is.na(cesditems))
# copy of the item matrix with missing items counted as 0
ncesditems = cesditems
ncesditems[is.na(cesditems)] = 0
# sum of the observed items per subject
newcesd = rowSums(ncesditems)
# rescale the observed sum to all 20 items (mean imputation of the missing
# ones); this is NaN for a subject with all 20 items missing
imputemeancesd = 20/(20-nmisscesd)*newcesd

## ------------------------------------------------------------------------
# show only the subjects with at least one missing item
data.frame(newcesd, newds$cesd, nmisscesd, imputemeancesd)[nmisscesd>0,]

## ----createdrink,message=FALSE-------------------------------------------
library(dplyr)
library(memisc)
# drinkstat from i1, i2 and female:
#   abstinent: i1 is 0
#   moderate:  i1 in (0, 1] and i2 <= 3 when female == 1;
#              i1 in (0, 2] and i2 <= 4 when female == 0
#   highrisk:  i1 > 1 or i2 > 3 when female == 1;
#              i1 > 2 or i2 > 4 when female == 0
newds = mutate(newds, drinkstat=
  cases(
    "abstinent" = i1==0,
    "moderate" = (i1>0 & i1<=1 & i2<=3 & female==1) |
                 (i1>0 & i1<=2 & i2<=4 & female==0),
    "highrisk" = ((i1>1 | i2>3) & female==1) |
                 ((i1>2 | i2>4) & female==0)))

## ----echo=FALSE----------------------------------------------------------
library(mosaic)

## ----echo=FALSE----------------------------------------------------------
# Presumably detached so memisc/MASS stop masking names used below (e.g.
# select) -- confirm.  MASS is only detached when it is actually attached,
# so this chunk does not error if memisc did not bring it in.
detach(package:memisc)
if ("package:MASS" %in% search()) detach(package:MASS)

## ------------------------------------------------------------------------
library(dplyr)
tmpds = select(newds, i1, i2, female, drinkstat)
tmpds[365:370,]

## ------------------------------------------------------------------------
library(dplyr)
filter(tmpds, drinkstat=="moderate" & female==1)

## ----message=FALSE-------------------------------------------------------
library(gmodels)
with(tmpds, CrossTable(drinkstat))

## ------------------------------------------------------------------------
with(tmpds, CrossTable(drinkstat, female, prop.t=FALSE, prop.c=FALSE,
  prop.chisq=FALSE))

## ------------------------------------------------------------------------
# labelled factor version of the 0/1 female indicator
newds = transform(newds,
  gender=factor(female, c(0,1), c("Male","Female")))
# `margins` is spelled out in full rather than relying on partial matching
tally(~ female + gender, margins=FALSE, data=newds)

## ------------------------------------------------------------------------
library(dplyr)
# NOTE(review): newds is rebuilt from `ds` here, so the drinkstat and
# gender columns added above are dropped -- confirm that is intended.
newds = arrange(ds, cesd, i1)
newds[1:5, c("cesd", "i1", "id")]

## ------------------------------------------------------------------------
library(dplyr)
females = filter(ds, female==1)
with(females, mean(cesd))
# an alternative approach
mean(ds$cesd[ds$female==1])

## ------------------------------------------------------------------------
with(ds, tapply(cesd, female, mean))
library(mosaic)
mean(cesd ~ female, data=ds)