## ----echo=FALSE,eval=TRUE------------------------------------------------ options(continue=" ") ## ------------------------------------------------------------------------ options(digits=3) ds = read.csv("http://www.amherst.edu/~nhorton/r2/datasets/help.csv") ## ------------------------------------------------------------------------ plottwoy = function(x, y1, y2, xname="X", y1name="Y1", y2name="Y2") { plot(x, y1, ylab=y1name, xlab=xname) lines(lowess(x, y1), lwd=3) addsecondy(x, y2, y1, yname=y2name) } ## ------------------------------------------------------------------------ addsecondy = function(x, y, origy, yname="Y2") { prevlimits = range(origy) axislimits = range(y) axis(side=4, at=prevlimits[1] + diff(prevlimits)*c(0:5)/5, labels=round(axislimits[1] + diff(axislimits)*c(0:5)/5, 1)) mtext(yname, side=4) newy = (y-axislimits[1])/(diff(axislimits)/diff(prevlimits)) + prevlimits[1] points(x, newy, pch=2) lines(lowess(x, newy), lty=2, lwd=3) } ## ----multiyr,echo=TRUE,eval=TRUE----------------------------------------- with(ds, plottwoy(cesd[female==1&substance=="alcohol"], indtot[female==1&substance=="alcohol"], mcs[female==1&substance=="alcohol"], xname="cesd", y1name="indtot", y2name="mcs")) ## ------------------------------------------------------------------------ library(lattice) ## ----coplotr,echo=TRUE,eval=TRUE----------------------------------------- trellis.par.set(theme=col.mosaic(bw=TRUE)) ds = transform(ds, suicidal.thoughts = ifelse(g1b==1, "Y", "N")) coplot(mcs ~ cesd | suicidal.thoughts*substance, panel=panel.smooth, data=ds) ## ----latset,echo=TRUE,eval=TRUE------------------------------------------ show.settings() ## ----marghist0, echo= TRUE, eval=TRUE------------------------------------ scatterhist = function(x, y, xlab="x label", ylab="y label", cexval=1.3){ zones=matrix(c(3,1,2,4), ncol=2, byrow=TRUE) layout(zones, widths=c(4/5,1/5), heights=c(1/5,4/5)) par(mar=c(0,0,0,0)) plot(type="n",x=1, y =1, bty="n",xaxt="n", yaxt="n") text(x=1,y=1,paste0("n = ",min(length(x), length(y))), cex=cexval) xhist = hist(x, plot=FALSE) yhist = hist(y, plot=FALSE) top = max(c(xhist$counts, yhist$counts)) par(mar=c(3.9,3.9,1,1)) plot(x,y, xlab=xlab, ylab=ylab, cex.sub=cexval, pch=19, col="#00000044") lines(lowess(x, y), lwd=2) par(mar=c(0,3,1,1)) barplot(xhist$counts, axes=FALSE, ylim=c(0, top), space=0) par(mar=c(3,0,1,1)) barplot(yhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE) par(oma=c(3,3,0,0)) } ## ----marghist------------------------------------------------------------ with(ds, scatterhist(mcs, pcs, xlab="MCS", ylab="PCS")) ## ----message=FALSE------------------------------------------------------- library(survival) survobj = survfit(Surv(dayslink, linkstatus) ~ treat, data=ds) print(survobj) ## ----kmplotr,echo=TRUE,eval=TRUE----------------------------------------- plot(survobj, lty=1:2, lwd=2, col=c(4,2)) title("Product-Limit Survival Estimates") legend(20, .38, legend=c("Control", "Treatment"), lty=c(1,2), lwd=2, col=c(4,2), cex=1.2) ## ----message=FALSE------------------------------------------------------- library(ROCR) pred = with(ds, prediction(cesd, g1b)) auc = slot(performance(pred, "auc"), "y.values")[[1]] ## ----rocplotr,echo=TRUE,eval=TRUE---------------------------------------- plot(performance(pred, "tpr", "fpr"), print.cutoffs.at=seq(from=20, to=50, by=5), text.adj=c(1, -.5), lwd=2) lines(c(0, 1), c(0, 1)) text(.6, .2, paste("AUC=", round(auc,3), sep=""), cex=1.4) title("ROC Curve for Model") ## ----eval=FALSE, echo=TRUE----------------------------------------------- ## pairs(c(ds[72:74], ds[67])) ## ----eval = FALSE, echo=TRUE--------------------------------------------- ## pairs(ds[c("pcs", "mcs", "cesd", "i1")]) ## ------------------------------------------------------------------------ panel.hist = function(x, ...) { usr = par("usr"); on.exit(par(usr)) par(usr=c(usr[1:2], 0, 1.5)) h = hist(x, plot=FALSE) breaks = h$breaks; nB = length(breaks) y = h$counts; y = y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...) } ## ------------------------------------------------------------------------ panel.lm = function(x, y, col=par("col"), bg=NA, pch=par("pch"), cex=1, col.lm="red", ...) { points(x, y, pch=pch, col=col, bg=bg, cex=cex) ok = is.finite(x) & is.finite(y) if (any(ok)) abline(lm(y[ok] ~ x[ok])) } ## ----label=pairsplot,echo=TRUE,eval=TRUE--------------------------------- pairs(~ cesd + mcs + pcs + i1, subset=(female==1), lower.panel=panel.smooth, diag.panel=panel.hist, upper.panel=panel.lm, data=ds) ## ----label=pairsplot2,echo=TRUE,eval=TRUE-------------------------------- library(GGally) library(dplyr) smallds = ds %>% filter(substance=="alcohol") %>% mutate(sex = ifelse(female==1, "female", "male")) %>% select(cesd, mcs, sex) ## ----tidy=FALSE---------------------------------------------------------- ggpairs(smallds, axisLabels="show", diag = list(continuous = "bar", discrete = "bar"), upper = list(continuous = "points", combo = "box"), lower = list(continuous = "cor", combo = "facethist")) ## ------------------------------------------------------------------------ cormat = with(ds, cor(cbind(mcs, pcs, pss_fr, drugrisk, cesd, indtot, i1, sexrisk), use="pairwise.complete.obs")) oldopt = options(digits=2) cormat options(oldopt) ## ----message=FALSE------------------------------------------------------- ds$drugrisk[is.na(ds$drugrisk)] = 0 panel.corrgram = function(x, y, z, at, level=0.9, label=FALSE, ...) { require(ellipse, quietly=TRUE) zcol = level.colors(z, at=at, col.regions=gray.colors) for (i in seq(along=z)) { ell = ellipse(z[i], level=level, npoints=50, scale=c(.2, .2), centre=c(x[i], y[i])) panel.polygon(ell, col=zcol[i], border=zcol[i], ...) } if (label) panel.text(x=x, y=y, lab=100*round(z, 2), cex=0.8, col=ifelse(z < 0, "white", "black")) } ## ----label=cormat,echo=TRUE,eval=TRUE,message=FALSE---------------------- library(ellipse); library(lattice) levelplot(cormat, at=do.breaks(c(-1.01, 1.01), 20), xlab=NULL, ylab=NULL, colorkey=list(space = "top", col=gray.colors), scales=list(x=list(rot=90)), panel=panel.corrgram, labels=TRUE)