---
output: pdf_document
---

Airline Delays in the First Course (part 4: analyzing the data from Flagstaff)
========================================================

Nicholas Horton, nhorton@amherst.edu, July 15, 2014
------------------------------------------------------

#### Accessing the data

```{r, message=FALSE, tidy=FALSE}
# library() stops with an error when mosaic is not installed; require() would
# only return FALSE and let every later call fail with a less helpful message.
library(mosaic)
options(digits=3)
trellis.par.set(theme=col.mosaic())   # get a better color scheme for lattice
load("Meetup-talk/ICOTS-FLG.rda")     # expected to define the data frame `ds`
names(ds)

# Flights on Saturday, July 20th, 2013, in scheduled departure order.
# Month is tested explicitly so the filter matches the stated date even if
# the data ever holds months other than July.
filter(ds, Year==2013 & Month==7 & DayofMonth==20) %>%
  select(DayofMonth, Month, Year, Origin, Dest, CRSDepTime, ArrDelay,
         Cancelled) %>%
  arrange(CRSDepTime)

# Bin the scheduled departure time into three parts of the day.
# Intervals are right-closed: (0, 1200], (1200, 1800], (1800, 2400].
# include.lowest=TRUE closes the first interval at 0 so that a departure time
# of exactly 0 is labelled "morning" instead of becoming NA.
ds = mutate(ds, TimeOfDay = cut(CRSDepTime,
                                breaks=c(0, 1200, 1800, 2400),
                                labels=c("morning", "afternoon", "evening"),
                                include.lowest=TRUE))

# A flight counts as "delayed or cancelled" when its arrival delay is missing
# or exceeds 15 minutes; is.na() is tested first so NA rows become "yes".
ds = mutate(ds, delayorcancel = ifelse(is.na(ArrDelay) | ArrDelay > 15,
                                       "yes", "no"))
```

```{r}
# Check the range of scheduled departure times and the size of each bin.
favstats(~ CRSDepTime, data=ds)
tally(~ TimeOfDay, data=ds)
```

```{r}
# Percentages overall, then conditional on the time of day.
tally(~ Cancelled, format="percent", data=ds)
tally(~ delayorcancel, format="percent", data=ds)
tally(~ Cancelled | TimeOfDay, format="percent", data=ds)
tally(~ delayorcancel | TimeOfDay, format="percent", data=ds)
```

```{r}
favstats(~ ArrDelay, data=ds)

# Early arrivals (negative delays) are counted as zero delay.
# pmax() keeps missing delays as NA, exactly as the comparison would.
ds = mutate(ds, ActDelay = pmax(ArrDelay, 0))
favstats(~ ActDelay, data=ds)
favstats(ActDelay ~ TimeOfDay, data=ds)

# The same boxplots in vertical and horizontal orientation; the axis limits
# cut the display off at 120 minutes.
bwplot(ActDelay ~ TimeOfDay, ylim=c(-10, 120),
       main="July flights from Flagstaff, 2011-2013",
       ylab="Actual arrival delay (in minutes)", data=ds)
bwplot(TimeOfDay ~ ActDelay, xlim=c(-10, 120),
       main="July flights from Flagstaff, 2011-2013",
       xlab="Actual arrival delay (in minutes)", data=ds)
```

```{r}
# Density of the raw arrival delay by time of day: first with the default
# bandwidth, then with half the bandwidth (adjust=1/2) for a less smooth curve.
densityplot(~ ArrDelay, groups=TimeOfDay, auto.key=TRUE,
            xlab="Arrival delay (in minutes)", xlim=c(-35, 120), data=ds)
densityplot(~ ArrDelay, groups=TimeOfDay, adjust=1/2, auto.key=TRUE,
            xlab="Arrival delay (in minutes)", xlim=c(-35, 120), data=ds)
```

```{r}
ds2 = filter(ds, Year==2013)
sort(tally(~ TailNum, data=ds2), decreasing = TRUE)