---
output: pdf_document
---

Airline Delays in the First Course (part 2: analyzing the data from Boston)
========================================================

Nicholas Horton, nhorton@amherst.edu, July 31, 2014
------------------------------------------------------

#### Accessing the data

```{r, message=FALSE, tidy=FALSE}
# library() stops with an error when mosaic is not installed; require() would
# only return FALSE and let the later calls fail with less helpful messages.
library(mosaic)
options(digits = 3)
trellis.par.set(theme = col.mosaic())   # get a better color scheme for lattice

# The .rda file is expected to provide the bosFlights data frame used below.
load("Meetup-talk/jsm2014/JSM-BOS.rda")
names(bosFlights)

# Flights on one day only: Thursday, August 8th, 2013.
# Month must be part of the condition, otherwise the 8th of every month in
# 2013 is selected.
oneday <- bosFlights %>%
  filter(Year == 2013 & Month == 8 & DayofMonth == 8) %>%
  select(DayofMonth, Month, Year, Origin, Dest, CRSDepTime, ArrDelay, Cancelled)
head(oneday)
```

```{r}
# Derived variables used by the summaries below.
bosFlights <- bosFlights %>%
  mutate(
    # Negative ArrDelay values are replaced by 0; missing values stay NA.
    RealDelay = ifelse(ArrDelay < 0, 0, ArrDelay),
    # Intervals are right-closed: [0, 1200], (1200, 1800], (1800, 2400].
    # include.lowest = TRUE keeps a CRSDepTime of exactly 0 in "morning"
    # instead of turning it into NA.
    # NOTE(review): presumably CRSDepTime is an hhmm clock time -- confirm.
    TimeOfDay = cut(
      CRSDepTime,
      breaks = c(0, 1200, 1800, 2400),
      labels = c("morning", "afternoon", "evening"),
      include.lowest = TRUE
    ),
    # 1 when ArrDelay is missing or greater than 15, otherwise 0.
    # NOTE(review): a missing ArrDelay is treated as a cancellation here --
    # confirm against the Cancelled column.
    DelayOrCancel = ifelse(is.na(ArrDelay) | ArrDelay > 15, 1, 0)
  )
```

```{r}
# Overall summary of the 0/1 indicator, then broken down by time of day,
# carrier and destination.
favstats(~ DelayOrCancel, data = bosFlights)
favstats(DelayOrCancel ~ TimeOfDay, data = bosFlights)
favstats(DelayOrCancel ~ UniqueCarrier, data = bosFlights)
favstats(DelayOrCancel ~ Dest, data = bosFlights)
```

```{r}
# Mean RealDelay per destination per day, keeping only day/destination
# combinations with more than 10 flights that have a non-missing RealDelay.
destDelay <- bosFlights %>%
  filter(!is.na(RealDelay)) %>%
  group_by(DayofMonth, Month, Year, Dest) %>%
  summarise(delay = mean(RealDelay), n = n()) %>%
  filter(n > 10) %>%
  ungroup()   # drop the leftover grouping so later steps see a plain table
```

```{r}
filter(destDelay, Dest == "ATL") %>%
  head()
mean(delay ~ Dest, data = destDelay)
bwplot(delay ~ Dest, data = destDelay)
```

```{r}
# Mean of DelayOrCancel per carrier per day, keeping only day/carrier
# combinations with more than 10 flights.
airlineDelay <- bosFlights %>%
  group_by(DayofMonth, Month, Year, UniqueCarrier) %>%
  summarise(delay = mean(DelayOrCancel), n = n()) %>%
  filter(n > 10) %>%
  ungroup()   # drop the leftover grouping so later steps see a plain table
```

```{r}
filter(airlineDelay, UniqueCarrier == "B6") %>%
  head()
mean(delay ~ UniqueCarrier, data = airlineDelay)
bwplot(delay ~ UniqueCarrier, data = airlineDelay)   # B6 is JetBlue
```

```{r}