Airline Delays in the First Course (part 1: accessing the data) ======================================================== Nicholas Horton, nhorton@amherst.edu, January 17, 2014 ------------------------------------------------------ #### Accessing the data ```{r,eval=FALSE} require(RMySQL) # also need to provide the password # SQLite can be used to access these data without a MySQL server con = dbConnect(MySQL(), user="mth292", host="rucker.smith.edu", dbname="airlines") ``` ```{r} ds = dbGetQuery(con, "SELECT DayofMonth, Month, Year, Origin, sum(1) as numFlights FROM ontime WHERE Origin='BDL' GROUP BY DayofMonth,Month,Year") # returns a data frame with 7,763 rows and 5 columns ds = transform(ds, date = as.Date(paste(Year, "-", Month, "-", DayofMonth, sep=""))) ds = transform(ds, weekday = weekdays(date)) ds = ds[order(ds$date),] mondays = subset(ds, weekday=="Monday") library(lattice) xyplot(numFlights ~ date, xlab="", ylab="number of flights on Monday", type="l", col="black", lwd=2, data=mondays) ``` ```{r} ds2 = dbGetQuery(con, "SELECT UniqueCarrier, ArrDelay, Month, Year, Origin, Dest FROM ontime WHERE Origin='GRB' AND Dest='ORD' AND Year=2005") dim(ds2) ``` ```{r} head(subset(ds2, UniqueCarrier=="MQ")) head(subset(ds2, UniqueCarrier=="OO")) ``` ```{r} tally(~ UniqueCarrier, data=ds2) favstats(ArrDelay ~ UniqueCarrier, data=ds2) bwplot(ArrDelay ~ UniqueCarrier, data=ds2) bwplot(ArrDelay ~ UniqueCarrier, ylim=c(-60, 200), ylab="Arrival delay (in minutes)", data=ds2) densityplot(~ ArrDelay, groups=UniqueCarrier, auto.key=TRUE, xlab="Arrival delay (in minutes)", xlim=c(-60, 200), data=ds2) # save(ds2, file="GRB.Rd") ```