Airline Delays in the First Course (part 2: processing the data)
========================================================

Nicholas Horton, nhorton@amherst.edu, January 17, 2014
------------------------------------------------------

#### Accessing the data

```{r, message=FALSE}
# library() stops with an error if mosaic is missing; require() would only
# return FALSE and let later chunks fail with a less obvious message.
library(mosaic)
```

```{r,eval=TRUE}
# reads in ds2 from part 1
load(url("http://www.amherst.edu/~nhorton/airlines/GRB.Rd"))
ds2 = na.omit(ds2)    # get rid of missing values

# "MQ" is the carrier code for American Eagle, so it belongs to `alleagle`.
# NOTE(review): "OO" is usually SkyWest's code (Mesa's own code is "YV") --
# confirm against the part 1 data which code the Mesa subset should use.
alleagle = subset(ds2, UniqueCarrier == "MQ")
allmesa = subset(ds2, UniqueCarrier == "OO")
```

### Define a rule (then test it)

```{r}
# Rule I: pick the airline with the smaller mean delay, but only when the
# difference in means is at least 30 minutes and both standard deviations
# are below 60 minutes.
#
# airlinea, airlineb: numeric vectors of delays (minutes), one per airline.
# Returns "Airline A", "Airline B" (the airline judged better, i.e. with the
# lower mean delay) or "NEITHER" when the rule cannot separate them.
compareI = function(airlinea, airlineb) {
  diffmeans = mean(airlinea) - mean(airlineb)
  sda = sd(airlinea)    # spread of A: must be sd(), not mean()
  sdb = sd(airlineb)

  # Too much variability in either sample, or means too close together.
  if (max(sda, sdb) >= 60 || abs(diffmeans) < 30) {
    return("NEITHER")
  }

  # Negative difference means A has the smaller average delay.
  if (diffmeans < 0) {
    "Airline A"
  } else {
    "Airline B"
  }
}

compareI(c(10, 10), c(-40, -40))
compareI(c(10, 10), c(-10, -10))
compareI(c(10, 10), c(-10, 1000))
```

```{r}
# observed result
americaneagle = c(-10, -9, -2, -1, 9, 13, 17, 54, 98, 236)
mesa = c(-22, -16, -14, -8, -5, 0, 0, 3, 4, 28)
mean(americaneagle); sd(americaneagle)
mean(mesa); sd(mesa)
compareI(americaneagle, mesa)
```

```{r}
compareI(sample(alleagle$ArrDelay, 10), sample(allmesa$ArrDelay, 10))
compareI(sample(alleagle$ArrDelay, 10), sample(allmesa$ArrDelay, 10))
```

```{r}
res = do(2000) * compareI(sample(alleagle$ArrDelay, 10), sample(allmesa$ArrDelay, 10))
tally(~ result, data=res)
```

### Define another rule (then test it)

```{r}
# Rule II: pick an airline only when it is better on both criteria -- its
# mean delay is more than 30 minutes smaller AND its proportion of delayed
# flights (delay > 15 minutes) is at least 10 percentage points smaller.
#
# airlinea, airlineb: numeric vectors of delays (minutes), one per airline.
# Returns "Airline A", "Airline B" (the airline judged better) or "NEITHER".
compareII = function(airlinea, airlineb) {
  diffmeans = mean(airlinea) - mean(airlineb)
  # Difference in the share of flights delayed by more than 15 minutes.
  propdelays = mean(airlinea > 15) - mean(airlineb > 15)

  if (diffmeans > 30 && propdelays >= .1) {
    # A is worse on both counts, so B is preferred.
    "Airline B"
  } else if (diffmeans < -30 && propdelays <= -.1) {
    # B is worse on both counts, so A is preferred.
    "Airline A"
  } else {
    "NEITHER"
  }
}

compareII(c(0,0,0,0,0,0,20,20,20,20), c(0,0,0,0,200,200,200,200,200,200))
compareII(c(0,0,0,0,200,200,200,200,200,200), c(0,0,0,0,0,0,20,20,20,20))
compareII(c(0,0,0,0,200,200,200,200,200,200), c(0,0,0,20,20,20,20,20,20,20))
```

```{r}
compareII(americaneagle, mesa)
res = do(2000) * compareII(sample(alleagle$ArrDelay, 10), sample(allmesa$ArrDelay, 10))
tally(~ result, data=res)
```