---
title: "Rail Trails dataset"
author: ""
date: "October 13, 2014"
output:
  html_document:
    fig_height: 4
    fig_width: 6
  pdf_document:
    fig_height: 4
    fig_width: 6
  word_document:
    fig_height: 4
    fig_width: 6
---

## load packages and data

```{r include=FALSE}
# Don't delete this chunk if you are using the mosaic package
# This loads the mosaic, dplyr and GGally packages.
# library() is used rather than require() so that a missing package stops
# the document here instead of failing later with "could not find function".
library(mosaic)
library(dplyr)
library(GGally)
# Black-and-white mosaic theme for the lattice graphics produced below.
trellis.par.set(theme = col.mosaic(bw = TRUE))
```

```{r include=FALSE}
library(knitr)
opts_chunk$set(
  tidy = FALSE,     # display code as typed
  size = "small"    # slightly smaller font for code
)
```

```{r}
# The CSV is read relative to the directory the document is knit from.
ds <- read.csv("JSE13-070R2.csv")
```

## exploratory analysis: pairs plots

```{r}
# Two-level label derived from distgroup: every value other than "Closer"
# is labelled "Farther".
ds <- mutate(ds, group = ifelse(distgroup == "Closer", "Closer", "Farther"))

# NOTE(review): the diag option names ("density", "bar") depend on the
# installed GGally version -- confirm they are still accepted.
ggpairs(
  ds[, c("price2007", "bedrooms", "acre", "squarefeet", "group")],
  diag = list(continuous = "density", discrete = "bar"),
  axisLabels = "show"
)
```

```{r}
ggpairs(
  ds[, c("diff2014", "adj1998", "walkscore", "bikescore", "group")],
  diag = list(continuous = "density", discrete = "bar"),
  axisLabels = "show"
)
```

## comparison of prices by distance

```{r}
# Summary statistics and overlaid densities of diff2014, split by distgroup.
favstats(diff2014 ~ distgroup, data = ds)
densityplot(
  ~ diff2014,
  groups = distgroup,
  auto.key = TRUE,
  xlab = "Price change 1998 to 2014 (in thousands of 2014 dollars)",
  data = ds
)
```

## examine outliers

```{r}
# Houses whose diff2014 is below -190 or above 250, sorted by diff2014.
results <- ds %>%
  filter(diff2014 < -190 | diff2014 > 250) %>%
  select(streetno, streetname, price1998, adj1998, price2014, diff2014) %>%
  arrange(diff2014)
results
```

## create a map

```{r}
library(ggmap)

# Centre point for the base map (longitude/latitude).
northampton <- c(lon = -72.675, lat = 42.3250)
# get_map() downloads map tiles, so this chunk needs network access.
mymap <- get_map(location = northampton, zoom = 13, color = "bw")

# One point per house: colour shows distgroup, point size shows walkscore.
realmap <- ggmap(mymap) +
  geom_point(
    aes(x = longitude, y = latitude, colour = distgroup, size = walkscore),
    data = ds
  ) +
  theme(legend.position = "top")
print(realmap)
```

## compare distributions by distance

```{r}
# Categorical variables: cross-tabulate against distgroup and run
# Fisher's exact test.
favstats(no_full_baths ~ distgroup, data = ds)
with(ds, fisher.test(no_full_baths, distgroup))

tally(~ bedgroup | distgroup, data = ds)
with(ds, fisher.test(bedgroup, distgroup))

tally(garagegroup ~ distgroup, data = ds)
with(ds, fisher.test(garagegroup, distgroup))

tally(zip ~ distgroup, data = ds)
with(ds, fisher.test(zip, distgroup))

# Continuous variables: summary statistics by distgroup, then a Wilcoxon
# rank-sum test between the two groups.
# NOTE(review): the subsets assume the distgroup levels are spelled exactly
# "Closer" and "Farther Away" -- verify against the data file.
favstats(acre ~ distgroup, data = ds)
with(
  ds,
  wilcox.test(
    acre[distgroup == "Closer"],
    acre[distgroup == "Farther Away"]
  )
)

favstats(squarefeet ~ distgroup, data = ds)
with(
  ds,
  wilcox.test(
    squarefeet[distgroup == "Closer"],
    squarefeet[distgroup == "Farther Away"]
  )
)

favstats(walkscore ~ distgroup, data = ds)
with(
  ds,
  wilcox.test(
    walkscore[distgroup == "Closer"],
    walkscore[distgroup == "Farther Away"]
  )
)

favstats(bikescore ~ distgroup, data = ds)
with(
  ds,
  wilcox.test(
    bikescore[distgroup == "Closer"],
    bikescore[distgroup == "Farther Away"]
  )
)
```

## fit variety of models (univariate outcome)

```{r}
# Unadjusted model: diff2014 on distance group only.
lm1 <- lm(diff2014 ~ distgroup, data = ds)
summary(lm1)

# Adds adj1998 as a covariate.
lm2 <- lm(diff2014 ~ adj1998 + distgroup, data = ds)
summary(lm2)

# Adds the house characteristics and zip.
lm3 <- lm(
  diff2014 ~ adj1998 + bedgroup + garagegroup + acre + squarefeet + zip +
    distgroup,
  data = ds
)
summary(lm3)

# Same predictors as lm2, with pctchange as the outcome.
lm4 <- lm(pctchange ~ distgroup + adj1998, data = ds)
summary(lm4)
```

## reshape dataset to be tall

```{r}
# Wide-format record(s) with housenum 97, shown before reshaping.
results <- ds %>%
  filter(housenum == 97) %>%
  select(streetno, streetname, adj1998, adj2007, adj2011, price2014,
         acre, distgroup)
results
```

```{r}
library(tidyr)

# Stack the four price columns into year/price pairs. price2014 is copied
# to adj2014 so that all four columns share the "adj" prefix, which is then
# stripped to leave the year as a character string.
tall <- ds %>%
  mutate(adj2014 = price2014) %>%
  gather(year, price, adj1998, adj2007, adj2011, adj2014) %>%
  mutate(year = gsub("adj", "", year)) %>%
  select(housenum, streetno, streetname, zip, year, price, acre, bedgroup,
         squarefeet, sfgroup, garagegroup, distgroup)

# The same housenum after reshaping, for comparison with the wide rows above.
toprint <- tall %>%
  filter(housenum == 97) %>%
  select(housenum, streetno, streetname, year, price, acre, distgroup)
toprint
```

```{r}
bwplot(
  price ~ year | sfgroup + distgroup,
  ylab = "price (in thousands of adjusted dollars)",
  data = tall
)
```

## repeated measures modeling

```{r}
# Occasion index 1..4 for the four years; used as the position variable in
# the correlation and variance structures below.
tall <- mutate(
  tall,
  time = 1 * (year == "1998") + 2 * (year == "2007") +
    3 * (year == "2011") + 4 * (year == "2014")
)

library(nlme)

# GLS fit with a general (corSymm) within-house correlation matrix and a
# separate variance per time point (varIdent).
# distgroup * year already expands to both main effects plus the
# interaction; the explicit main-effect terms are redundant but harmless.
glmcd <- gls(
  price ~ zip + acre + bedgroup + garagegroup + squarefeet +
    distgroup + year + distgroup * year,
  data = tall,
  correlation = corSymm(form = ~ time | housenum),
  weights = varIdent(form = ~ 1 | time)
)
summary(glmcd)

# alternative model: same fixed effects, with a random intercept and a
# random slope on time for each housenum
randeff <- lme(
  fixed = price ~ zip + acre + bedgroup + garagegroup + squarefeet +
    distgroup + year + distgroup * year,
  random = ~ time | housenum,
  data = tall
)
summary(randeff)
```