--- title: 'D-vine quantile regression with discrete variables: analysis of bike rental data' author: "Dani Kraus and Thomas Nagler" date: "November 8, 2017" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Analysis of bike rental data} %\VignetteEngine{knitr::rmarkdown} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ``` #### Required packages ```{r, message = FALSE} library(vinereg) require(ggplot2) require(dplyr) require(purrr) require(scales) require(quantreg) ``` #### Plot function for marginal effects ```{r} plot_marginal_effects <- function(covs, preds) { cbind(covs, preds) %>% tidyr::gather(alpha, prediction, -seq_len(NCOL(covs))) %>% dplyr::mutate(prediction = as.numeric(prediction)) %>% tidyr::gather(variable, value, -(alpha:prediction)) %>% dplyr::mutate(value = as.numeric(value)) %>% ggplot(aes(value, prediction, color = alpha)) + geom_point(alpha = 0.15) + geom_smooth(span = 0.5, se = FALSE) + facet_wrap(~ variable, scale = "free_x") + theme(legend.position = "none") + theme(plot.margin = unit(c(0, 0, 0, 0), "mm")) + xlab("") } ``` ## Data preparation #### Load data ```{r} bikedata <- read.csv("day.csv") bikedata[, 2] <- as.Date(bikedata[, 2]) head(bikedata) ``` #### Rename variables ```{r} bikedata <- bikedata %>% rename( temperature = atemp, month = mnth, weathersituation = weathersit, humidity = hum, count = cnt ) ``` #### Un-normalize variables See variable description on UCI web page. ```{r} bikedata <- bikedata %>% mutate( temperature = 66 * temperature + 16, windspeed = 67 * windspeed, humidity = 100 * humidity ) ``` #### Show trend ```{r count_with_trend} ggplot(bikedata, aes(dteday, count)) + geom_line() + scale_x_date(labels = scales::date_format("%b %y")) + xlab("date") + ylab("rental count") + stat_smooth(method = "lm", se = FALSE, linetype = "dashed") + theme(plot.title = element_text(lineheight = 0.8, size = 20)) + theme(text = element_text(size = 18)) ``` #### Remove trend ```{r count_detrended} lm_trend <- lm(count ~ instant, data = bikedata) trend <- predict(lm_trend) bikedata <- mutate(bikedata, count = count / trend) ggplot(bikedata, aes(dteday, count)) + geom_line() + scale_x_date(labels = scales::date_format("%b %y")) + xlab("date") + ylab("detrended rental count") + theme(plot.title = element_text(lineheight = 0.8, size = 20)) + theme(text = element_text(size = 18)) ``` #### Drop useless variables ```{r} bikedata <- bikedata %>% select(-instant, -dteday, -yr) %>% # time indices select(-casual, -registered) %>% # casual + registered = count select(-holiday) %>% # we use 'workingday' instead select(-temp) # we use 'temperature' (feeling temperature) ``` #### Declare discrete variables as `ordered` ```{r} disc_vars <- c("season", "month", "weekday", "workingday", "weathersituation") bikedata <- bikedata %>% mutate(weekday = ifelse(weekday == 0, 7, weekday)) %>% # sun at end of week purrr::modify_at(disc_vars, as.ordered) ``` ## D-vine regression model #### Fit model ```{r} fit <- vinereg( count ~ ., data = bikedata, family = c("onepar", "tll"), selcrit = "aic" ) fit summary(fit) ``` #### In-sample predictions ```{r} alpha_vec <- c(0.1, 0.5, 0.9) pred <- fitted(fit, alpha_vec) ``` ### Marginal effects ```{r me_temperature, fig.width=4, fig.height=4} plot_marginal_effects( covs = select(bikedata, temperature), preds = pred ) ``` ```{r me_humidity, fig.width=4, fig.height=4, message=FALSE} plot_marginal_effects(covs = select(bikedata, humidity), preds = pred) + xlim(c(25, 100)) ``` ```{r me_windspeed, fig.width=4, fig.height=4, message=FALSE} plot_marginal_effects(covs = select(bikedata, windspeed), preds = pred) ``` ```{r me_month, fig.width=4, fig.height=4, message=FALSE} month_labs <- c("Jan","", "Mar", "", "May", "", "Jul", "", "Sep", "", "Nov", "") plot_marginal_effects(covs = select(bikedata, month), preds = pred) + scale_x_discrete(limits = 1:12, labels = month_labs) ``` ```{r me_weathersituation, fig.width=4, fig.height=4, message=FALSE} plot_marginal_effects(covs = select(bikedata, weathersituation), preds = pred) + scale_x_discrete(limits = 1:3,labels = c("good", "medium", "bad")) ``` ```{r me_weekday, fig.width=4, fig.height=4, message=FALSE} weekday_labs <- c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun") plot_marginal_effects(covs = select(bikedata, weekday), preds = pred) + scale_x_discrete(limits = 1:7, labels = weekday_labs) ``` ```{r me_workingday, fig.width=4, fig.height=4, message=FALSE} plot_marginal_effects(covs = select(bikedata, workingday), preds = pred) + scale_x_discrete(limits = 0:1, labels = c("no", "yes")) + geom_smooth(method = "lm", se = FALSE) + xlim(c(0, 1)) ``` ```{r me_season, fig.width=4, fig.height=4, message=FALSE} season_labs <- c("spring", "summer", "fall", "winter") plot_marginal_effects(covs = select(bikedata, season), preds = pred) + scale_x_discrete(limits = 1:4, labels = season_labs) ```