Libraries

This project includes the following libraries.

library(dplyr)
library(rockchalk)
library(psych)
library(splines)
library(car)

Data Cleaning and Set Up

In this step, we load the CSV containing sensor data joined with weather data. Since weather is measured hourly, we filter the sensor data to include only the hourly measurements. In this specific model, we examine the intersection of Regents and Stadium Drive, which combines readings from four sensors.

# Load the joined sensor/weather CSV into `dat`, then print a per-column
# overview as a quick sanity check of what was read.
dat <- read.csv(file = "Datasets/dots_data.csv")
summary(object = dat)
 
# Data recode ----
# Keep only the rows recorded on the hour (minutes == 0).
# which() is used so that rows whose `minutes` is NA are dropped; plain
# logical indexing would turn every NA comparison into an all-NA row.
dat.r <- dat[which(dat$minutes == 0), ]

# Combine the four sensors at the Regents/Stadium intersection into a single
# level. combineLevels() is documented to take a factor, while read.csv()
# returns character columns by default since R 4.0, so coerce first;
# as.factor() leaves the column untouched when it already is a factor.
dat.r$location <- combineLevels(
  as.factor(dat.r$location),
  c(
    "Regents_Dr_&_Stadium_Dr_1",
    "Regents_Dr_&_Stadium_Dr_2",
    "Regents_Dr_&_Stadium_Dr_3",
    "Stadium_Dr_East_Of_Regents_Dr"
  ),
  newLabel = "Regents_Stadium_combo"
)

# Rows belonging to the combined intersection only (NA-safe, as above).
dat.regStd <- dat.r[which(dat.r$location == "Regents_Stadium_combo"), ]

Model 1: Draft

The first draft of the multiple linear regression model includes all of the numerical variables in conjunction with a dummy-coded condition (weather description) variable. This initial model posed several issues:

# Model 1: `car` regressed on Condition (dummy-coded through factor()),
# Temperature, hours, Wind.Speed, Pressure and Humidity.
# The terms are kept in their original order so the fitted call and the
# coefficient names are unchanged.
regstd.car <- lm(
  formula = dat.regStd$car ~ factor(dat.regStd$Condition) +
    dat.regStd$Temperature + dat.regStd$hours + dat.regStd$Wind.Speed +
    dat.regStd$Pressure + dat.regStd$Humidity
)
# Residual quantiles, coefficient table and overall fit statistics
summary(regstd.car)
## 
## Call:
## lm(formula = dat.regStd$car ~ factor(dat.regStd$Condition) + 
##     dat.regStd$Temperature + dat.regStd$hours + dat.regStd$Wind.Speed + 
##     dat.regStd$Pressure + dat.regStd$Humidity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -161.76  -52.97  -18.62   34.75  357.89 
## 
## Coefficients:
##                                                     Estimate Std. Error
## (Intercept)                                       -1259.8299  1385.5628
## factor(dat.regStd$Condition)Fair                      8.8182     6.7933
## factor(dat.regStd$Condition)Light Rain               -7.7607    13.1519
## factor(dat.regStd$Condition)Mostly Cloudy             0.9787    11.3994
## factor(dat.regStd$Condition)Partly Cloudy            15.3896     9.6972
## factor(dat.regStd$Condition)Partly Cloudy / Windy   -14.2115    22.1433
## factor(dat.regStd$Condition)T-Storm                 -20.3282    19.0305
## dat.regStd$Temperature                                1.8712     0.4727
## dat.regStd$hours                                      7.8177     0.3012
## dat.regStd$Wind.Speed                                 2.4598     0.7109
## dat.regStd$Pressure                                  36.6236    45.8149
## dat.regStd$Humidity                                  46.8541    45.2474
##                                                   t value Pr(>|t|)    
## (Intercept)                                        -0.909 0.363380    
## factor(dat.regStd$Condition)Fair                    1.298 0.194486    
## factor(dat.regStd$Condition)Light Rain             -0.590 0.555235    
## factor(dat.regStd$Condition)Mostly Cloudy           0.086 0.931597    
## factor(dat.regStd$Condition)Partly Cloudy           1.587 0.112745    
## factor(dat.regStd$Condition)Partly Cloudy / Windy  -0.642 0.521115    
## factor(dat.regStd$Condition)T-Storm                -1.068 0.285627    
## dat.regStd$Temperature                              3.959 7.93e-05 ***
## dat.regStd$hours                                   25.955  < 2e-16 ***
## dat.regStd$Wind.Speed                               3.460 0.000557 ***
## dat.regStd$Pressure                                 0.799 0.424211    
## dat.regStd$Humidity                                 1.036 0.300618    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 76.05 on 1336 degrees of freedom
## Multiple R-squared:  0.3592, Adjusted R-squared:  0.3539 
## F-statistic: 68.08 on 11 and 1336 DF,  p-value: < 2.2e-16
# Variance inflation factors for the Model 1 terms (multicollinearity check)
car::vif(regstd.car)
##                                   GVIF Df GVIF^(1/(2*Df))
## factor(dat.regStd$Condition) 76.131936  6        1.434819
## dat.regStd$Temperature        4.154470  1        2.038252
## dat.regStd$hours              1.013649  1        1.006802
## dat.regStd$Wind.Speed         4.874290  1        2.207779
## dat.regStd$Pressure           9.450152  1        3.074110
## dat.regStd$Humidity           8.290033  1        2.879242
# Diagnostic plots for Model 1: panels 2 through 4 of plot.lm()
# (Normal Q-Q, Scale-Location, Cook's distance).
plot(regstd.car, which = 2:4)

Model 2: Refined

We then recreated the model and removed Condition, Humidity, and Pressure, which left a model with three significant independent variables (plus a significant intercept) and an overall significant model at alpha 0.05. However, we still had some major issues:

# Model 2: `car` regressed on Temperature, hours and Wind.Speed only.
# The terms are kept in their original order so the fitted call and the
# coefficient names are unchanged.
regstd.car2 <- lm(
  formula = dat.regStd$car ~ dat.regStd$Temperature + dat.regStd$hours +
    dat.regStd$Wind.Speed
)
# Residual quantiles, coefficient table and overall fit statistics
summary(regstd.car2)
## 
## Call:
## lm(formula = dat.regStd$car ~ dat.regStd$Temperature + dat.regStd$hours + 
##     dat.regStd$Wind.Speed)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -150.03  -53.60  -19.86   36.29  369.55 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -78.3326    12.4446  -6.294 4.17e-10 ***
## dat.regStd$Temperature   1.3214     0.2344   5.638 2.10e-08 ***
## dat.regStd$hours         7.7719     0.3013  25.793  < 2e-16 ***
## dat.regStd$Wind.Speed    0.7577     0.3254   2.329     0.02 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 76.52 on 1344 degrees of freedom
## Multiple R-squared:  0.3474, Adjusted R-squared:  0.3459 
## F-statistic: 238.5 on 3 and 1344 DF,  p-value: < 2.2e-16
# Variance inflation factors for the Model 2 terms (multicollinearity check)
car::vif(regstd.car2)
## dat.regStd$Temperature       dat.regStd$hours  dat.regStd$Wind.Speed 
##               1.009241               1.002105               1.008585
# Diagnostic plots for Model 2: panels 2 through 4 of plot.lm()
# (Normal Q-Q, Scale-Location, Cook's distance).
plot(regstd.car2, which = 2:4)