# # # Please participate in the R&SS Client Feedback Survey. # https://unt.az1.qualtrics.com/SE/?SID=SV_diLLVU9iuJZN8C9 # # ########## Regression Diagnostics with the "gvlma" package ########## # # The 'gvlma' stands for "Global Validation of Linear Models Assumptions" # # First, import the data from the web. df.1 <- read.table("http://bayes.acs.unt.edu:8083:8083/BayesContent/class/Jon/R_SC/Module6/df.1.txt", header=TRUE, sep=",", na.strings="NA", dec=".", strip.white=TRUE) summary(df.1) # Next, create a subset of the variables of interest (checking the correlations for multicolinearity). subset.1 <- df.1[,6:12] cor(subset.1) pairs(subset.1) rm(subset.1) # Using Bayesian Model Averaging to select the best predictors for a linear model. library(BMA) predictors <- as.matrix(df.1[,7:12]) bma.1 <- bicreg(predictors, df.1$y) summary(bma.1) # Detach all the packages required for BMA. detach("package:BMA") detach("package:leaps") detach("package:survival") detach("package:splines") rm(predictors, bma.1) # Fit some models to reinforce the fact that "model 1" from the BMA was the best (only included # x1, x2, x3). mod.4 <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6, df.1) summary(mod.4) mod.3 <- lm(y ~ x1 + x2 + x3 + x4 + x5, df.1) summary(mod.3) mod.2 <- lm(y ~ x1 + x2 + x3 + x4, df.1) summary(mod.2) mod.1 <- lm(y ~ x1 + x2 + x3, df.1) summary(mod.1) # Check the standardized coefficients. library(QuantPsyc) lm.beta(mod.4) lm.beta(mod.3) lm.beta(mod.2) lm.beta(mod.1) # Remove unnecessary models and the QuantPsyc package. rm(mod.4, mod.3, mod.2) detach("package:QuantPsyc") ######## Conduct tests of Linear Model assumptions. library(gvlma) global.test <- gvlma(mod.1) summary(global.test) plot(global.test) detach("package:gvlma") rm(global.test) # Traditional diagnostic plots. oldpar <- par(oma=c(0,0,3,0), mfrow=c(2,2)) plot(mod.1) par(oldpar) ###### See also: http://www.unt.edu/benchmarks/archives/2008/june08/rss.htm # Miscellaneous; plots / graphs below. library(car) scatterplot(df.1$y, df.1$x1) scatterplotMatrix(df.1[,6:12]) hist(df.1$y, col = "lightgreen", prob = TRUE) lines(density(df.1$y), col = "blue") hist(df.1$x1, col = "lightgreen", prob = TRUE) lines(density(df.1$x1), col = "blue") hist(df.1$x5, col = "lightgray", prob = TRUE) lines(density(df.1$x5), col = "red") hist(df.1$x6, col = "lightgray", prob = TRUE) lines(density(df.1$x6), col = "red") # # Please participate in the R&SS Client Feedback Survey. # https://unt.az1.qualtrics.com/SE/?SID=SV_diLLVU9iuJZN8C9 # # End: Feb, 2011.