#
#
# Please participate in the R&SS Client Feedback Survey.
# https://unt.az1.qualtrics.com/SE/?SID=SV_diLLVU9iuJZN8C9
#
#
############### Recoding Likert Variables as numeric. ###############
############ And a lesson in programming speed/efficiency. ##########
#
#

# Import some example data and extract a smaller / more manageable subset of it.
# 'head' keeps at most the first 400 rows, so a shorter file does not get
# padded with all-NA rows the way indexing with 1:400 would.

orig.data <- read.table(
  "http://bayes.acs.unt.edu:8083/BayesContent/class/Jon/R_SC/Module10/IntroPsych_Fall2011.txt",
  header = TRUE, sep = ",", na.strings = "NA", dec = ".", strip.white = TRUE)
subset.1 <- head(orig.data, n = 400); rm(orig.data)
summary(subset.1)

## The old -- SLOW -- way to recode Likert data as numeric.

# Recode a data frame of 7-point Likert labels, one cell at a time.
#   data:    data frame whose cells hold the response labels listed below.
#   returns: data frame of the same dimensions and column names holding the
#            numeric codes 1 (StronglyAgree) through 7 (StronglyDisagree).
#            Cells that are NA or hold any other label are left as 0.
# The cell-by-cell double loop is kept on purpose: it is the slow baseline
# that the vectorized functions are timed against.
recoding.7.point <- function(data) {
  new.data <- data.frame(matrix(0, nrow = nrow(data), ncol = ncol(data)))
  for (j in seq_len(ncol(data))) {
    for (i in seq_len(nrow(data))) {
      value <- data[i, j]
      # 'if' cannot test NA, so skip missing responses and leave them as 0.
      if (is.na(value)) next
      if (value == "StronglyAgree") {
        new.data[i, j] <- 1
      } else if (value == "Agree") {
        new.data[i, j] <- 2
      } else if (value == "SomewhatAgree") {
        new.data[i, j] <- 3
      } else if (value == "Neutral") {
        new.data[i, j] <- 4
      } else if (value == "SomewhatDisagree") {
        new.data[i, j] <- 5
      } else if (value == "Disagree") {
        new.data[i, j] <- 6
      } else if (value == "StronglyDisagree") {
        new.data[i, j] <- 7
      }
    }
  }
  names(new.data) <- names(data)
  new.data
}

# Recode a data frame of 4-point Likert labels, one cell at a time.
#   data:    data frame whose cells hold the response labels listed below.
#   returns: data frame of the same dimensions and column names holding the
#            numeric codes 1 (StronglyAgree) through 4 (StronglyDisagree).
#            Cells that are NA or hold any other label are left as 0.
recoding.4.point <- function(data) {
  new.data <- data.frame(matrix(0, nrow = nrow(data), ncol = ncol(data)))
  for (j in seq_len(ncol(data))) {
    for (i in seq_len(nrow(data))) {
      value <- data[i, j]
      # 'if' cannot test NA, so skip missing responses and leave them as 0.
      if (is.na(value)) next
      if (value == "StronglyAgree") {
        new.data[i, j] <- 1
      } else if (value == "Agree") {
        new.data[i, j] <- 2
      } else if (value == "Disagree") {
        new.data[i, j] <- 3
      } else if (value == "StronglyDisagree") {
        new.data[i, j] <- 4
      }
    }
  }
  names(new.data) <- names(data)
  new.data
}

## New -- FASTER -- way.
# Recode one vector of 7-point Likert labels to numeric codes.
#   vector:  character (or factor) vector of response labels.
#   returns: numeric vector of the same length; each label is replaced by its
#            code (StronglyAgree = 1 ... StronglyDisagree = 7). NA and any
#            label not listed are coded 0.
recode.7 <- function(vector) {
  labels <- c("StronglyAgree", "Agree", "SomewhatAgree", "Neutral",
              "SomewhatDisagree", "Disagree", "StronglyDisagree")
  codes <- c(1, 2, 3, 4, 5, 6, 7)
  new.vec <- rep(0, length(vector))
  # Position of each response in 'labels'; NA where there is no match.
  hit <- match(vector, labels)
  found <- !is.na(hit)
  new.vec[found] <- codes[hit[found]]
  new.vec
}

# Recode one vector of 4-point Likert labels to numeric codes.
#   vector:  character (or factor) vector of response labels.
#   returns: numeric vector of the same length; each label is replaced by its
#            code (StronglyAgree = 1 ... StronglyDisagree = 4). NA and any
#            label not listed are coded 0.
recode.4 <- function(vector) {
  labels <- c("StronglyAgree", "Agree", "Disagree", "StronglyDisagree")
  codes <- c(1, 2, 3, 4)
  new.vec <- rep(0, length(vector))
  # Position of each response in 'labels'; NA where there is no match.
  hit <- match(vector, labels)
  found <- !is.na(hit)
  new.vec[found] <- codes[hit[found]]
  new.vec
}

# Applying the functions above and measuring how long they take to finish.
# The time differences *seem* harmless here (n = 400), but with large
# data the differences can be very meaningful (e.g., 15 minutes vs. 1 minute).

system.time(old.7 <- recoding.7.point(subset.1[, 8:12]))
system.time(old.4 <- recoding.4.point(subset.1[, 13:21]))
system.time(new.7 <- apply(subset.1[, 8:12], 2, recode.7))
system.time(new.4 <- apply(subset.1[, 13:21], 2, recode.4))

# Merging the demographic variables with the newly recoded variables.

subset.2 <- data.frame(subset.1[, 1:7], new.7, new.4)
summary(subset.2); rm(old.7, old.4, new.7, new.4)
head(subset.1)
head(subset.2)

# These functions can easily be modified for other response sets or other numeric
# codes. The examples below are not used here, simply listed so one can see
# how to modify the functions for other data situations.
# Recode one vector of 5-point Likert labels to numeric codes.
#   vector:  character (or factor) vector of response labels.
#   returns: numeric vector of the same length; each label is replaced by its
#            code (StronglyAgree = 1 ... StronglyDisagree = 5). NA and any
#            label not listed are coded 0.
recode.5 <- function(vector) {
  labels <- c("StronglyAgree", "Agree", "Neutral", "Disagree",
              "StronglyDisagree")
  codes <- c(1, 2, 3, 4, 5)
  new.vec <- rep(0, length(vector))
  # Position of each response in 'labels'; NA where there is no match.
  hit <- match(vector, labels)
  found <- !is.na(hit)
  new.vec[found] <- codes[hit[found]]
  new.vec
}

# Reverse-scored variant of the 5-point recode.
#   vector:  character (or factor) vector of response labels.
#   returns: numeric vector of the same length; each label is replaced by its
#            code (StronglyAgree = 5 ... StronglyDisagree = 1). NA and any
#            label not listed are coded 0.
recode.5r <- function(vector) {
  labels <- c("StronglyAgree", "Agree", "Neutral", "Disagree",
              "StronglyDisagree")
  codes <- c(5, 4, 3, 2, 1)
  new.vec <- rep(0, length(vector))
  # Position of each response in 'labels'; NA where there is no match.
  hit <- match(vector, labels)
  found <- !is.na(hit)
  new.vec[found] <- codes[hit[found]]
  new.vec
}

# The two usage examples below are not meant to be run: 'old.variable',
# 'old.data.frame' and 'cols' are placeholders for your own objects. They are
# wrapped in 'if (FALSE)' so that the script can still be sourced as a whole.

if (FALSE) {
  # To apply the function(s) to a single variable:

  new.recoded.variable <- recode.5(old.variable)

  # To apply the function to multiple variables of a data frame, you would need
  # to use the 'apply' function (as was done in the examples above) and you
  # would need to know which columns of the data you wanted to apply the
  # function to. Below, 'cols' would be assigned the columns you want recoded;
  # the '2' dictates that you want to apply the function to the columns of the
  # data frame (rather than '1' which would indicate you wanted to apply the
  # function across the rows of the data frame).

  new.recoded.data.frame <- apply(old.data.frame[, cols], 2, recode.5r)
}

# For more information about the 'apply' function:

help(apply)

# Clean up the workspace.

ls()
rm(recode.4, recode.5, recode.5r, recode.7, recoding.4.point,
   recoding.7.point, subset.1, subset.2)
ls()

#
# Please participate in the R&SS Client Feedback Survey.
# https://unt.az1.qualtrics.com/SE/?SID=SV_diLLVU9iuJZN8C9
#
# End script; March 7, 2012.