# 9/16/05 # R Script for combining "Single Response", "Multiple Response" # and "Text Response" files. These files are collected by a QSurvey # survey on a Zope server (i.e. http://web2survey.unt.edu/ ) # # Download these files from your Zope RESULTS folder into a # directory on your local C: drive in a folder called c:\temp # Tutorials on using QSurvey and downloading data from collected # survey results can be found at(review AVI video #9): # # http://www.unt.edu/benchmarks/archives/2005/june05/rss.htm # # Download three types of data sets: single response, multiple # response and text response. # e.g. c:\temp\asTsvSingle # c:\temp\asTsvMultiple # c:\temp\asTsvText # # These files are subsequently called: # # "mdata.part1" - single response "forced choice & other" data # "mdata.part2" - multiple response data # "mdata.part3" - single response text data mdata.part1<-read.delim("c:/temp/asTsvSingle",quote="") mdata.part2<-read.delim("c:/temp/asTsvMultiple",quote="") mdata.part3<-read.delim("c:/temp/asTsvText",quote="") # Break apart the string sequence which contains "id" information id.matrix1<-matrix(unlist(strsplit(as.character(mdata.part1$id), "_")), ncol=8, byrow=T) id.matrix2<-matrix(unlist(strsplit(as.character(mdata.part2$id), "_")), ncol=8, byrow=T) id.matrix3<-matrix(unlist(strsplit(as.character(mdata.part3$id), "_")), ncol=8, byrow=T) # Calculate the number of rows for each data type id.number1<-1:nrow(id.matrix1) id.number2<-1:nrow(id.matrix2) id.number3<-1:nrow(id.matrix3) # Combine the "exploded" string information with the original data # into one data.frame mdata.text.to.combine.dat<-data.frame(cbind(id.number3, id.matrix3, mdata.part3)) # Rename three of the columns names(mdata.text.to.combine.dat)[c(1,9,10)]<-c("rec.num","id","full.id") mdata.single.to.combine.dat<-data.frame(cbind(id.number1, id.matrix1, mdata.part1)) # Rename three of the columns names(mdata.single.to.combine.dat)[c(1,9,10)]<-c("rec.num","id","full.id") # Create two seperate data.frames for frequency tabulation for # Single response and Multiple response data # First, prepare Single response data for tabulation mdata.single.dat<-data.frame(cbind(id.number1, id.matrix1, mdata.part1)) col.size<-ncol(mdata.single.dat) mdata.single.dat<-mdata.single.dat[, c(1, 9, 10, 12:col.size)] names(mdata.single.dat)[1:3]<-c("rec.num","id","full.id") # Now prepare Multiple response data mdata.multiple.dat<-data.frame(cbind(id.number2, id.matrix2, mdata.part2)) mdata.multiple.dat<-mdata.multiple.dat[, c(1, 9, 10, 11, 12)] names(mdata.multiple.dat)<-c("rec.num", "id", "full.id", "page.question", "response") # Calculate information that is needed for unstacking multiple # response data - will need to unstack before we tabulate mdata.ids<-as.vector(unique(mdata.multiple.dat$id)) mdata.id.split<-split(as.character(mdata.multiple.dat$id), f=as.character(mdata.multiple.dat$id)) # Calculate a sequence for a set of records within each "id" # We will need this to unstack the data into unique variable names mdata.id.seq<-lapply(mdata.id.split, seq) # Now stack the sequences (that were just created) - this will # range from 1 to the number of records for a particular "id" value. # Again, we will need this variable to unstack the multiple response data temp<-0 for (i in 1:length(mdata.id.seq)){ temp<-c(temp,mdata.id.seq[[mdata.ids[i]]]) } # Strip off temp value from top of the sequence column (i.e. zero) col.seq.ids<-temp[2:length(temp)] attach(mdata.multiple.dat) # Combine the question type info (e.g. "P1q2") with the actual response # into one column. This will save ALOT of space in the final # merged data.frame! We can tabulate the type of question and its # response together as a single "response type" response.type<-paste(mdata.multiple.dat$page.question, mdata.multiple.dat$response) # Create a new data.frame with renamed columns. mdata.mult.dat<-data.frame(col.seq.ids, id, full.id, response.type) # Finally!! Now unstack multiple response data into a "wide" format mdata.mult<-reshape(mdata.mult.dat, timevar=c("col.seq.ids"), idvar=c("id","full.id"), direction="wide") # Rename the row names row.names(mdata.mult)<-1:nrow(mdata.mult) # Now what we've all been waiting for.... # Print out frequency tables for multiple response data ncol.mdata.mult<-ncol(mdata.mult) freq.response.type<-unlist(apply(mdata.mult[,2:ncol.mdata.mult], 2, table)) freq.response.type.df<-data.frame(freq.response.type) names(freq.response.type.df)<-c("freq.response.type") freq.response.type.df # Print out frequency tables for single response data table.freq.single<-unlist(apply(mdata.single.dat[,3:ncol(mdata.single.dat)], 2, table)) table.freq.single.df<-data.frame(table.freq.single) table.freq.single.df #### In this next section we merge all three types of #### data (i.e. single response, multiple response, and text) #### into one data.frame for export into an .xls format # Check to make sure records match up for each of the three records head(mdata.single.to.combine.dat, n=1) head(mdata.mult, n=1) head(mdata.text.to.combine.dat, n=1) # Merge "Single Response" and "Multiple Response" records together by # variable "id" mdata.combined.s.m.dat<-merge(mdata.single.to.combine.dat, mdata.mult, by=c("id","full.id"), all=TRUE, sort=FALSE) # Merge previous merged file (single and multiple) with "Text Response" # records # Duplicate columns from the merged files will have variable names appended # with ".x" and ".y" mdata.combined.s.m.t.dat<-merge(mdata.combined.s.m.dat, mdata.text.to.combine.dat, by=c("id","full.id"), all=TRUE, sort=FALSE) ############ Clean the data.frame up a bit # Clean leading and trailing spaces trim<- function(s){ s <- sub("^ +", "", s) s <- sub(" +$", "", s) s} # This function will examine each cell and trim leading and trailing spaces clean.data<-function(data){ x<-data data<-if(is.character(data)){ x<-trim(x) return(x)} data<-if(!is.character(data)){return(x)} } # Apply the clean.data function to the data mdata<-data.frame(apply(mdata.combined.s.m.t.dat, c(1,2), clean.data)) # Finished processing data. # Write file out to a "tab-delimited" file with variable names on line 1 write.table(mdata, "c:/temp/mdata.txt", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE, na="")