New Page 1

# # The following is an example of changing up an # existing function in a package to tailor the # function to our particular goals. # # We change the scree.plot function in package "psy" to # use a bootstrap resampling algorithm rather than a # monte carlo simulation for the screeplot option "sim". # A matrix of values that we want to resample from x<-matrix(1:100, ncol=10, byrow=T) x # Create a sequence of randomly sampled values (with replacement) # from 1 to number of rows. These will serve as an index to subset # the original matrix index<-sample(1:10, 10, replace=T) index # Pull out the subset of rows...notice that we maintain # columns for every row. This is equivalent to resampling # under H1 (alternate distribution). This is equivalent to # bootstrapping a correlation according to Efron's original scheme. x[index, ] # Now we want to look at the "psy" library and in particular the # scree.plot function. We want to replace the random matrices in which # the scree plot is based on, with bootstrap estimates. library(psy) # To see the function code, type the scree.plot with the "paren's" scree.plot # This is what we see: # # > scree.plot # function (namefile, title = "Scree Plot", type = "R", use = "complete.obs", # simu = "F") #{ # mat <- namefile # if (use == "complete.obs") # mat <- na.omit(namefile) # if (type == "R") # eigenval <- eigen(cor(mat, use = "pairwise.complete.obs"), # symmetric = TRUE)$values # if (type == "V") # eigenval <- eigen(cov(mat, use = "pairwise.complete.obs"), # symmetric = TRUE)$values # if (type == "E") # eigenval <- namefile # if (type == "M") # eigenval <- eigen(namefile, symmetric = TRUE)$values # nev <- length(eigenval) # plot(eigenval, type = "b", pch = 16, bty = "o", main = title, # xlab = "Dimension", ylab = "Eigenvalue") # lines(c(1, nev), c(1, 1), lty = 2) # if (is.numeric(simu) && (type == "R")) { # n <- dim(mat)[1] # p <- dim(mat)[2] # matsimu <- matrix(nrow = n, ncol = p) # int <- rep(1, n * p) # attr(int, "dim") <- c(n, p) # mat <- pmax(as.matrix(mat), int) # for (i in 1:simu) { # matnorm <- rnorm(n * p) # attr(matnorm, "dim") <- c(n, p) # matsimu <- (mat/mat) * matnorm # eigenval <- eigen(cor(matsimu, use = "pairwise.complete.obs"))$values # points(eigenval, type = "l") # } # } #} ###################################### # # Comments: # # Notice the 8th line from the bottom: # # matnorm <- rnorm(n * p) # # Our original data matrix is "mat". # This line needs to be replaced with: # # matnorm<-mat[sample(1:n, n, replace=T), ] # # This allows the matrix to be bootstrapped rows, not random # data from a normal distribution. ######### Creating the new function # First we create a local copy of the scree.plot function scree.plot.boot<-scree.plot # For interactive users (local R session): # Now we edit the function scree.plot.boot...make the change, save and exit. # # scree.plot.boot<-edit(scree.plot.boot) # For batch submission (through the Rweb interface - not interactive): # Create the function "scree.plot.boot": scree.plot.boot<-function (namefile, title = "Scree Plot", type = "R", use = "complete.obs", simu = "F") { mat <- namefile if (use == "complete.obs") mat <- na.omit(namefile) if (type == "R") eigenval <- eigen(cor(mat, use = "pairwise.complete.obs"), symmetric = TRUE)$values if (type == "V") eigenval <- eigen(cov(mat, use = "pairwise.complete.obs"), symmetric = TRUE)$values if (type == "E") eigenval <- namefile if (type == "M") eigenval <- eigen(namefile, symmetric = TRUE)$values nev <- length(eigenval) plot(eigenval, type = "b", pch = 16, bty = "o", main = title, xlab = "Dimension", ylab = "Eigenvalue") lines(c(1, nev), c(1, 1), lty = 2) if (is.numeric(simu) && (type == "R")) { n <- dim(mat)[1] p <- dim(mat)[2] matsimu <- matrix(nrow = n, ncol = p) int <- rep(1, n * p) attr(int, "dim") <- c(n, p) mat <- pmax(as.matrix(mat), int) for (i in 1:simu) { matnorm<-mat[sample(1:n, n, replace=T), ] attr(matnorm, "dim") <- c(n, p) matsimu <- (mat/mat) * matnorm eigenval <- eigen(cor(matsimu, use = "pairwise.complete.obs"))$values points(eigenval, type = "l") } } } # Check to see if change was made: # # > scree.plot.boot # # . # . # for (i in 1:simu) { # matnorm<-mat[sample(1:n, n, replace=T), ] # attr(matnorm, "dim") <- c(n, p) # matsimu <- (mat/mat) * matnorm # eigenval <- eigen(cor(matsimu, use = "pairwise.complete.obs"))$values # points(eigenval, type = "l") # . # . # Ok it was ...now we are good to go. # Run example from help, but with our new function, # and compare with the original par(mfrow=c(2,1)) data(expsy) # Random data scree.plot(expsy[,1:10],simu=20,use="P") # Bootstrap scree.plot.boot(expsy[,1:10],simu=20,use="P") # Final Note: # # This is the power of an "open source" approach: # 1) We can see the code (algorithm) that was used # to create our analysis/graphs # 2) With a small amount of effort we can change up the # the code to suit our own needs # # POWER TO THE PEOPLE!