########################################################################## # A Simulation to demonstrate a Linear Common Factor Model - 2 orthogonal/oblique # factors # Note: Comment sections are preceded by a pound sign # Set seed so we can repeat same simulation each time # simulation is run set.seed(123456) # Number of subjects n<-1000 # Error of items # loadings (set 0 to 1) i11.loading<-.6 i21.loading<-.6 i31.loading<-.6 i42.loading<-.6 i52.loading<-.6 i62.loading<-.6 # Factor 1 item errors stdev11<-sqrt(1-i11.loading^2) stdev21<-sqrt(1-i21.loading^2) stdev31<-sqrt(1-i31.loading^2) # Factor 2 item errors stdev42<-sqrt(1-i42.loading^2) stdev52<-sqrt(1-i52.loading^2) stdev62<-sqrt(1-i62.loading^2) # Grandmean isn't necessary...just included to make comparison # to ANOVA model grandmean<-3 # Simulate the common attribute value (latent trait or common factor) # for n subjects. Drawn from normal distribution with mean zero and # stdev equal to one. Draw two different set of scores - one set # for factor 1 scores, one set for factor 2 scores. Notice that these # factor scores are independent. # Independent latent trait scores for individuals # Factor 1 scores drawn independently from f2 f1<-rnorm(n,0,1) # Factor 2 scores drawn independently from f1 f2<-rnorm(n,0,1) cor.factors<-.5 Sigma<-matrix(c(1,cor.factors,cor.factors,1),ncol=2) f.mat<-mvrnorm(1000*2,rep(0,2),Sigma,empirical=TRUE) cor(f.mat) f.df<-data.frame(f.mat) head(f.df) # Comment section: # Calculate response for each of 6 items (y1-y6) on n subjects # Produces a 1000 subjects by 6 items matrix # # Outline of the Model: # __ __ # grandmean + item_difficulty |+ (factor1_loading)*(f1_common_attribute)| + error_item # |+ (factor2_loading)*(f2_common_attribute)| # |__ _| # # Create Data Array (1000 subjects by 6 items # Items 1-3 measure Factor 1 ('0*f2' is just a place-holder..really # isn't necessary) y1<- grandmean + .1 + i11.loading*f.df$X1 + 0*f.df$X2 + rnorm(n, 0, stdev11) y2<- grandmean + .2 + i21.loading*f.df$X1 + 0*f.df$X2 + rnorm(n, 0, stdev21) y3<- grandmean + .3 + i31.loading*f.df$X1 + 0*f.df$X2 + rnorm(n, 0, stdev31) # Items 4-6 measure Factor 2 ('0*f1' is place-holder) y4<- grandmean + .4 + 0*f.df$X1 + i42.loading*f.df$X2 + rnorm(n, 0, stdev42) y5<- grandmean + .5 + 0*f.df$X1 + i52.loading*f.df$X2 + rnorm(n, 0, stdev52) y6<- grandmean + .6 + 0*f.df$X1 + i62.loading*f.df$X2 + rnorm(n, 0, stdev62) # Convert matrix to data.frame f2.data<-data.frame(cbind(y1, y2, y3, y4, y5, y6)) f2.data<-data.frame(cbind(trunc(y1), trunc(y2), trunc(y3), trunc(y4), trunc(y5), trunc(y6))) f2.data$X1<-ifelse(f2.data$X1>7,7,f2.data$X1) f2.data$X1<-ifelse(f2.data$X1<=0,1,f2.data$X1) f2.data$X2<-ifelse(f2.data$X2>7,7,f2.data$X2) f2.data$X2<-ifelse(f2.data$X2<=0,1,f2.data$X2) f2.data$X3<-ifelse(f2.data$X3>7,7,f2.data$X3) f2.data$X3<-ifelse(f2.data$X3<=0,1,f2.data$X3) f2.data$X4<-ifelse(f2.data$X4>7,7,f2.data$X4) f2.data$X4<-ifelse(f2.data$X4<=0,1,f2.data$X4) f2.data$X5<-ifelse(f2.data$X5>7,7,f2.data$X5) f2.data$X5<-ifelse(f2.data$X5<=0,1,f2.data$X5) f2.data$X6<-ifelse(f2.data$X6>7,7,f2.data$X6) f2.data$X6<-ifelse(f2.data$X6<=0,1,f2.data$X6) # Calculate covariance and correlation matrix of 6 variables library(corpcor) #f2.cov<-cov.shrink(f2.data) #f2.cov #f2.cor<-cor.shrink(f2.data) #f2.cor f2.cov<-cov(f2.data) f2.cov f2.cor<-cor(f2.data) f2.cor # Run Maximum Likelihood Factor Analysis - 2 Factors library(GPArotation) # Try rotation with "varimax" and "promax" f2.fact<-factanal(f2.data, factors=2, rotation="oblimin",scores=c("regression")) f2.fact library(psych) f2.fact.omega<-omega(f2.data,nfactors=2) f2.fact.omega$schmid # Extract factor loadings f2.fact$loadings ########################################################## # # Calculate Cronbach's Alpha and compare to Omega Coefficient library(psy) cronbach(f2.data) # Calculate Omega Coefficient PartA<-(sum(f2.fact$loadings))^2 PartB<-sum(1-f2.fact$loadings^2) omega<-PartA/(PartA+PartB) omega ########################################################### # # Perform Principal Component Extraction (note lower loadings) # Extract component loadings (eigenvectors) f2.comp<-princomp(f2.data, cor=T) f2.comp$loadings ########################################################################## # # Perform a confirmatory factor analysis with structural # equation modeling package # # Confirmatory factor analysis (CFA) - 2 factors # # Load library for structural equation modeling library(sem) # Calculate correlation matrix f2.cor<-cor(f2.data) # Create lower triangular matrix from correlation matrix f2.cor[upper.tri(f2.cor)] <- 0 f2.cor # Comment Section: # Arbitrary Path Loading Name # Factor Path for Items | Item 1 on Factor 1 to Item 6 on Factor 2 # | | # | | Fix variance of path loading # Model Setup: | | | NA means not used # V V V library(sem) model.f2<-matrix(c('F1->y1' , 'lam11' , 1, 'F1->y2' , 'lam21' ,NA, 'F1->y3' , 'lam31' ,NA, 'F2->y4' , 'lam42' , 1, 'F2->y5' , 'lam52' ,NA, 'F2->y6' , 'lam62' ,NA, 'y1<->y1', 'th11' ,NA, 'y2<->y2', 'th21' ,NA, 'y3<->y3', 'th31' ,NA, 'y4<->y4', 'th42' ,NA, 'y5<->y5', 'th52' ,NA, 'y6<->y6', 'th62' ,NA, 'F1<->F2', 'gamma',NA, 'F1<->F1', NA , 1, 'F2<->F2', NA , 1), ncol=3, byrow=T) # Examine the model structure model.f2 # Names for Items obs.vars.f2<-c('y1','y2','y3','y4','y5','y6') # Estimate Model sem.f2<-sem(model.f2, f2.cor, 1000, obs.vars.f2) # summarize model summary(sem.f2) # Modification indices: # # Modification indices that are (rule-of-thumb) larger than 3.84 # (sig. level Chisquare df=1), can be used to identify modifications # (additions) to the path structure that will improve the model fit # (smaller chisquare value...larger p value. The 'A' matrix represents # regression paths: x:y represents y->x . The 'P' matrix represents # covariance paths: x:y represents x<->y . mod.indices(sem.f2) # Creating a path diagram - Use program 'dot.ext' from the following public # domain graphing package: # # http://www.graphviz.org/pub/graphviz/ARCHIVE/graphviz-2.2.1.exe # # Save the output from 'path.diagram(sem.f2)' into a text file, then # select this file using the 'dot.exe' program. path.diagram(sem.f2) ######## # # Example Output From library sem We want Chisq <.05 # | # V # Model Chisquare = 5.5491 Df = 9 Pr(>Chisq) = 0.78406 # Goodness-of-fit index = 0.99815 # # # We want fit to be >.90 # | # V # Adjusted goodness-of-fit index = 0.9957 # # We want RMSEA <.05 # | # V # RMSEA index = 0 90 % CI: (NA, 0.023808) # BIC = -72.747 # # # # Normalized Residuals # Min. 1st Qu. Median Mean 3rd Qu. Max. # -2.06e-01 -5.22e-05 7.74e-06 2.45e-01 3.15e-01 1.53e+00 # # # We want z-scores for path loadings sig. to be <.05 # | # Parameter Estimates V # Estimate Std Error z value Pr(>|z|) # # Path Loadings larger than about .30 are preferred # | # V # lam11 0.52858 0.043180 12.241 0 y1 <--- F1 # lam21 0.54602 0.043846 12.453 0 y2 <--- F1 # lam31 0.61105 0.046421 13.163 0 y3 <--- F1 # lam42 0.49235 0.043910 11.213 0 y4 <--- F2 # lam52 0.59407 0.048481 12.254 0 y5 <--- F2 # lam62 0.54803 0.046362 11.821 0 y6 <--- F2 # th11 0.72061 0.046495 15.498 0 y1 <--> y1 # th21 0.70186 0.047581 14.751 0 y2 <--> y2 # th31 0.62662 0.052817 11.864 0 y3 <--> y3 # th42 0.75759 0.046545 16.276 0 y4 <--> y4 # th52 0.64708 0.054716 11.826 0 y5 <--> y5 # th62 0.69967 0.050411 13.879 0 y6 <--> y6 # # Iterations = 19