################################################################## # A Simulation to demonstrate an Item Response Theory Model - 1 factor. # Binary items are simulated according to underlying continuous normal # trait (with response thresholds). Comparisons of 1-factor IRT to Normal # Theory Factor Analysis based on Polychoric and Pearson correlations # are performed. # # Note: Comment sections are preceded by a pound sign # Set seed so we can repeat same simulation each time # simulation is run runif(1) save.seed<-.Random.seed set.seed(save.seed) # Number of subjects n<-1000 # Create ability scores for n individuals # Assuming N(0,1) f1<-rnorm(n) # Create n response thresholds (t) on each of 6 items (t1-t6) # for n individuals assuming N(0,1) for t1-t6. # # Each item has a mean threshold across individuals of mean=0, sd=1 t1<-rnorm(n) t2<-rnorm(n) t3<-rnorm(n) t4<-rnorm(n) t5<-rnorm(n) t6<-rnorm(n) # Convert t1-t6 thresholds to values between 0 and 1 using function "pnorm" # We will call these # The average p(t)=.50, so the average threshold is .50 across items. t1.p<-pnorm(t1) t2.p<-pnorm(t2) t3.p<-pnorm(t3) t4.p<-pnorm(t4) t5.p<-pnorm(t5) t6.p<-pnorm(t6) # Create a vector for probability of ability values for a specific set of # item characteristics: based on n ability scores (f1), # for an item with a given discrimination (b) and item difficulty (a) # Create a function that has calling arguments b=discrimination coefficient; # a=item difficulty coefficient; and f1=ability (vector of n values) prob.ability<-function(a,b,f1){ # Curly brace to begin function # Comments first: # # b=discrimination coefficient; a=item difficulty coefficient (intercept); # f1=vector on n ability values N(0,1); Z is the amount of trait measured on # an individual with trait f with item error e, as measured by an item whose # parameters are a, b; p(Z) is the probability of the trait value on an # individual with trait f, as measured by item with paramters a, b. # e (error) is based on relation lamda_error = sqrt(1-lamda^2) # and lambda = (b/1.701)/sqrt(1+(b/1.701)^2) e<-rnorm(length(f1),0, sqrt(1-((b/1.701)/sqrt(1+(b/1.701)^2))^2)) Z<-b*f1+a+e prob.f1<-1/(1+exp(-Z)) # return a vector: probability of ability for n individuals # for a specified set of item characteristics: a,b return(prob.f1) return(prob.f1) } # Curl brace to end function # Plot Item Response Curves probs<-prob.ability(a=.10, b=.6, f1) plot(f1, probs) probs<-prob.ability(a=.20, b=.6, f1) plot(f1, probs) probs<-prob.ability(a=.30, b=.6, f1) plot(f1, probs) probs<-prob.ability(a=.40, b=.6, f1) plot(f1, probs) probs<-prob.ability(a=.50, b=.6, f1) plot(f1, probs) probs<-prob.ability(a=.60, b=.6, f1) plot(f1, probs) # Create observed responses from underlying ability and threshold: # # Compare ability level p(f), which is between 0 and 1, with the threshold p(t), # which is between 0 and 1. If p(f1)<p(t)then return # 0 - incorrect response; For example, if p(f1)>p(t1) then return 1 (correct # response). If p(f1)<p(t1), return 0 (incorrect response). # # The difficulty parameter a represents the probability of correct answer # across individuals. We allow the difficulty to increase across items from # average y1 = .10 to average y6 = .60. The parameter b (discrimination) # represents the slope of the ogive curve (the the factor curve). y1<-ifelse(prob.ability(b=.6, a=.10, f1) < t1.p , 0 ,1) y2<-ifelse(prob.ability(b=.6, a=.20, f1) < t2.p , 0 ,1) y3<-ifelse(prob.ability(b=.6, a=.30, f1) < t3.p , 0 ,1) y4<-ifelse(prob.ability(b=.6, a=.40, f1) < t4.p , 0 ,1) y5<-ifelse(prob.ability(b=.6, a=.50, f1) < t5.p , 0 ,1) y6<-ifelse(prob.ability(b=.6, a=.60, f1) < t6.p , 0 ,1) # Combine data into a dataframe f1.data<-data.frame(cbind(y1, y2, y3, y4, y5, y6)) head(f1.data) tail(f1.data) # Perform a 1-factor latent variable analysis according to # Item Response Theory # Estimate model library(ltm) # discrimination allowed to vary f1.data.ltm.fit<-ltm(f1.data~z1, IRT.param=FALSE) # discrimination assumed to be equal f1.data.rasch.fit<-rasch(f1.data, IRT.param=FALSE) # Summarize model and plot item characteristic curves # Two parameter IRT summary(f1.data.ltm.fit) plot(f1.data.ltm.fit) # One parameter IRT (e.g. Rasch) summary(f1.data.rasch.fit) GoF.rasch(f1.data.rasch.fit) plot(f1.data.rasch.fit) # Estimate and print out ability scores for n respondents f1.data.ltm.scores<-factor.scores(f1.data.ltm.fit, IRT.param=FALSE) f1.data.rasch.scores<-factor.scores(f1.data.rasch.fit, IRT.param=FALSE) head(f1.data.ltm.scores) head(f1.data.rasch.scores) ######################################################################## # # Begin Section on Polychoric and Pearson Correlation Factor Analysis # Calculate correlation matrix of 6 variables f1.cor<-cor(f1.data) f1.cor # Calculate polychoric correlation matrix # Create ordinal data from numeric data f1.data2<-f1.data f1.data2$y1<-as.factor(f1.data$y1) f1.data2$y2<-as.factor(f1.data$y2) f1.data2$y3<-as.factor(f1.data$y3) f1.data2$y4<-as.factor(f1.data$y4) f1.data2$y5<-as.factor(f1.data$y5) f1.data2$y6<-as.factor(f1.data$y6) library(polycor) f1.poly.cor<-hetcor(f1.data2, bins=4) f1.poly.cor f1.poly.cor<-f1.poly.cor$correlations f1.poly.cor # Create lower triangular matrix from correlation matrix # Pearson f1.cor[upper.tri(f1.cor)] <- 0 f1.cor # Polychoric f1.poly.cor[upper.tri(f1.poly.cor)] <- 0 f1.poly.cor # Run Linear Common Factor Model (Maximumlikelihood Factor Analysis) # on the binary items - 1 factor - First Pearson, Then Polychoric # Loadings based on Pearson correlation f1.fact<-factanal(f1.data, factors=1, scores="regression") f1.fact # Factor scores based on Pearson correlation head(f1.fact$scores) tail(f1.fact$scores) # Loadings based on Polychoric correlation f1.poly.fact<-factanal(covmat=f1.poly.cor, factors=1) f1.poly.fact ########################################################## # # Calculate Cronbach's Alpha and compare to Omega Coefficient # Based on Pearson correlation library(psy) cronbach(f1.data) # Calculate Omega Coefficient PartA<-(sum(f1.fact$loadings))^2 PartB<-sum(1-f1.fact$loadings^2) omega.pearson<-PartA/(PartA+PartB) omega.pearson ########################################################### # Based on Polychoric correlation # # Calculate Omega Coefficient PartA<-(sum(f1.poly.fact$loadings))^2 PartB<-sum(1-f1.poly.fact$loadings^2) omega.poly<-PartA/(PartA+PartB) omega.poly ########################################################################## # # Perform a confirmatory factor analysis with structural # equation modeling package # # Confirmatory factor analysis (CFA) - 1 factor # # Based on Pearson correlation # # # Load library library(sem) # Comment Section: # # Factor Path for Items (e.g. item 1 on factor 1) # | # | Arbitrary Path Loading Name # | | # | | Fix variance of path loading # | | NA means not used # Model Setup: | | | # V V V model.f1<-matrix(c( 'F1->y1', 'lam1',1, 'F1->y2', 'lam2',NA, 'F1->y3', 'lam3',NA, 'F1->y4', 'lam4',NA, 'F1->y5', 'lam5',NA, 'F1->y6', 'lam6',NA, 'y1<->y1', 'th1' ,NA, 'y2<->y2', 'th2' ,NA, 'y3<->y3', 'th3' ,NA, 'y4<->y4', 'th4' ,NA, 'y5<->y5', 'th5' ,NA, 'y6<->y6', 'th6' ,NA, 'F1<->F1', NA , 1), ncol=3, byrow=T) # Print the model that is to be estimated: model.f1 # Names for Items obs.vars.f1<-c('y1','y2','y3','y4','y5','y6') # Estimate the Confirmatory Factor Analysis (CFA) # for the binary items using Maximum Likelihood factor analysis # Based on f1.cor - Pearson sem.pearson.f1<-sem(model.f1, f1.cor, 1000, obs.vars.f1) # summarize model summary(sem.pearson.f1) ######################################################################### ########################################################################## # # Perform a confirmatory factor analysis with structural # equation modeling package # # Confirmatory factor analysis (CFA) - 1 factor # # Based on Polychoric correlation # # Comment Section: # # Factor Path for Items (e.g. item 1 on factor 1) # | # | Arbitrary Path Loading Name # | | # | | Fix variance of path loading # | | NA means not used # Model Setup: | | | # V V V model.f1<-matrix(c( 'F1->y1', 'lam1',1, 'F1->y2', 'lam2',NA, 'F1->y3', 'lam3',NA, 'F1->y4', 'lam4',NA, 'F1->y5', 'lam5',NA, 'F1->y6', 'lam6',NA, 'y1<->y1', 'th1' ,NA, 'y2<->y2', 'th2' ,NA, 'y3<->y3', 'th3' ,NA, 'y4<->y4', 'th4' ,NA, 'y5<->y5', 'th5' ,NA, 'y6<->y6', 'th6' ,NA, 'F1<->F1', NA , 1), ncol=3, byrow=T) # Print the model that is to be estimated: model.f1 # Names for Items obs.vars.f1<-c('y1','y2','y3','y4','y5','y6') # Estimate the Confirmatory Factor Analysis (CFA) # for the binary items using Maximum Likelihood factor analysis # Based on f1.poly.cor - Polychoric sem.poly.f1<-sem(model.f1, f1.poly.cor, 1000, obs.vars.f1) # summarize model summary(sem.poly.f1) ################################################### ############ Summary ################### ################################################### # # Summarize all three approaches: 1) Normal FA on pearson cor. from binary # 2) Normal FA on polychoric cor. from binary # 3) 2-parameter IRT model from binary # # Note: Coefficients in the IRT parameterization, if divided # by D=1.701, give coefficient values that can be used in # the formula below, to calculate the "factor loading" # values that would be obtained by performing a linear 1-factor # common trait model on the "tetrachoric" correlation matrix f (and # converting back the other direction) # # IRT ---> FA (b/1.701)/sqrt(1+(b/1.701)^2) # # example: (.5189/1.701)/sqrt(1+(.5189/1.701)^2) = 0.29178 (based on 2 par IRT) # # A Factor Analysis on the tetrachoric correlation matrix gives # a lamda estimate of = 0.30593 # # FA ---> IRT (lambda/sqrt(1-lambda^2))* 1.701 # # example: example: (.30593/sqrt(1-.30593^2)) * 1.701 = 0.5466 # # A two parameter IRT analysis gives an IRT estimate = 0.5189 (2 parm. IRT est.) # # # Linear 1-factor common trait model (pearson & polychoric) summary(sem.pearson.f1) summary(sem.poly.f1) # IRT 1 and 2 parameter models summary(f1.data.ltm.fit) summary(f1.data.rasch.fit)