####################################################
# Monte Carlo Simulation and Parameter Estimation
#
# Create a data set by simulation. Initially fix
# population values for x, but optionally allow a
# random effect for the population correlation rho.

# Sample size for x and y
n <- 100
rho.pop <- .5

# Number of Monte Carlo replications
replications <- 100000

# Random effect on correlation: yes = 1; no = 0. Default is 0.
random.effect.flag <- 0

# Random effect for the predictor - initially set to zero
x.sigma <- 0

# Draw the unobserved random effect for the i_th case -
# initially fixed at the population value (mean = 0, sigma = 0).
x.i <- rnorm(n, 0, x.sigma)

# Draw n observed scores on x - initially set to be fixed values
# in the population (e.g. rnorm(n, 0, 1))
x <- rnorm(n, x.i, 1)

# If random.effect.flag == 1, draw a per-case population
# correlation (rho random effect) and set the error term from the
# identity error = sqrt(1 - rho^2); otherwise keep rho constant
# and sample the error term from rnorm(n, 0, sqrt(1 - rho^2)).
if (random.effect.flag == 1) {
  rho <- runif(n, -rho.pop, rho.pop) + rho.pop
  # Censor values - set to the mean (rho.pop) if rho is out of
  # bounds. (Fix: the lower branch previously substituted
  # -1 * rho.pop, which is not the mean of the sampled
  # distribution and contradicted this comment.)
  rho <- ifelse(rho < (-1), rho.pop, rho)
  rho <- ifelse(rho > 1, rho.pop, rho)
  # Deterministic error implied by the identity above
  error.rho <- sqrt(1 - rho^2)
} else {
  rho <- rep(rho.pop, n)
  error.rho <- sqrt(1 - rho^2)
  error.rho <- rnorm(n, 0, error.rho)
}

# Statistics of the sampled parameter values used to
# create the sample values of x and y
mean(rho)
mean(error.rho)
mean(rho) + mean(error.rho)
sd(error.rho)

# Assemble data for the sample and fit the standardized regression
y <- rho * x + error.rho
y.x.fit <- lm(scale(y) ~ scale(x))
# NOTE(review): the fit is on standardized variables but the plot
# shows raw x, y, so the abline is on a different scale than the
# points - confirm this overlay is intended.
plot(x, y)
abline(y.x.fit)
summary(y.x.fit)

#### Monte Carlo Parameter Estimation
# (fixed data, random parameters - Bayesian version)
#
# Pick candidate slope coefficients with uniform probability;
# these are candidate models that can account for the observed
# "fixed data". We do this many times - call these the Monte
# Carlo replications.
#
# Note that individual y & x are fixed and do not vary.
# Candidate models predict observed y as:
#
#   y_pred = rho.candidate[i] * x
#
# where rho.candidate[i] is a particular random model; x are the
# fixed observed n data points and y_pred is a prediction about
# the n observed y values. We rank order our candidate models by
# sum((y_i - y_pred_i)^2) as i varies from 1 to n.

# Randomly select rho values from a uniform distribution
rho.candidate <- runif(replications, -1, 1)

# Weights for the (possibly weighted) residual sum of squares.
# Default: all weights equal 1 - one weight PER CANDIDATE MODEL.
# Fix: the default was rep(1, n), so weights[i] was NA for every
# candidate with i > n, silently corrupting the ranking.
weights <- rep(1, replications)

# If rho carries a random effect, weight each candidate's residual
# sum of squares by 1/sqrt(1 - rho^2), the reciprocal of the
# residual SD implied by that candidate (as documented below).
# Fix: previously 1/rho.candidate, which is negative for negative
# candidates (negative sums of squares break the ranking),
# unbounded near zero, and inconsistent with the note at the end.
if (random.effect.flag == 1) {
  weights <- 1/sqrt(1 - rho.candidate^2)
}

# Weighted residual sum of squares for every candidate, in closed
# form:
#   sum(w*(y - rho*x)^2) = w*(sum(y^2) - 2*rho*sum(x*y) + rho^2*sum(x^2))
# This replaces an O(replications * n) loop with O(n + replications).
sum.yy <- sum(y^2)
sum.xy <- sum(x * y)
sum.xx <- sum(x^2)
squared.resid <-
  weights * (sum.yy - 2 * rho.candidate * sum.xy +
               rho.candidate^2 * sum.xx)

# Rank candidates by their (weighted) residual sum of squares
rho.sum.squared.resid <- data.frame(rho.candidate, squared.resid)
rankings <- order(rho.sum.squared.resid[, 2])
rho.sum.squared.resid.sorted <- rho.sum.squared.resid[rankings, ]

# Display 6 top candidates
top.values <- head(rho.sum.squared.resid.sorted)
top.values
# Per-column means of the top candidates.
# Fix: mean() on a data.frame returns NA with a warning in modern
# R; colMeans() computes the intended column means.
colMeans(top.values)

# Best model (e.g. rho)
best.pred.rho <- rho.sum.squared.resid.sorted[1, 1]
best.pred.rho

# Sum of squared residuals for the best model
residual.var <- rho.sum.squared.resid.sorted[1, 2]
residual.var

# Residual Standard Error:
# Note: in the case of random effects for rho, the sum of squared
# residuals of the best model (rho) is based on a weighted sum
# where the weights are 1/sqrt(1 - rho^2). In the case of no
# random effects we use weights equal to 1.
# NOTE(review): n - 2 is lm()'s intercept+slope convention, but
# this model estimates only a slope - confirm the intended df.
resid.std.error <- sqrt(residual.var/(n - 2))
resid.std.error