# Scaled difference test for nested models fit to complex data
# (e.g. TYPE = COMPLEX in Mplus).  References:
#   http://preprints.stat.ucla.edu/260/chisquare.pdf
#   http://www.statmodel.com/chidiff.shtml
#
# Arguments in chi-square mode (loglike = FALSE, the default):
#   d0, d1 -- degrees of freedom of the nested (H0) and comparison (H1) models
#   c0, c1 -- scaling correction factors for the H0 and H1 models
#   t0, t1 -- MLM/MLR/WLSM chi-square values for the H0 and H1 models
#
# Arguments in loglikelihood mode (loglike = TRUE):
#   d0, d1 -- number of free parameters in the H0 and H1 models
#   c0, c1 -- scaling correction factors for the H0 and H1 models
#   t0, t1 -- loglikelihood values for the H0 and H1 models
#
# Prints the scaled test statistic, its degrees of freedom and p-value,
# and invisibly returns them as a list (value, df, p.value).
scale.diff.test <- function(d0, d1, c0, c1, t0, t1, loglike = FALSE) {
  # Difference-test scaling correction; same formula in both modes.
  cd <- (d0 * c0 - d1 * c1) / (d0 - d1)
  if (loglike) {
    # TRd = -2 * (LL0 - LL1) / cd
    TRd <- -2 * (t0 - t1) / cd
    label <- "scaled loglikelihood difference test for complex data:"
  } else {
    # TRd = (T0 * c0 - T1 * c1) / cd
    TRd <- (t0 * c0 - t1 * c1) / cd
    label <- "scaled Chi-squared difference test for complex data:"
  }
  df <- abs(d0 - d1)
  # Upper-tail probability computed directly for numerical accuracy;
  # equivalent to 1 - pchisq(TRd, df).
  sig <- pchisq(TRd, df, lower.tail = FALSE)
  cat(label, "\n")
  cat("value =", TRd)
  cat(" df=", df)
  cat(" sig. =", sig, "\n")
  invisible(list(value = TRd, df = df, p.value = sig))
}

# Examples
scale.diff.test(45, 43, 1.12, 1.123, 246.44, 237.846)
scale.diff.test(39, 47, 1.45, 1.546, -2606, -2583, loglike = TRUE)
scale.diff.test(162, 166, 1.088, 1.09, 777.043, 791.371)
scale.diff.test(166, 162, 1.09, 1.088, 791.371, 777.043)
scale.diff.test(77, 72, 1, 1, 250, 231)

Created by Pretty R at inside-R.org

Examples:

scale.diff.test (45, 43, 1.12, 1.123, 246.44, 237.846)

scale.diff.test (39, 47, 1.45, 1.546, -2606, -2583, loglike=TRUE)

Output:

scaled Chi-squared difference test for complex data:

value = 8.443147 df= 2 sig. = 0.01467553

scaled loglikelihood difference test for complex data:

value = 22.84012 df= 8 sig. = 0.003575695

]]>

# Simulate two uncorrelated standard-normal predictors ("sat" and "mot"),
# build a composite admission score, flag the top 15% as "pass", and plot
# the predictors coloured by admission status (requires ggplot2).
library(MASS)
# 250 cases, both means 0, identity covariance (diag(2) == the 2x2
# matrix with unit variances and zero covariances).
covar <- mvrnorm(250, c(0, 0), diag(2))
mydata <- data.frame(covar)
names(mydata) <- c("sat", "mot")
mydata$admin <- mydata$sat + mydata$mot
# "pass" for scores at or above the 85th percentile, "fail" otherwise.
mydata$admin2 <- ifelse(mydata$admin >= quantile(mydata$admin, .85),
                        "pass", "fail")
library(ggplot2)
qplot(sat, mot, data = mydata, color = admin2)

Created by Pretty R at inside-R.org

]]>

# Stirling's approximation to n!:  sqrt(2*pi*n) * (n/e)^n
Stirling <- function(n) {
  sqrt(2 * pi * n) * (n / exp(1))^n
}

Created by Pretty R at inside-R.org

]]>

]]>

Currently I have a function that produces a summary table of the post-matching mean differences, sorted by absolute size:

# Table of post-matching standardized mean differences, sorted by absolute
# size and restricted to covariates with |std. mean diff| > .1.
# `x` is assumed to be the summary() of a MatchIt object, where the 4th
# column of x$sum.matched is the matched standardized mean difference --
# TODO confirm against the MatchIt version in use.
meandifftable<- function (x){
  # 4th column of the matched-balance table, kept as a one-column data.frame.
  post<-data.frame(x$sum.matched[4])
  # Covariate names from the row names; NOTE(review): assigned but never
  # used below -- candidate for removal.
  matchID <- as.vector (row.names (post) )
  names(post)[1]<-c("m_mean_diff")
  # One-column data.frame (not a plain vector) of absolute differences.
  post$absolute<- abs(post[1])
  # Sort descending by absolute difference; na.last=NA drops NA rows.
  total2<-post[order (-post$absolute, na.last=NA) ,]
  # Keep only rows whose standardized difference exceeds .1 in magnitude.
  meandiffover1<- subset(total2[1], total2[1]> .1 | total2[1]< -.1)
  meandiffover1 }

A graph function (histogram and density plot) for both pre-matched mean differences:

# Histogram plus density of the PRE-matching standardized mean differences,
# on a fixed [-.5, .5] x scale so it can be compared directly with the
# post-matching plot.  `x` is the summary() of a MatchIt object; column 4
# of x$sum.all holds the mean differences.  Requires ggplot2.
all_meandiffplot <- function (x) {
  adiff <- data.frame(x$sum.all)
  names(adiff)[4] <- "all_mean_diff"
  ggplot(adiff, aes(all_mean_diff)) +
    geom_histogram(fill = "grey") +
    geom_density(colour = "red") +
    xlim(-.5, .5)
}

and after matching mean differences:

# Histogram plus density of the POST-matching standardized mean differences,
# on the same fixed [-.5, .5] x scale as the pre-matching plot.  `x` is the
# summary() of a MatchIt object; column 4 of x$sum.matched holds the mean
# differences.  Requires ggplot2.
matched_meandiffplot <- function (x) {
  mdiff <- data.frame(x$sum.matched)
  names(mdiff)[4] <- "matched_mean_diff"
  ggplot(mdiff, aes(matched_mean_diff)) +
    geom_histogram(fill = "grey") +
    geom_density(colour = "red") +
    xlim(-.5, .5)
}

Note that both plots are on a scale from -.5 to .5 so that they can easily be compared.

Finally I have a set of simple tables which indicate how many “large” (>.25), “medium” (>.20) and “small” (<.20) standardized mean differences there were before matching:

# Count covariates with "Large" (|d| > .25), "Medium" (.20 < |d| <= .25)
# and "Small" (|d| <= .20) PRE-matching standardized mean differences.
# `x` is assumed to be the summary() of a MatchIt object whose x$sum.all
# 4th column is the standardized mean difference.
#
# Fix over the original: the chain of six strict inequalities left values
# exactly equal to 0, .20 or .25 (and their negatives) unclassified (NA).
# cut() on the absolute difference closes those gaps.
all_meandiffcount <- function (x){
  all <- data.frame(x$sum.all[4])
  d <- abs(all[[1]])
  # (-Inf, .20] -> Small, (.20, .25] -> Medium, (.25, Inf) -> Large.
  # as.character() so the table, like the original, only shows the
  # categories actually present.
  all_group <- as.character(cut(d, breaks = c(-Inf, .20, .25, Inf),
                                labels = c("Small", "Medium", "Large")))
  table(all_group)
}

and after matching:

# Count covariates with "Large" (|d| > .25), "Medium" (.20 < |d| <= .25)
# and "Small" (|d| <= .20) POST-matching standardized mean differences.
# `x` is assumed to be the summary() of a MatchIt object whose
# x$sum.matched 4th column is the standardized mean difference.
#
# Fix over the original: the chain of six strict inequalities left values
# exactly equal to 0, .20 or .25 (and their negatives) unclassified (NA).
# cut() on the absolute difference closes those gaps.
matched_meandiffcount <- function (x){
  matched <- data.frame(x$sum.matched[4])
  d <- abs(matched[[1]])
  # (-Inf, .20] -> Small, (.20, .25] -> Medium, (.25, Inf) -> Large.
  # as.character() so the table, like the original, only shows the
  # categories actually present.
  matched_group <- as.character(cut(d, breaks = c(-Inf, .20, .25, Inf),
                                    labels = c("Small", "Medium", "Large")))
  table(matched_group)
}

]]>

First I wrote a function to produce a histogram of the matched mean differences which gives a nice summary of how well the matching procedure has achieved balance (note you need to install the package ggplot2 to use this function):

# Histogram plus density of the matched standardized mean differences --
# a quick visual summary of how well matching achieved balance.
# `x` is the summary() of a MatchIt object; column 4 of x$sum.matched
# holds the mean differences.  Requires ggplot2.
meandiffplot <- function (x) {
  mdiff <- data.frame(x$sum.matched)
  names(mdiff)[4] <- "m_mean_diff"
  ggplot(mdiff, aes(m_mean_diff)) +
    geom_histogram(fill = "grey") +
    geom_density(colour = "red")
}

Next I produced a function that reports only those matched mean differences with an absolute standardized difference of over .1 (sorted by absolute size).

If you use the matchit package I think these functions are really useful.

]]>

# K-means clustering of mydata followed by parallel-coordinates plots of
# the clusters (following Wickham's ggplot2 book).
# NOTE(review): depends on objects/functions defined elsewhere --
# `mydata`, `clustT1WIN`, `recode()` (car package), `melt()` (reshape),
# and the long-deprecated ggplot2 `opts()`/`theme_text()` -- so it only
# runs in the original author's session.

# Run k-means on variables 1..11 of the dataset, asking for 4 clusters.
cl <- kmeans(mydata[1:11], 4)
# Add a row-id variable (".row") for later use as a melt/graphing id.
clustT1WIN$.row <- rownames(clustT1WIN)
# Cluster membership vector from the k-means fit.
cluster<-cl$cluster
# Relabel the numeric cluster ids with meaningful names
# (recode() here is presumably car::recode -- verify).
cl.cluster<-as.vector(recode (cluster, "1='FC'; 2='FV'; 3='SO'; 4= 'OS' ", as.numeric.result=FALSE) )
# Combine the first 12 columns with the cluster labels, then melt to long
# form keyed by the row id and cluster label, ready for ggplot.
clustT1WIN2<- data.frame (clustT1WIN [1:12], cl.cluster)
molten2 <- melt(clustT1WIN2, id = c(".row", "cl.cluster") )
# Base plot: one line per case (.row), coloured by cluster.
pcp_cl <- ggplot(molten2, aes(variable, value, group = .row, colour = cl.cluster) )
# Reusable jitter parameter so it need not be retyped in each layer.
jit<- position_jitter (width = .08, height = .08)
# 1) Cluster means only.
pcp_cl + stat_summary(aes(group = cl.cluster), fun.y = mean, geom = "line")
# 2) All cases with jitter and alpha blending (colourful, uninformative).
pcp_cl + geom_line(position = jit, alpha = 1/5)
# 3) Cases plus cluster means, one facet per cluster, with the x axis
#    text angled so the labels stay readable.
pcp_cl + geom_line(position = jit, colour = alpha("black", 1/4)) + stat_summary(aes(group = cl.cluster), fun.y = mean, geom = "line", size = 1.5 ) + facet_wrap(~ cl.cluster)+ opts(axis.text.x=theme_text(angle=-45, hjust=0) )

Relatively simple but visually very informative. Here is the final result:

]]>

**Creation of proportional factor score regression: Weights for composite score creation **

# Parse an Mplus .out file: extract model fit (chi-square, CFI/TLI, RMSEA,
# SRMR), compute the latent-factor reliability from standardized loadings
# and residuals, and convert the factor-score regression weights into
# proportional weights for composite-score creation.
#
# Fixes over the original posting: the curly "smart" quotes (which are
# syntax errors in R) are replaced with straight quotes, and `fsd` is now
# computed BEFORE the `fit` data frame that uses it (the original
# referenced `fsd` before it was defined).
#
# NOTE(review): every `+ k` line offset and substring() column range below
# is tied to the exact layout of one particular Mplus output file --
# verify them against your own .out file before reuse.

out <- readLines("D:MPLUSFILENAME.out")

# --- Chi-square test of model fit: value, degrees of freedom, p ---
ind1 <- grep("Chi-Square Test of Model Fit", out)[1]
chisq <- as.numeric(substring(out[ind1 + 2], 38))
df <- as.numeric(substring(out[ind1 + 3], 38))
p <- as.numeric(substring(out[ind1 + 4], 38))

# --- CFI / TLI ---
ind2 <- grep("CFI/TLI", out)
cfi <- as.numeric(substring(out[ind2 + 2], 38))
tli <- as.numeric(substring(out[ind2 + 3], 38))

# --- RMSEA ---
ind3 <- grep("RMSEA", out)[1]
rmsea <- as.numeric(substring(out[ind3 + 2], 38))

# --- SRMR ---
ind4 <- grep("SRMR", out)[1]
srmr <- as.numeric(substring(out[ind4 + 2], 38))

# --- Standardized factor loadings and residuals (STDYX section) ---
ind6 <- grep("STDYX Standardization", out)
PEST <- c(as.numeric(substring(out[ind6 + 6], 23, 28)),
          as.numeric(substring(out[ind6 + 7], 23, 28)),
          as.numeric(substring(out[ind6 + 8], 23, 28)),
          as.numeric(substring(out[ind6 + 9], 23, 28)))
RES <- c(as.numeric(substring(out[ind6 + 21], 23, 28)),
         as.numeric(substring(out[ind6 + 22], 23, 28)),
         as.numeric(substring(out[ind6 + 23], 23, 28)),
         as.numeric(substring(out[ind6 + 24], 23, 28)))

# Composite reliability, rounded to 2 decimal places:
# (sum of loadings)^2 / ((sum of loadings)^2 + sum of residuals)
rel <- round((sum(PEST)^2) / ((sum(PEST)^2) + sum(RES)), digits = 2)

# --- Factor score determinacies (needed by `fit` below) ---
ind5 <- grep("FACTOR DETERMINACIES", out)
fsd <- as.numeric(substring(out[ind5 + 2], 22, 27))

# --- Factor score regression weights ---
ind7 <- grep("SUMMARY OF FACTOR SCORES", out)
FSRW <- c(as.numeric(substring(out[ind7 + 9], 16, 21)),
          as.numeric(substring(out[ind7 + 9], 30, 35)),
          as.numeric(substring(out[ind7 + 9], 44, 49)),
          as.numeric(substring(out[ind7 + 9], 58, 63)),
          as.numeric(substring(out[ind7 + 9], 72, 77)))

# Proportional weights for the composite scores.
FSTOT <- sum(FSRW, na.rm = TRUE)
FSPROP <- FSRW / FSTOT

# Collected fit indices plus determinacy and reliability.
fit <- data.frame(chisq, df, p, cfi, tli, rmsea, srmr, fsd, rel)

# Sanity check: the proportional weights must sum to 1.
sum(FSPROP, na.rm = TRUE)
# Fit summary and latent variable reliability.
fit
# Composite score weights.
FSPROP

]]>

]]>

**Generating a covariance structure:**

# Generate a bivariate-normal data set with unit variances and a .5
# covariance between the two variables, then wrap it in a data frame.
# MASS provides mvrnorm() for sampling from a multivariate normal.
library(MASS)
# 100 cases, both means 0, 2x2 covariance matrix: variances 1 on the
# diagonal, covariances .5 off the diagonal.  Vary any of these as needed.
covar <- mvrnorm(100, c(0, 0), matrix(c(1, 0.50, 0.50, 1), 2, 2))
# Reference copy of the intended covariance matrix, to eyeball the setup.
check <- matrix(c(1, 0.50, 0.50, 1), nrow = 2, ncol = 2, byrow = TRUE)
# Wrap the draws in a data frame and give the variables research-style names.
mydata <- data.frame(covar)
names(mydata) <- c("x1", "x2")
# attach() puts x1/x2 on the search path, as in the original post.
attach(mydata)

]]>