Part 1: Measurement Invariance Coding

Author

Peter F. Halpin

These notes provide the code from Part 1 of the workshop on categorical factor analysis.

Estimation in lavaan

Note that for Delta parameterization, “scale” is fixed to 1 and residual variances = \(1 - \lambda^2\). For Theta parameterization, residual variances = 1 and scales = \(1 / \sqrt{1 + \lambda^2}\). In both cases, the only free parameters are the factors loadings and thresholds, but their values depend on the scaling. Use summary, parameterEstimates or standardizedSolution to see model output.

library(lavaan)
dat <- read.csv("cint_data.csv")

# Model syntax
mod1 <- ' depression =~ cint1 + cint2 + cint4 + cint11 + 
                        cint27 + cint28 + cint29 + cint30'

# Fit model using delta parameterization
fit.delta <- cfa(mod1, 
                 data = dat, 
                 std.lv = T,  # standardize latent variable
                 ordered = T) # data are ordered


# Fit model using theta parameterization
fit.theta <- cfa(mod1, 
                 data = dat, 
                 std.lv = T,  # standardize latent variable
                 ordered = T, # data are ordered
                 parameterization = "theta")

estimates <- cbind(parameterEstimates(fit.delta)[, 1:4], 
                   parameterEstimates(fit.theta)[, 4])
names(estimates)[4:5] <- c("Delta", "Theta")
knitr::kable(estimates, digits = 3)

lhs	op	rhs	Delta	Theta
depression	=~	cint1	0.671	0.905
depression	=~	cint2	0.592	0.734
depression	=~	cint4	0.533	0.630
depression	=~	cint11	0.569	0.692
depression	=~	cint27	0.699	0.977
depression	=~	cint28	0.573	0.699
depression	=~	cint29	0.615	0.780
depression	=~	cint30	0.552	0.662
cint1	\|	t1	-1.111	-1.498
cint1	\|	t2	-0.009	-0.012
cint1	\|	t3	1.073	1.447
cint2	\|	t1	-0.645	-0.800
cint2	\|	t2	0.192	0.238
cint2	\|	t3	1.006	1.248
cint4	\|	t1	-1.047	-1.237
cint4	\|	t2	-0.108	-0.127
cint4	\|	t3	0.977	1.155
cint11	\|	t1	-0.992	-1.206
cint11	\|	t2	-0.244	-0.297
cint11	\|	t3	0.939	1.142
cint27	\|	t1	-0.414	-0.579
cint27	\|	t2	0.207	0.290
cint27	\|	t3	0.982	1.372
cint28	\|	t1	-0.759	-0.927
cint28	\|	t2	-0.003	-0.004
cint28	\|	t3	0.953	1.163
cint29	\|	t1	0.012	0.015
cint29	\|	t2	0.576	0.731
cint29	\|	t3	1.360	1.725
cint30	\|	t1	-0.353	-0.424
cint30	\|	t2	0.260	0.311
cint30	\|	t3	1.073	1.287
cint1	~~	cint1	0.550	1.000
cint2	~~	cint2	0.650	1.000
cint4	~~	cint4	0.716	1.000
cint11	~~	cint11	0.676	1.000
cint27	~~	cint27	0.512	1.000
cint28	~~	cint28	0.672	1.000
cint29	~~	cint29	0.622	1.000
cint30	~~	cint30	0.695	1.000
depression	~~	depression	1.000	1.000
cint1	~*~	cint1	1.000	0.741
cint2	~*~	cint2	1.000	0.806
cint4	~*~	cint4	1.000	0.846
cint11	~*~	cint11	1.000	0.822
cint27	~*~	cint27	1.000	0.715
cint28	~*~	cint28	1.000	0.820
cint29	~*~	cint29	1.000	0.789
cint30	~*~	cint30	1.000	0.834
cint1	~1		0.000	0.000
cint2	~1		0.000	0.000
cint4	~1		0.000	0.000
cint11	~1		0.000	0.000
cint27	~1		0.000	0.000
cint28	~1		0.000	0.000
cint29	~1		0.000	0.000
cint30	~1		0.000	0.000
depression	~1		0.000	0.000

Both models have the same fit to the data – only the scaling changes.

gof <- cbind(fitMeasures(fit.delta)[1:25], fitMeasures(fit.theta)[1:25])
names(gof) <- c("Delta", "Theta")
knitr::kable(gof, digits = 3)

npar	32.000	32.000
fmin	0.011	0.011
chisq	19.239	19.239
df	20.000	20.000
pvalue	0.506	0.506
chisq.scaled	33.789	33.789
df.scaled	20.000	20.000
pvalue.scaled	0.028	0.028
chisq.scaling.factor	0.580	0.580
baseline.chisq	3307.540	3307.540
baseline.df	28.000	28.000
baseline.pvalue	0.000	0.000
baseline.chisq.scaled	2357.221	2357.221
baseline.df.scaled	28.000	28.000
baseline.pvalue.scaled	0.000	0.000
baseline.chisq.scaling.factor	1.408	1.408
cfi	1.000	1.000
tli	1.000	1.000
cfi.scaled	0.994	0.994
tli.scaled	0.992	0.992
cfi.robust	0.988	0.988
tli.robust	0.983	0.983
nnfi	1.000	1.000
rfi	0.992	0.992
nfi	0.994	0.994

Using standardized output, both solutions give equivalent numerical values (the Delta parameterization)

standardized.estimates <- cbind(standardizedSolution(fit.delta)[, 1:4],
                                standardizedSolution(fit.theta)[, 4])
names(standardized.estimates)[4:5] <- c("Delta", "Theta")
knitr::kable(standardized.estimates, digits = 3)

lhs	op	rhs	Delta	Theta
depression	=~	cint1	0.671	0.671
depression	=~	cint2	0.592	0.592
depression	=~	cint4	0.533	0.533
depression	=~	cint11	0.569	0.569
depression	=~	cint27	0.699	0.699
depression	=~	cint28	0.573	0.573
depression	=~	cint29	0.615	0.615
depression	=~	cint30	0.552	0.552
cint1	\|	t1	-1.111	-1.111
cint1	\|	t2	-0.009	-0.009
cint1	\|	t3	1.073	1.073
cint2	\|	t1	-0.645	-0.645
cint2	\|	t2	0.192	0.192
cint2	\|	t3	1.006	1.006
cint4	\|	t1	-1.047	-1.047
cint4	\|	t2	-0.108	-0.108
cint4	\|	t3	0.977	0.977
cint11	\|	t1	-0.992	-0.992
cint11	\|	t2	-0.244	-0.244
cint11	\|	t3	0.939	0.939
cint27	\|	t1	-0.414	-0.414
cint27	\|	t2	0.207	0.207
cint27	\|	t3	0.982	0.982
cint28	\|	t1	-0.759	-0.759
cint28	\|	t2	-0.003	-0.003
cint28	\|	t3	0.953	0.953
cint29	\|	t1	0.012	0.012
cint29	\|	t2	0.576	0.576
cint29	\|	t3	1.360	1.360
cint30	\|	t1	-0.353	-0.353
cint30	\|	t2	0.260	0.260
cint30	\|	t3	1.073	1.073
cint1	~~	cint1	0.550	0.550
cint2	~~	cint2	0.650	0.650
cint4	~~	cint4	0.716	0.716
cint11	~~	cint11	0.676	0.676
cint27	~~	cint27	0.512	0.512
cint28	~~	cint28	0.672	0.672
cint29	~~	cint29	0.622	0.622
cint30	~~	cint30	0.695	0.695
depression	~~	depression	1.000	1.000
cint1	~*~	cint1	1.000	1.000
cint2	~*~	cint2	1.000	1.000
cint4	~*~	cint4	1.000	1.000
cint11	~*~	cint11	1.000	1.000
cint27	~*~	cint27	1.000	1.000
cint28	~*~	cint28	1.000	1.000
cint29	~*~	cint29	1.000	1.000
cint30	~*~	cint30	1.000	1.000
cint1	~1		0.000	0.000
cint2	~1		0.000	0.000
cint4	~1		0.000	0.000
cint11	~1		0.000	0.000
cint27	~1		0.000	0.000
cint28	~1		0.000	0.000
cint29	~1		0.000	0.000
cint30	~1		0.000	0.000
depression	~1		0.000	0.000

Levels of MI

Code for levels of MI. Use summary, parameterEstimates or standardizedSolution to see model output.

# Configural model
fit.config <- cfa(mod1, 
                  data = dat, 
                  std.lv = T,  
                  ordered = T, 
                  group = "cfemale") 

# Weak / metric invariance
fit.weak <- cfa(mod1, 
                  data = dat, 
                  std.lv = T,  
                  ordered = T, 
                  group = "cfemale",
                  group.equal = "loadings") 

# Strong / scalar invariance
fit.strong <- cfa(mod1, 
                  data = dat, 
                  std.lv = T,  
                  ordered = T, 
                  group = "cfemale",
                  group.equal = c("loadings", "thresholds")) 

# Strict invariance (switch to theta parameterization) 
fit.strict.theta <- cfa(mod1, 
                        data = dat, 
                        std.lv = T,  
                        ordered = T, 
                        group = "cfemale",
                        group.equal = c("loadings", "thresholds", "residuals"),
                        parameterization = "theta") 

# Strict invariance using delta parameterization (need to change model)
mod.strict <- 'depression =~ cint1 + cint2 + cint4 + cint11 + 
                             cint27 + cint28 + cint29 + cint30
                 
  cint1 ~*~ c(1, 1)*cint1
  cint2 ~*~ c(1, 1)*cint2
  cint4 ~*~ c(1, 1)*cint4
  cint11 ~*~ c(1, 1)*cint11
  cint27 ~*~ c(1, 1)*cint27
  cint28 ~*~ c(1, 1)*cint28
  cint29 ~*~ c(1, 1)*cint29
  cint30 ~*~ c(1, 1)*cint30'

fit.strict.delta <- cfa(mod.strict, 
                        data = dat, 
                        std.lv = T,  
                        ordered = T, 
                        group = "cfemale",
                        group.equal = c("loadings", "thresholds"),
                        parameterization = "delta")

Comparing models using chi-square difference tests

Testing config, weak, and strong invariance. Note that strong invariance model estimates “scales” of LRs (in Delta parameterization) or residual variances of LRVs (in Theta parameterization).

# Config, weak, and strong (estimates "scales" of LRV)
lavTestLRT(fit.config, fit.weak, fit.strong)


Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
           Df AIC BIC   Chisq Chisq diff Df diff Pr(>Chisq)    
fit.config 40          37.534                                  
fit.weak   47          57.126     12.091       7    0.09761 .  
fit.strong 62         112.075     63.910      15    5.3e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Testing config, weak, and strict invariance. Note that strict invariance model does not estimate “scales” or residual variance of LRVs.

# Config, weak, and strict (doesn't estimates "scales" of LRV))
lavTestLRT(fit.config, fit.weak, fit.strict.delta)


Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
                 Df AIC BIC   Chisq Chisq diff Df diff Pr(>Chisq)    
fit.config       40          37.534                                  
fit.weak         47          57.126     12.091       7    0.09761 .  
fit.strict.delta 70         121.781     73.542      23  3.411e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Same result using theta parameterization for strict invariance (lavaan code is simpler, model fit is the same).

# Same fit when using theta for strict (simpler code)
lavTestLRT(fit.config, fit.weak, fit.strict.theta)


Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
                 Df AIC BIC   Chisq Chisq diff Df diff Pr(>Chisq)    
fit.config       40          37.534                                  
fit.weak         47          57.126     12.091       7    0.09761 .  
fit.strict.theta 70         122.510     76.762      23  1.053e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Related models

Testing homogeneity of variance by comparing weak invariance model to a model that adds constraints on factor variances.

fit.homo <- cfa(mod1, 
                data = dat, 
                std.lv = T,  
                ordered = T, 
                group = "cfemale",
                group.equal = c("loadings", "lv.variances"))

lavTestLRT(fit.weak, fit.homo)


Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
         Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)  
fit.weak 47         57.126                                
fit.homo 48         88.228      6.023       1    0.01412 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Omnibus test of wether factor means differ over groups, by comparing strong invariance model to a model that adds constraints on factor means (mainly useful with > 2 groups).

fit.omnibus <- cfa(mod1, 
                data = dat, 
                std.lv = T,  
                ordered = T, 
                group = "cfemale",
                group.equal = c("loadings", "thresholds", "means"))

lavTestLRT(fit.strong, fit.omnibus)


Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
            Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
fit.strong  62         112.08                                  
fit.omnibus 63         260.51     28.474       1  9.497e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1