Part 3: Robust DIF coding

Author

Peter F. Halpin

These notes provide the code from Part 3 of the workshop on robust DIF and DTF.

Installation

The software is available via github. Please keep in mind that it is in the Beta stages of development. Please report any bugs during the workshop, by email, or on github. The code was updated recently (Dec 3) so please be re-install to be sure you have the current version.

# Installer for github 
install.packages(remotes)

# Install robustDIF from github
remotes::install_github("peterhalpin/robustDIF")

# Load library 
library(robustDIF)

Loading required package: Matrix

Reading-in parameter estimates (`mirt`)

robustDIF will read-in parameter estimates from mirt or lavaan. One can provide either a list of separate model fits, one for each groups, or the configural multigroup model. The code below shows how to work with mirt. The final section replicates these step with lavaan.

First, estimate the configural model. Be sure to use the option SE = T to obtain the covariance matrix of the item parameters estimates!

library(mirt)

Loading required package: stats4

Loading required package: lattice

# Set up data for mirt
cint <- read.csv("cint_data.csv")
depression_names <- c("cint1", "cint2", "cint4", "cint11", 
                      "cint27", "cint28", "cint29", "cint30")
depression_items <- cint[, depression_names]
gender <- factor(cint$cfemale)

# Estimate model (no invariance constraints)
config.mod <- multipleGroup(depression_items, 
                            group = gender, 
                            itemtype = "graded", 
                            SE = T, # <- make sure to request SEs
                            verbose = F) # <- suppress iteration history


# Print parms in slope-intercept format
coef(config.mod, IRTpars = F, simplify= T)

$`0`
$items
          a1     d1     d2     d3
cint1  1.517  2.099 -0.442 -2.866
cint2  1.323  1.100 -0.704 -2.362
cint4  1.106  1.760 -0.025 -2.130
cint11 0.931  1.723  0.249 -2.313
cint27 1.647  0.799 -0.780 -2.607
cint28 0.909  1.194 -0.312 -2.141
cint29 1.074 -0.663 -1.908 -3.528
cint30 1.101  0.766 -0.525 -2.278

$means
F1 
 0 

$cov
   F1
F1  1


$`1`
$items
          a1    d1     d2     d3
cint1  1.580 3.076  0.411 -2.226
cint2  1.242 1.566 -0.142 -1.961
cint4  1.122 2.480  0.431 -1.851
cint11 1.420 2.371  0.712 -1.689
cint27 1.748 1.190 -0.216 -2.205
cint28 1.443 1.941  0.303 -1.860
cint29 1.468 0.628 -0.627 -2.619
cint30 1.271 0.702 -0.501 -2.192

$means
F1 
 0 

$cov
   F1
F1  1

Second, use the get_model_parms function to read-in the info required by robustDIF

# Extract model parameters
mirt.parms <- get_model_parms(config.mod)

# Check output -- should be same as above
mirt.parms$est

$group.1
          a1      d1      d2     d3
item1 1.5168  2.0990 -0.4424 -2.866
item2 1.3226  1.0996 -0.7040 -2.362
item3 1.1060  1.7600 -0.0246 -2.130
item4 0.9305  1.7229  0.2487 -2.313
item5 1.6473  0.7991 -0.7798 -2.607
item6 0.9094  1.1941 -0.3117 -2.141
item7 1.0741 -0.6629 -1.9085 -3.528
item8 1.1012  0.7659 -0.5249 -2.278

$group.2
         a1     d1      d2     d3
item1 1.580 3.0758  0.4107 -2.226
item2 1.242 1.5664 -0.1422 -1.961
item3 1.122 2.4800  0.4308 -1.851
item4 1.420 2.3706  0.7115 -1.689
item5 1.748 1.1900 -0.2164 -2.205
item6 1.443 1.9410  0.3027 -1.860
item7 1.468 0.6282 -0.6270 -2.619
item8 1.271 0.7025 -0.5005 -2.192

Running RDIF

rdif is an internal function used to estimate scaling parameters. It is the workhorse behing the package, but other functions provide more user-friendly output. In the output below, the estimated values of the scaling parameters are indicated by est. Items with DIF are indicated by weights = 0. The other output describes the iteratively re-weighted least squares estimation routine (number of iterations and the convergence criterion).

# "Raw" output with weights: item intercepts / thresholds
rdif(mirt.parms, par = "intercept")

$est
[1] 0.4042

$weights
 [1] 0.3840 0.4828 1.0000 0.9836 0.9494 0.9304 0.4264 0.9999 0.7667 0.9440
[11] 0.8000 0.9807 0.1650 0.7916 0.6049 0.6791 0.9838 0.4172 0.0000 0.0000
[21] 0.6549 0.0000 0.0000 0.1326

$n.iter
[1] 23

$epsilon
[1] 7.274e-08

# For item slopes
rdif(mirt.parms, par = "slope")

$est
[1] 1.045

$weights
[1] 0.9997 0.7714 0.9831 0.0000 0.9946 0.0000 0.1355 0.8197

$n.iter
[1] 11

$epsilon
[1] 4.658e-08

We can see that some item tresholds were flagged, as well as some item slopes. More on this below.

Wald tests

Inferences about DIF can also be made by following up rdif with stand-alone Wald tests of the item parameters. The stand-alone tests can be useful if one wishes to test for DIF using a different Type I Error rate than was used for estimation with rdif. Otherwise, the parameter-level tests provide the same inference as the weights in the rdif output (albeit in a more commonly used format).

To test each item parameter separately, use the function rdif_z_test:

# Wald test of item intercepts
rdif_z_test(mirt.parms, par = "intercept")

             z.test    p.val
cint1.d1   1.208719 0.226771
cint1.d2   1.082666 0.278957
cint1.d3   0.003788 0.996978
cint2.d1  -0.178121 0.858628
cint2.d2   0.313643 0.753793
cint2.d3  -0.368797 0.712279
cint4.d1   1.154519 0.248288
cint4.d2   0.010795 0.991387
cint4.d3  -0.691293 0.489381
cint11.d1  0.330230 0.741226
cint11.d2 -0.636779 0.524269
cint11.d3  0.193013 0.846949
cint27.d1 -1.510353 0.130953
cint27.d2 -0.650922 0.515097
cint27.d3 -0.923956 0.355509
cint28.d1  0.822061 0.411042
cint28.d2  0.177016 0.859496
cint28.d3 -1.166264 0.243508
cint29.d1  3.749700 0.000177
cint29.d2  2.987273 0.002815
cint29.d3  0.855926 0.392039
cint30.d1 -3.188729 0.001429
cint30.d2 -2.587435 0.009669
cint30.d3 -1.562886 0.118079

# Wald test of item slopes
rdif_z_test(mirt.parms, par = "slope")

            z.test   p.val
cint1.a1  -0.02281 0.98180
cint2.a1  -0.68373 0.49415
cint4.a1  -0.18036 0.85687
cint11.a1  2.23752 0.02525
cint27.a1  0.10181 0.91891
cint28.a1  2.47098 0.01347
cint29.a1  1.55793 0.11925
cint30.a1  0.60284 0.54662

Alternatively, the user can test all parameters for each item together using a Wald test whose degrees of freedom is equal the number of item parameters. These item-level tests (rather parameter-level tests) are not equivalent to the flagging procedure because they combine information across parameter estimates from the same item`.

# Wald test of both parameters
rdif_chisq_test(mirt.parms)

       chi.square df     p.val
cint1       2.269  4 6.864e-01
cint2       2.347  4 6.722e-01
cint4       2.798  4 5.922e-01
cint11     12.688  4 1.290e-02
cint27      3.360  4 4.995e-01
cint28     11.099  4 2.548e-02
cint29     30.338  4 4.178e-06
cint30     11.697  4 1.975e-02

In the example, the Wald tests for individual item parameters lead to the same conclusions as the flagging procedures. For the chi-square test, we can see that a total of 4 items are indicated as having DIF, on either or both of the intercepts/thresholds or slopes.

Does DIF affect impact?

The delta_test function compares a naive estimate of impact that ignores DIF to the robust DIF estimator. The naive estimator is chose to be the MLE of the scaling parameter, computed from the item-level scaling functions, which is a precision-weighted version of the standard “mean-mean” approach. The null hypothesis that both estimators are consistent for the “true” scaling parameter leads to a Hausman-like specification test of DTF, based on the moments of the distribution of the latent trait. Note that, unlike other procedures for DTF, implementation of this test does not require an initial item-by-item analysis to identify which items may exhibit DIF – it allows the user to test for DTF before doing a DIF analysis.

# For impact on means 
delta_test(mirt.parms, par = "intercept")

rdif.est   ml.est    delta se.delta   z.test    p.val 
0.404222 0.395573 0.008649 0.039663 0.218071 0.827374

# For impact on variances
delta_test(mirt.parms, par = "slope")

rdif.est   ml.est    delta se.delta   z.test    p.val 
 1.04540  1.12096 -0.07555  0.05880 -1.28481  0.19886

We can see that DIF at the item level is not affecting conclusions about impact on either the mean or variance of the latent trait.

Again with `lavaan`

The same as above but this time with lavaan. Note that the groups are now in the reverse order and the parameters estimates differ from mirt due to different scaling of the latent variable and estimation routines. However, both analyses lead to the same substantive conclusions about which items exhibit DIF and whether DIF affects conclusions about impact.

library(lavaan)

This is lavaan 0.6-16
lavaan is FREE software! Please report any bugs.

# Model (same as above)
mod1 <- ' depression =~ cint1 + cint2 + cint4 + cint11 + 
                        cint27 + cint28 + cint29 + cint30'
# Fit model
fit.config <- cfa(mod1, 
                  data = cint, 
                  std.lv = T,  
                  ordered = T, 
                  group = "cfemale") 
                  
# Extract parms
lavaan.parms <- get_model_parms(fit.config)

# RDIF procedures (note groups are reversed)
delta_test(lavaan.parms, par = "intercept")

rdif.est   ml.est    delta se.delta   z.test    p.val 
-0.36430 -0.41036  0.04606  0.04395  1.04789  0.29469

delta_test(lavaan.parms, par = "slope")

rdif.est   ml.est    delta se.delta   z.test    p.val 
 1.03037  1.06796 -0.03759  0.03531 -1.06478  0.28698

rdif_z_test(lavaan.parms, par = "intercept")

           z.test     p.val
cint1.t1  -1.3273 1.844e-01
cint1.t2  -1.5315 1.257e-01
cint1.t3  -0.3650 7.151e-01
cint2.t1  -0.2274 8.201e-01
cint2.t2  -0.5764 5.643e-01
cint2.t3   0.4927 6.222e-01
cint4.t1  -1.2619 2.070e-01
cint4.t2  -0.5779 5.634e-01
cint4.t3   0.4357 6.630e-01
cint11.t1  0.5398 5.893e-01
cint11.t2  0.3830 7.018e-01
cint11.t3 -1.7650 7.756e-02
cint27.t1  1.3480 1.777e-01
cint27.t2  0.6343 5.259e-01
cint27.t3  0.9950 3.198e-01
cint28.t1 -0.1300 8.965e-01
cint28.t2 -0.9256 3.547e-01
cint28.t3 -0.3364 7.365e-01
cint29.t1 -4.0901 4.312e-05
cint29.t2 -4.1279 3.661e-05
cint29.t3 -2.1193 3.407e-02
cint30.t1  3.0034 2.670e-03
cint30.t2  2.2237 2.617e-02
cint30.t3  1.2709 2.038e-01

rdif_z_test(lavaan.parms, par = "slope")

          z.test    p.val
cint1.a  -0.2616 0.793637
cint2.a  -1.0008 0.316936
cint4.a  -0.1505 0.880401
cint11.a  2.4290 0.015140
cint27.a  0.1545 0.877245
cint28.a  2.6290 0.008564
cint29.a  1.2903 0.196933
cint30.a  0.8008 0.423230

rdif_chisq_test(lavaan.parms)

       chi.square df     p.val
cint1       3.120  4 5.379e-01
cint2       2.732  4 6.036e-01
cint4       2.130  4 7.118e-01
cint11     18.130  4 1.164e-03
cint27      2.667  4 6.150e-01
cint28     20.144  4 4.678e-04
cint29     31.570  4 2.342e-06
cint30     10.322  4 3.534e-02