Part 2: DIF Coding

Author

Peter F. Halpin

These notes provide the code from Part 2 of the workshop on DIF with the graded response model.

Two-parameter logistic model

This example uses mirt’s built-in dataset LSAT7, which contains 5 dichotomously scored items obtained from the Law School Admissions Test, section 7.

library(mirt)
Loading required package: stats4
Loading required package: lattice
dat <- expand.table(LSAT7)
twoPL <- mirt(dat, 
             itemtype = "2PL",
             verbose = F) # to omit iteration history in output

# Parameter estimates
coef(twoPL, IRTpars = T, simplify = T)
$items
           a      b g u
Item.1 0.988 -1.879 0 1
Item.2 1.081 -0.748 0 1
Item.3 1.706 -1.058 0 1
Item.4 0.765 -0.635 0 1
Item.5 0.736 -2.520 0 1

$means
F1 
 0 

$cov
   F1
F1  1
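
The IRT parameterization reported above is related to mirt’s slope-intercept parameterization (a*theta + d) by b = -d/a, with g and u fixed at 0 and 1 for the 2PL. A quick sketch of the conversion (the object name si is illustrative, not from the workshop):

# Convert slope-intercept estimates (a1, d) to IRT difficulty (b = -d/a);
# the result should match the a and b columns shown above
si <- coef(twoPL, simplify = TRUE)$items   # columns a1, d, g, u
cbind(a = si[, "a1"], b = -si[, "d"] / si[, "a1"])
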
# Item response functions 
plot(twoPL,
     type = "itemscore",  
     facet = F)

# Item info
plot(twoPL,
     type = "infotrace",  
     facet = F)

# Test info
plot(twoPL,
     type = "info",  
     facet = F)

plot(twoPL,
     type = "rxx",  
     facet = F)

marginal_rxx(twoPL)
[1] 0.4417618
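
The type = "rxx" plot above shows conditional reliability. Assuming a unit-variance latent trait (the default here), it can be approximately reproduced from the test information function; a minimal sketch:

# Conditional reliability computed from test information under a
# unit-variance latent trait (a sketch, not mirt internals)
theta_grid <- seq(-4, 4, by = 0.1)
info <- testinfo(twoPL, matrix(theta_grid))
plot(theta_grid, info / (info + 1), type = "l",
     xlab = "theta", ylab = "Reliability")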

Graded response model

This code uses the example data from the workshop. First, we fit the model and plot some of the item-level functions discussed in the workshop.

# Load data and separate depression items
cint <- read.csv("cint_data.csv")
depression_names <- c("cint1", "cint2", "cint4", "cint11", 
                      "cint27", "cint28", "cint29", "cint30")
depression_items <- cint[, depression_names]

# Run GRM model (use SE = T in mirt call to get SEs)
grm <- mirt(depression_items, 
            itemtype = "graded",
            verbose = F) # to omit iteration history in output

# Parameter estimates
coef(grm, IRTpars = T, simplify = T)
$items
           a     b1     b2    b3
cint1  1.612 -1.622 -0.011 1.578
cint2  1.286 -1.049  0.306 1.665
cint4  1.129 -1.879 -0.195 1.758
cint11 1.219 -1.688 -0.402 1.608
cint27 1.692 -0.594  0.279 1.404
cint28 1.213 -1.292 -0.001 1.641
cint29 1.351 -0.004  0.889 2.217
cint30 1.155 -0.630  0.435 1.910

$means
F1 
 0 

$cov
   F1
F1  1
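
In the GRM, b1 to b3 are the theta values at which the cumulative probabilities P(X >= 1), P(X >= 2), and P(X >= 3) equal .5, and the category probabilities are differences of adjacent cumulative probabilities. A small sketch for cint1 at theta = 0 (object names are illustrative):

# Cumulative and category response probabilities for cint1 at theta = 0,
# built from the IRT parameterization above (a sketch, not mirt internals)
irt <- coef(grm, IRTpars = TRUE, simplify = TRUE)$items
a <- irt["cint1", "a"]
b <- irt["cint1", c("b1", "b2", "b3")]
theta <- 0
P_cum <- plogis(a * (theta - b))    # P(X >= 1), P(X >= 2), P(X >= 3)
P_cat <- c(1, P_cum) - c(P_cum, 0)  # P(X = 0), ..., P(X = 3); sums to 1
P_cat
# Should match probtrace(extract.item(grm, "cint1"), matrix(theta))
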
# Per item plots 
itemplot(grm,  
         item = 1, 
         type = "threshold",  
         main = "Cumulative response functions")

itemplot(grm, 
         item = 1, 
         type = "trace", 
         main = "Category response functions")

# Plotting all test items 
plot(grm, 
     type = "itemscore",  
     main = "Expected item score functions", 
     facet = F)

Next, consider the item information functions (IIFs), the test information function (TIF), and the reliability of the depression items.

# Item info
plot(grm,
     type = "infotrace",  
     main = "Item information functions", 
     facet = F)

# Test info
plot(grm,
     type = "info",  
     main = "Item information functions", 
     facet = F)

plot(grm,
     type = "rxx",  
     main = "Item information functions", 
     facet = F)

marginal_rxx(grm)
[1] 0.7944443
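
As a cross-check on the marginal reliability, we can also compute EAP scores with their standard errors and the corresponding empirical reliability. A sketch (EAP scoring is an assumption here, not a workshop requirement):

# EAP factor scores with standard errors, and the empirical reliability
# of those scores (a sketch to compare against marginal_rxx)
fs <- fscores(grm, method = "EAP", full.scores.SE = TRUE)
head(fs)          # columns: F1 and SE_F1
empirical_rxx(fs) # empirical reliability of the EAP scores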

DIF analysis with GRM

This section illustrates likelihood ratio based DIF testing with the GRM. Step 1 is to estimate the multiple-group model with strong/strict invariance constraints.

# Groups need to be a factor 
gender <- factor(cint$cfemale)

# Invariance constraints used by mirt
strong.invariance <- c("free_mean", "free_var", "slopes", "intercepts")

# Estimate model
strong.mod <- multipleGroup(depression_items, 
                            group = gender, 
                            itemtype = "graded",
                            invariance = strong.invariance,
                            verbose = F)

# View output (item parms the same in both groups?)
coef(strong.mod, IRTpars = T, simplify = T)
$`0`
$items
           a     b1     b2    b3
cint1  1.470 -1.523  0.244 1.993
cint2  1.156 -0.907  0.596 2.105
cint4  1.017 -1.823  0.042 2.207
cint11 1.116 -1.591 -0.186 2.019
cint27 1.507 -0.406  0.564 1.817
cint28 1.107 -1.161  0.255 2.059
cint29 1.276  0.250  1.212 2.647
cint30 1.021 -0.451  0.743 2.399

$means
F1 
 0 

$cov
   F1
F1  1


$`1`
$items
           a     b1     b2    b3
cint1  1.470 -1.523  0.244 1.993
cint2  1.156 -0.907  0.596 2.105
cint4  1.017 -1.823  0.042 2.207
cint11 1.116 -1.591 -0.186 2.019
cint27 1.507 -0.406  0.564 1.817
cint28 1.107 -1.161  0.255 2.059
cint29 1.276  0.250  1.212 2.647
cint30 1.021 -0.451  0.743 2.399

$means
   F1 
0.478 

$cov
      F1
F1 1.291
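
Before testing individual items, an optional omnibus check (a sketch, not part of the original workshop code) is to compare the constrained model against a configural model in which all item parameters are estimated freely in each group:

# Configural model: item parameters free in both groups, latent means and
# variances fixed for identification (an optional omnibus comparison)
configural.mod <- multipleGroup(depression_items, 
                                group = gender, 
                                itemtype = "graded",
                                verbose = F)

# Likelihood ratio test of the strong invariance constraints
anova(strong.mod, configural.mod)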

The second step is to implement the DIF analysis, which can be done with or without purification of the anchor set.

# Without purification
DIF(strong.mod, 
    which.par = c("a1", "d1", "d2", "d3"), # <- mirt notation
    scheme = "drop")  # <- drop item constraints
       groups converged     AIC   SABIC      HQ    BIC     X2 df     p
cint1     0,1      TRUE   3.113   9.344  10.370 22.047  4.887  4 0.299
cint2     0,1      TRUE   4.692  10.923  11.949 23.626  3.308  4 0.508
cint4     0,1      TRUE   3.169   9.400  10.425 22.102  4.831  4 0.305
cint11    0,1      TRUE   1.948   8.178   9.204 20.881  6.052  4 0.195
cint27    0,1      TRUE   0.629   6.860   7.886 19.563  7.371  4 0.118
cint28    0,1      TRUE   3.090   9.321  10.347 22.024   4.91  4 0.297
cint29    0,1      TRUE -23.411 -17.180 -16.154 -4.477 31.411  4     0
cint30    0,1      TRUE  -9.195  -2.964  -1.939  9.738 17.195  4 0.002

As shown above, we conclude that the last two items exhibit DIF (without purification). With purification, we reach the same conclusion, but the output is shown only for the items that exhibited DIF.

DIF(strong.mod, 
    which.par = c("a1", "d1", "d2", "d3"), 
    scheme = "drop_sequential", #<- different scheme
    seq_stat = .05,  # <- Type I Error rate for DIF
    max_run = 2) # <- two stages only

Checking for DIF in 6 more items
Computing final DIF estimates...
       groups converged     AIC   SABIC      HQ    BIC     X2 df     p
cint29    0,1      TRUE -18.863 -12.632 -11.606  0.071 26.863  4     0
cint30    0,1      TRUE  -4.647   1.584   2.610 14.286 12.647  4 0.013
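
When many items are tested, the p-values can also be adjusted for multiple comparisons via the p.adjust argument of DIF(). A sketch using a Benjamini-Hochberg correction (the choice of "BH" is illustrative):

# DIF testing with multiple-comparison adjustment of the p-values
# ("BH" is one of the methods accepted by stats::p.adjust)
DIF(strong.mod, 
    which.par = c("a1", "d1", "d2", "d3"), 
    scheme = "drop",
    p.adjust = "BH")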

Following up the DIF analysis

After a DIF analysis, we may wish to better understand how DIF affects the items. This can be done using the partial invariance model, in which items without DIF are constrained to be equal over groups and items with DIF are estimated freely in each group.

# Invariance constraints
partial.invariance <- c("free_mean", "free_var", 
                        "cint1", "cint2", "cint4", "cint27", "cint28")

# Estimate model
partial.mod <- multipleGroup(depression_items, 
                             group = gender, 
                             itemtype = "graded",
                             invariance = partial.invariance,
                             verbose = F)


# Plot IRFs of biased items
itemplot(partial.mod, type = "score", item = "cint29", main = "CINT 29")

itemplot(partial.mod, type = "score", item = "cint30", main = "CINT 30")
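
The plots can be complemented with numerical DIF effect sizes. A sketch using mirt's empirical_ES() (this step is an addition, not part of the original workshop code):

# Empirical DIF effect sizes for the two-group partial invariance model
# (see ?empirical_ES for definitions of the reported indices)
empirical_ES(partial.mod)              # item-level effect sizes
empirical_ES(partial.mod, DIF = FALSE) # test-level (DTF) effect sizes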

# Examine parameter estimates
coef(partial.mod, IRTpars = T, simplify = T)
$`0`
$items
           a     b1     b2    b3
cint1  1.551 -1.460  0.215 1.864
cint2  1.249 -0.858  0.542 1.945
cint4  1.088 -1.724  0.025 2.051
cint11 0.930 -1.853 -0.267 2.488
cint27 1.622 -0.393  0.515 1.686
cint28 1.172 -1.113  0.226 1.926
cint29 1.073  0.618  1.778 3.288
cint30 1.103 -0.695  0.476 2.068

$means
F1 
 0 

$cov
   F1
F1  1


$`1`
$items
           a     b1     b2    b3
cint1  1.551 -1.460  0.215 1.864
cint2  1.249 -0.858  0.542 1.945
cint4  1.088 -1.724  0.025 2.051
cint11 1.370 -1.309 -0.102 1.648
cint27 1.622 -0.393  0.515 1.686
cint28 1.172 -1.113  0.226 1.926
cint29 1.424 -0.024  0.858 2.259
cint30 1.240 -0.151  0.822 2.191

$means
   F1 
0.418 

$cov
      F1
F1 1.065

We can see that item 29 is biased “towards” females and item 30 is biased “towards” males. Comparing the estimated group mean difference in the strong invariance model (.478) with that from the partial invariance model (.418), it is not yet clear whether DIF is affecting conclusions about how the groups differ. More on this in Part 3 of the workshop.