diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index f49a465..a858191 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -30,13 +30,13 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd - name: Set up R - uses: r-lib/actions/setup-r@d3c5be51b12e724e68f33216ca3c148b66d5f0b6 + uses: r-lib/actions/setup-r@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: r-version: release use-public-rspm: true - name: Set up R package dependencies - uses: r-lib/actions/setup-r-dependencies@d3c5be51b12e724e68f33216ca3c148b66d5f0b6 + uses: r-lib/actions/setup-r-dependencies@6f6e5bc62fba3a704f74e7ad7ef7676c5c6a2590 with: extra-packages: any::rcmdcheck needs: check diff --git a/.jules/bolt.md b/.jules/bolt.md index cfa2846..9afe6a4 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -21,3 +21,7 @@ ## 2026-06-30 - Preserve NA handling when removing factor conversions **Learning:** `levels(as.factor(x))` excludes missing responses from the category count, so a faster replacement must not count `NA` as an extra response category. **Action:** Keep `na.omit(unique(x))` rather than plain `unique(x)` in response-category comparisons. + +## 2026-06-30 - Safe Vectorization of Data Frame Row Extraction +**Learning:** In R, replacing a slow `for` loop with vectorized data frame row extraction requires care. Using `unlist()` on a data frame silently coerces factor columns to their underlying integer codes, leading to incorrect string representations. The safest and most robust way to extract elements as characters while preserving factor labels (and avoiding introduced names) is using `vapply()`, such as `vapply(df[cols], as.character, character(1), USE.NAMES = FALSE)`. +**Action:** Always use `vapply(..., as.character, character(1))` instead of `unlist()` when vectorizing type coercion functions over data frame subsets containing potential factor columns. diff --git a/R/aFIPC.R b/R/aFIPC.R index d0329f2..7b7da22 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -688,16 +688,13 @@ autoFIPC <- print(modIPD_DIF) print(CommonItemList_NOIPD) + # Performance optimization: Replace the element-by-element loop with vapply + # to efficiently extract and convert data frame values to characters, + # preserving factor labels and skipping unnecessary O(n) overhead. ActualoldFormCommonItem <- - vector(length = length(CommonItemList_NOIPD)) + vapply(IPDItemList[1, CommonItemList_NOIPD], as.character, character(1), USE.NAMES = FALSE) ActualnewFormCommonItem <- - vector(length = length(CommonItemList_NOIPD)) - for (i in 1:length(CommonItemList_NOIPD)) { - ActualoldFormCommonItem[i] <- - as.character(IPDItemList[CommonItemList_NOIPD][1, i]) - ActualnewFormCommonItem[i] <- - as.character(IPDItemList[CommonItemList_NOIPD][2, i]) - } + vapply(IPDItemList[2, CommonItemList_NOIPD], as.character, character(1), USE.NAMES = FALSE) message('ActualoldFormCommonItem: ', ActualoldFormCommonItem) message('ActualnewFormCommonItem: ', ActualnewFormCommonItem) diff --git a/tests/testthat/test-factor-handling.R b/tests/testthat/test-factor-handling.R new file mode 100644 index 0000000..6cb772c --- /dev/null +++ b/tests/testthat/test-factor-handling.R @@ -0,0 +1,31 @@ +test_that("factor columns are handled correctly during IPD vectorization", { + skip_if_not_installed("mirt") + + # Create a scenario where IPD runs and dataframe has factor or character types + set.seed(42) + old_item_names <- paste0("Item", 1:4) + new_item_names <- paste0("Item", 1:4) + + dat_old <- mirt::simdata(a = matrix(runif(4, 0.8, 2)), d = matrix(rnorm(4)), N = 100, itemtype = "2PL") + dat_new <- mirt::simdata(a = matrix(runif(4, 0.8, 2)), d = matrix(rnorm(4)), N = 100, itemtype = "2PL") + colnames(dat_old) <- old_item_names + colnames(dat_new) <- new_item_names + + old_mod <- mirt::mirt(dat_old, 1, itemtype = "2PL", SE = FALSE, verbose = FALSE) + new_mod <- mirt::mirt(dat_new, 1, itemtype = "2PL", SE = FALSE, verbose = FALSE) + + # Run autoFIPC with checkIPD = TRUE to trigger the IPD logic + res <- aFIPC::autoFIPC( + newformXData = new_mod, + oldformYData = old_mod, + newformCommonItemNames = paste0("Item", 1:4), + oldformCommonItemNames = paste0("Item", 1:4), + itemtype = "2PL", + checkIPD = TRUE, + confirmCommonItems = TRUE + ) + + # If it didn't crash and returns the list with LinkedModel, factor logic is safe + expect_type(res, "list") + expect_true(!is.null(res$LinkedModel)) +})