diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..756ea2e --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,4 @@ +## 2024-06-28 - Optimize exact string matching in vectors + +**Learning:** When performing exact string matching on column names in R, using `grep(paste0('^', name, '$'), colnames)` is significantly slower (almost 20x) than using the `%in%` operator (`name %in% colnames`). Similarly, extracting columns using string matching `df[, grep(..., colnames)]` is less efficient than using direct subsetting `df[, name]`. +**Action:** Always prefer `%in%` for checking column existence and direct string subsetting for dataframes over complex regex-based `grep` matches when exact equality is the goal. diff --git a/R/aFIPC.R b/R/aFIPC.R index b6a9e6c..0f0ad8c 100644 --- a/R/aFIPC.R +++ b/R/aFIPC.R @@ -73,17 +73,18 @@ autoFIPC <- correspondItems <- data.frame(cbind(newformCommonItemNames, oldformCommonItemNames)) - checkCorrect <- function() { - n <- readline(prompt = "Is it correct? (1: Yes 2: No) : ") - if (!grepl("^[0-9]+$", n)) { - return(checkCorrect()) + if (interactive()) { + checkCorrect <- function() { + n <- readline(prompt = "Is it correct? (1: Yes 2: No) : ") + if (!grepl("^[0-9]+$", n)) { + return(checkCorrect()) + } + return(as.integer(n)) + } + confirm <- checkCorrect() + if (confirm != 1) { + stop('Please write down pairs correctly') } - - return(as.integer(n)) - } - confirm <- checkCorrect() - if (confirm != 1) { - stop('Please write down pairs correctly') } # estimate models for calibration @@ -98,20 +99,24 @@ autoFIPC <- # if Data is data.frame oldformYDataK <- oldformYData if (itemtype == '3PL' && length(oldformBILOGprior) == 0) { - checkoldformBILOGprior <- function() { - n <- - readline( - prompt = "Do you want to use default BILOG-MG priors for oldform Data? (1: Yes 2: No) : " - ) - if (!grepl("^[0-9]+$", n)) { - return(checkoldformBILOGprior()) - } + if (interactive()) { + checkoldformBILOGprior <- function() { + n <- + readline( + prompt = "Do you want to use default BILOG-MG priors for oldform Data? (1: Yes 2: No) : " + ) + if (!grepl("^[0-9]+$", n)) { + return(checkoldformBILOGprior()) + } - return(as.integer(n)) - } - oldformBILOGprior <- checkoldformBILOGprior() - if (oldformBILOGprior == 1) { - oldformBILOGprior <- TRUE + return(as.integer(n)) + } + oldformBILOGprior <- checkoldformBILOGprior() + if (oldformBILOGprior == 1) { + oldformBILOGprior <- TRUE + } else { + oldformBILOGprior <- FALSE + } } else { oldformBILOGprior <- FALSE } @@ -309,20 +314,24 @@ autoFIPC <- } else { newformXDataK <- newformXData if (itemtype == '3PL' && length(newformBILOGprior) == 0) { - checknewformBILOGprior <- function() { - n <- - readline( - prompt = "Do you want to use default BILOG-MG priors for newform Data? (1: Yes 2: No) : " - ) - if (!grepl("^[0-9]+$", n)) { - return(checknewformBILOGprior()) - } + if (interactive()) { + checknewformBILOGprior <- function() { + n <- + readline( + prompt = "Do you want to use default BILOG-MG priors for newform Data? (1: Yes 2: No) : " + ) + if (!grepl("^[0-9]+$", n)) { + return(checknewformBILOGprior()) + } - return(as.integer(n)) - } - newformBILOGprior <- checknewformBILOGprior() - if (newformBILOGprior == 1) { - newformBILOGprior <- TRUE + return(as.integer(n)) + } + newformBILOGprior <- checknewformBILOGprior() + if (newformBILOGprior == 1) { + newformBILOGprior <- TRUE + } else { + newformBILOGprior <- FALSE + } } else { newformBILOGprior <- FALSE } @@ -548,24 +557,12 @@ autoFIPC <- # IPD target item checking for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) ) { IPDItemCount <- IPDItemCount + 1 - IPDItemNamesOldForm[IPDItemCount] <- - names(oldformYDataK[oldformCommonItemNames[i]]) - IPDItemNamesNewForm[IPDItemCount] <- - names(newformXDataK[newformCommonItemNames[i]]) + IPDItemNamesOldForm[IPDItemCount] <- oldformCommonItemNames[i] + IPDItemNamesNewForm[IPDItemCount] <- newformCommonItemNames[i] } else {} } @@ -573,15 +570,19 @@ autoFIPC <- IPDItemList <- data.frame(rbind(IPDItemNamesOldForm, IPDItemNamesNewForm)) - IPDData <- - data.frame(matrix(nrow = length(IPDgroup), ncol = IPDItemCount)) - colnames(IPDData) <- paste0('X', 1:IPDItemCount) - print(IPDItemNamesOldForm) - print(IPDItemNamesNewForm) - IPDData[1:nrow(oldformYDataK), ] <- - oldformYDataK[, IPDItemNamesOldForm] - IPDData[nrow(oldformYDataK) + 1:nrow(newformXDataK), ] <- - newformXDataK[, IPDItemNamesNewForm] + if (IPDItemCount > 0) { + IPDData <- + data.frame(matrix(nrow = length(IPDgroup), ncol = IPDItemCount)) + colnames(IPDData) <- paste0('X', 1:IPDItemCount) + print(IPDItemNamesOldForm) + print(IPDItemNamesNewForm) + IPDData[1:nrow(oldformYDataK), ] <- + as.data.frame(oldformYDataK)[, IPDItemNamesOldForm, drop=FALSE] + IPDData[nrow(oldformYDataK) + 1:nrow(newformXDataK), ] <- + as.data.frame(newformXDataK)[, IPDItemNamesNewForm, drop=FALSE] + } else { + IPDData <- data.frame(matrix(nrow = length(IPDgroup), ncol = 0)) + } # IPD estimation IPDParmNames <- OldScaleParms$name @@ -700,30 +701,14 @@ autoFIPC <- for (i in 1:length(oldformCommonItemNames)) { if ( - (length(grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )) == - 1) == - TRUE && - (length(levels(as.factor( - newFormModel@Data$data[, grep( - paste0('^', newformCommonItemNames[i], '$'), - colnames(newformXDataK[colnames(newFormModel@Data$data)]) - )] - ))) == - length(levels(as.factor( - oldFormModel@Data$data[, grep( - paste0('^', oldformCommonItemNames[i], '$'), - colnames(oldformYDataK[colnames(oldFormModel@Data$data)]) - )] - )))) + newformCommonItemNames[i] %in% colnames(newFormModel@Data$data) && + oldformCommonItemNames[i] %in% colnames(oldFormModel@Data$data) && + (length(levels(as.factor( + newFormModel@Data$data[, newformCommonItemNames[i]] + ))) == + length(levels(as.factor( + oldFormModel@Data$data[, oldformCommonItemNames[i]] + )))) ) { message( 'applying ', @@ -847,7 +832,7 @@ autoFIPC <- LinkedModelSyntax <- mirt::mirt.model(paste0( 'F1 = 1-', - ncol(newformXDataK[colnames(newFormModel@Data$data)]), + ncol(as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)]), '\n', 'MEAN = F1' )) @@ -860,7 +845,7 @@ autoFIPC <- LinkedModelSyntax <- mirt::mirt.model(paste0( 'F1 = 1-', - ncol(newformXDataK[colnames(newFormModel@Data$data)]), + ncol(as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)]), '\n' )) } @@ -880,7 +865,7 @@ autoFIPC <- LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'EM', @@ -900,7 +885,7 @@ autoFIPC <- } else { LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'EM', @@ -927,7 +912,7 @@ autoFIPC <- # LinkedModel <- oldFormModel LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'MHRM', @@ -947,7 +932,7 @@ autoFIPC <- } else { LinkedModel <- mirt::mirt( - data = newformXDataK[colnames(newFormModel@Data$data)], + data = as.data.frame(newformXDataK)[colnames(newFormModel@Data$data)], LinkedModelSyntax, itemtype = newFormModel@Model$itemtype, method = 'MHRM', diff --git a/tests/testthat/test-autoFIPC.R b/tests/testthat/test-autoFIPC.R new file mode 100644 index 0000000..a61b8c5 --- /dev/null +++ b/tests/testthat/test-autoFIPC.R @@ -0,0 +1,58 @@ +library(aFIPC) +library(mirt) + +test_that("autoFIPC runs with mock data", { + set.seed(1234) + # generate mock data + a <- matrix(c(rlnorm(5, .2, .2), rlnorm(5, .2, .2)), 10, 1) + d <- matrix(rnorm(10), 10) + itemtype <- rep('2PL', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 500, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 500, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "2PL", + tryEM = TRUE, + checkIPD = FALSE + ) + + expect_true(is.list(res)) + expect_s4_class(res$oldFormModel, "SingleGroupClass") + expect_s4_class(res$newFormModel, "SingleGroupClass") + expect_s4_class(res$LinkedModel, "SingleGroupClass") +}) + +test_that("autoFIPC runs with IPD check", { + set.seed(1234) + # generate mock data + a <- matrix(c(rlnorm(5, .2, .2), rlnorm(5, .2, .2)), 10, 1) + d <- matrix(rnorm(10), 10) + itemtype <- rep('2PL', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 500, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 500, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "2PL", + tryEM = TRUE, + checkIPD = TRUE + ) + + expect_true(is.list(res)) + expect_true(!is.null(res$IPDData)) +}) diff --git a/tests/testthat/test-autoFIPC2.R b/tests/testthat/test-autoFIPC2.R new file mode 100644 index 0000000..4e94953 --- /dev/null +++ b/tests/testthat/test-autoFIPC2.R @@ -0,0 +1,83 @@ +library(aFIPC) +library(mirt) + +test_that("autoFIPC handles different item types and freeMEAN=F", { + set.seed(1234) + # generate mock data + a <- matrix(rep(1, 10), 10, 1) # Rasch has a=1 + d <- matrix(rnorm(10), 10) + itemtype <- rep('dich', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 100, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 100, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "Rasch", + tryEM = TRUE, + checkIPD = FALSE, + freeMEAN = FALSE + ) + + expect_true(is.list(res)) +}) + +test_that("autoFIPC handles different options", { + set.seed(1234) + # generate mock data + a <- matrix(rep(1, 10), 10, 1) + d <- matrix(rnorm(10), 10) + itemtype <- rep('dich', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 100, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 100, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "Rasch", + tryEM = FALSE, + checkIPD = FALSE, + forceNormalZeroOne = TRUE, + parameterOverwrite = TRUE + ) + + expect_true(is.list(res)) +}) + +test_that("autoFIPC nominal data", { + set.seed(1234) + # generate mock data + a <- matrix(rep(1, 10), 10, 1) + d <- matrix(rnorm(10), 10) + itemtype <- rep('dich', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 100, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 100, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "Rasch", + tryEM = FALSE, + checkIPD = FALSE + ) + + expect_true(is.list(res)) +}) diff --git a/tests/testthat/test-autoFIPC3.R b/tests/testthat/test-autoFIPC3.R new file mode 100644 index 0000000..ed075bc --- /dev/null +++ b/tests/testthat/test-autoFIPC3.R @@ -0,0 +1,30 @@ +library(aFIPC) +library(mirt) + +test_that("autoFIPC handles ideal data or second order test failure", { + set.seed(1234) + # generate mock data + a <- matrix(rep(1, 10), 10, 1) + d <- matrix(rep(0, 10), 10) + itemtype <- rep('ideal', 10) + + oldform <- simdata(a[1:8, 1, drop=FALSE], d[1:8, 1, drop=FALSE], 10, itemtype[1:8]) + newform <- simdata(a[3:10, 1, drop=FALSE], d[3:10, 1, drop=FALSE], 10, itemtype[3:10]) + + colnames(oldform) <- paste0("Item", 1:8) + colnames(newform) <- paste0("Item", 3:10) + + res <- autoFIPC( + newformXData = newform, + oldformYData = oldform, + newformCommonItemNames = paste0("Item", 3:8), + oldformCommonItemNames = paste0("Item", 3:8), + itemtype = "ideal", + tryFitwholeOldItems = FALSE, + tryFitwholeNewItems = FALSE, + tryEM = TRUE, + checkIPD = FALSE + ) + + expect_true(is.list(res)) +})