diff --git a/.Rbuildignore b/.Rbuildignore index d46eb037d..f6b27d8db 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,31 +1,33 @@ -^logo.png -^README.Rmd -^LICENSE - -^\.Rprofile$ -^.*\.Rproj$ -^\.Rproj\.user$ - -^\.travis.yml -^\_pkgdown.yml -^\_pkgdown.yaml -^paper.bib$ -^GEMINI\.md$ - -^data/. -^docs/. -^vignettes/. -^pkgdown/. -^WIP/. -^papers/. -^.github/. -^CODE_OF_CONDUCT\.md$ -^revdep$ -^tests/testthat/_snaps/. -^cran-comments\.md$ -^\.github$ -\.code-workspace$ -\.lintr$ -^CRAN-SUBMISSION$ -^[.]?air[.]toml$ -^\.vscode$ +^logo.png +^README.Rmd +^LICENSE + +^\.Rprofile$ +^.*\.Rproj$ +^\.Rproj\.user$ + +^\.travis.yml +^\_pkgdown.yml +^\_pkgdown.yaml +^paper.bib$ +^GEMINI\.md$ + +^data/. +^docs/. +^vignettes/. +^pkgdown/. +^WIP/. +^papers/. +^.github/. +^CODE_OF_CONDUCT\.md$ +^revdep$ +^tests/testthat/_snaps/. +^cran-comments\.md$ +^\.github$ +\.code-workspace$ +\.lintr$ +^CRAN-SUBMISSION$ +^[.]?air[.]toml$ +^\.vscode$ +^\.positai$ +^\.claude$ diff --git a/.gitignore b/.gitignore index 1e3341f4a..270c13cf8 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,5 @@ Network Trash Folder Temporary Items .apdisk .Rprofile +.positai +.vscode diff --git a/DESCRIPTION b/DESCRIPTION index 9d7885d87..55f108388 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.16.0.2 +Version: 0.16.0.3 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 8653afe2b..efaec9d14 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# performance 0.16.0.3 + +## Bug fixes + +* Fixed issue in `check_collinearity()` that was causing inflated VIF values + when applied to clm and clmm models from the ordinal package. + # performance 0.16.0.2 ## Changes diff --git a/R/check_collinearity.R b/R/check_collinearity.R index 86a28334a..fdb684c2d 100644 --- a/R/check_collinearity.R +++ b/R/check_collinearity.R @@ -481,6 +481,45 @@ check_collinearity.zerocount <- function( return(NULL) } + # Filter to true slope parameters (handles multiple intercepts in ordinal models) + if (inherits(x, c("clm", "clmm"))) { + # names(x$beta) returns only non-singular (surviving) slopes + slope_names <- names(x$beta) + keep_idx <- which(colnames(v) %in% slope_names) + + # Rebuild term_assign by matching model matrix columns to surviving slopes + tryCatch( + { + mm <- insight::get_modelmatrix(x) + assign_attr <- attr(mm, "assign") + if (!is.null(assign_attr)) { + # Use name-matching to isolate indices for estimated slopes + match_idx <- which(colnames(mm) %in% slope_names) + if (length(match_idx) > 0) { + term_assign <- assign_attr[match_idx] + } + } + }, + error = function(e) NULL + ) + } else if (insight::has_intercept(x)) { + # Standard behavior: drop the first column/row (the singular intercept) + keep_idx <- seq_len(ncol(v))[-1] + } else { + keep_idx <- seq_len(ncol(v)) + if (isTRUE(verbose)) { + insight::format_alert("Model without intercept. VIFs may not be sensible.") + } + } + + # Safely subset the matrix + if (length(keep_idx) < ncol(v)) { + if (!is.null(term_assign) && length(term_assign) == ncol(v)) { + term_assign <- term_assign[keep_idx] + } + v <- v[keep_idx, keep_idx, drop = FALSE] + } + # we have rank-deficiency here. remove NA columns from assignment if (isTRUE(attributes(v)$rank_deficient) && !is.null(attributes(v)$na_columns_index)) { term_assign <- term_assign[-attributes(v)$na_columns_index] @@ -491,14 +530,6 @@ check_collinearity.zerocount <- function( } } - # check for missing intercept - if (insight::has_intercept(x)) { - v <- v[-1, -1] - term_assign <- term_assign[-1] - } else if (isTRUE(verbose)) { - insight::format_alert("Model has no intercept. VIFs may not be sensible.") - } - f <- insight::find_formula(x, verbose = FALSE) # hurdle or zeroinfl model can have no zero-inflation formula, in which case @@ -541,7 +572,6 @@ check_collinearity.zerocount <- function( result <- vector("numeric") na_terms <- vector("numeric") - # sanity check - models with offset(?) may contain too many term assignments if (length(term_assign) > ncol(v)) { term_assign <- term_assign[seq_len(ncol(v))] } diff --git a/performance.Rproj b/performance.Rproj index 2c52ed395..aacfcbeaa 100644 --- a/performance.Rproj +++ b/performance.Rproj @@ -1,25 +1,25 @@ -Version: 1.0 -ProjectId: af6facf3-033e-40d4-ac22-2830774814a9 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: No - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: knitr -LaTeX: pdfLaTeX - -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageCheckArgs: --as-cran --run-donttest -PackageRoxygenize: rd,collate,namespace - -QuitChildProcessesOnExit: Yes -DisableExecuteRprofile: Yes +Version: 1.0 +ProjectId: af6facf3-033e-40d4-ac22-2830774814a9 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: No + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --as-cran --run-donttest +PackageRoxygenize: rd,collate,namespace + +QuitChildProcessesOnExit: Yes +DisableExecuteRprofile: Yes diff --git a/tests/testthat/test-check_collinearity.R b/tests/testthat/test-check_collinearity.R index c6b192643..fadd683a6 100644 --- a/tests/testthat/test-check_collinearity.R +++ b/tests/testthat/test-check_collinearity.R @@ -300,3 +300,113 @@ test_that("check_collinearity, validate adjusted vif against car", { expect_equal(out1[, 1], out2$VIF, tolerance = 1e-3, ignore_attr = TRUE) expect_equal(out1[, 3], out2$SE_factor, tolerance = 1e-3, ignore_attr = TRUE) }) + +test_that("check_collinearity, ordinal clmm models", { + skip_if_not_installed("ordinal") + set.seed(999) + n <- 500 + x_continuous <- rnorm(n, mean = 0, sd = 1) + x_binary <- sample(c(-0.5, 0.5), size = n, replace = TRUE, prob = c(0.85, 0.15)) + subject_id <- factor(rep(1:50, each = 10)) + random_intercepts <- rnorm(50, 0, 1) + latent_y <- 2 * + x_continuous + + 3 * x_binary + + random_intercepts[as.numeric(subject_id)] + + rlogis(n) + y_ordinal <- cut( + latent_y, + breaks = 15, + ordered_result = TRUE + ) + dat <- data.frame(y_ordinal, x_continuous, x_binary, subject_id) + mod_clmm <- ordinal::clmm( + y_ordinal ~ x_continuous + x_binary + (1 | subject_id), + data = dat + ) + out <- check_collinearity(mod_clmm) + expect_s3_class(out, "check_collinearity") + expect_identical(out$Term, c("x_continuous", "x_binary")) + expect_equal(out$VIF, c(1.12, 1.12), tolerance = 0.05) +}) + +test_that("check_collinearity, ordinal clm models", { + skip_if_not_installed("ordinal") + set.seed(999) + n <- 500 + x_continuous <- rnorm(n, mean = 0, sd = 1) + x_binary <- sample(c(-0.5, 0.5), size = n, replace = TRUE, prob = c(0.85, 0.15)) + latent_y <- 2 * x_continuous + 3 * x_binary + rlogis(n) + y_ordinal <- cut( + latent_y, + breaks = 15, + ordered_result = TRUE + ) + dat <- data.frame(y_ordinal, x_continuous, x_binary) + mod_clm <- ordinal::clm( + y_ordinal ~ x_continuous + x_binary, + data = dat + ) + out <- check_collinearity(mod_clm) + expect_s3_class(out, "check_collinearity") + expect_identical(out$Term, c("x_continuous", "x_binary")) + expect_equal(out$VIF, c(1.11, 1.11), tolerance = 0.05) +}) + +test_that("check_collinearity, ordinal clmm models with offset", { + skip_if_not_installed("ordinal") + set.seed(999) + n <- 500 + x_continuous <- rnorm(n, mean = 0, sd = 1) + x_binary <- sample(c(-0.5, 0.5), size = n, replace = TRUE, prob = c(0.85, 0.15)) + x_offset <- rnorm(n, mean = 0, sd = 0.5) + subject_id <- factor(rep(1:50, each = 10)) + random_intercepts <- rnorm(50, 0, 1) + + latent_y <- 2 * + x_continuous + + 3 * x_binary + + random_intercepts[as.numeric(subject_id)] + + x_offset + + rlogis(n) + y_ordinal <- cut(latent_y, breaks = 15, ordered_result = TRUE) + dat <- data.frame(y_ordinal, x_continuous, x_binary, x_offset, subject_id) + mod_clmm_offset <- ordinal::clmm( + y_ordinal ~ x_continuous + x_binary + offset(x_offset) + (1 | subject_id), + data = dat + ) + out <- check_collinearity(mod_clmm_offset) + expect_s3_class(out, "check_collinearity") + expect_identical(out$Term, c("x_continuous", "x_binary")) + expect_equal(out$VIF, c(1.12, 1.12), tolerance = 0.05) +}) + +test_that("check_collinearity, ordinal clm models with offset", { + skip_if_not_installed("ordinal") + set.seed(999) + n <- 500 + x_continuous <- rnorm(n, mean = 0, sd = 1) + x_binary <- sample(c(-0.5, 0.5), size = n, replace = TRUE, prob = c(0.85, 0.15)) + x_offset <- rnorm(n, mean = 0, sd = 0.5) + latent_y <- 2 * x_continuous + 3 * x_binary + x_offset + rlogis(n) + y_ordinal <- cut(latent_y, breaks = 15, ordered_result = TRUE) + dat <- data.frame(y_ordinal, x_continuous, x_binary, x_offset) + mod_clm_offset <- ordinal::clm( + y_ordinal ~ x_continuous + x_binary + offset(x_offset), + data = dat + ) + out <- check_collinearity(mod_clm_offset) + expect_s3_class(out, "check_collinearity") + expect_identical(out$Term, c("x_continuous", "x_binary")) + expect_equal(out$VIF, c(1.11, 1.11), tolerance = 0.05) +}) + +test_that("check_collinearity, standard lm models with offset", { + # Standard linear model with an offset + m_lm_offset <- lm(mpg ~ wt + cyl + offset(disp), data = mtcars) + out <- check_collinearity(m_lm_offset) + expect_s3_class(out, "check_collinearity") + # The offset should not be evaluated for collinearity + expect_identical(out$Term, c("wt", "cyl")) + expect_false("disp" %in% out$Term) +})