From 93960117d7eaf506e462b19b7941a446a97aa164 Mon Sep 17 00:00:00 2001 From: PavanLomati Date: Tue, 21 Apr 2026 19:54:54 +0530 Subject: [PATCH 1/2] Load_pk-data --- NAMESPACE | 3 +++ NEWS.md | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 34b4580e..a69fd304 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,8 @@ export(getDepVar) export(getGroups) export(getIndepVar) export(get_halflife_points) +export(get_mapped_column) +export(get_pk_patterns) export(group_by) export(inner_join) export(interp.extrap.conc) @@ -165,6 +167,7 @@ export(interp.extrap.conc.dose) export(interpolate.conc) export(is_sparse_pk) export(left_join) +export(load_pk_data) export(mutate) export(normalize) export(normalize_by_col) diff --git a/NEWS.md b/NEWS.md index c13f0a91..107f4a07 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,11 @@ the dosing including dose amount and route. # PKNCA 0.12.2 +## New features +* `load_pk_data()` now provides a unified pipeline for loading, classifying, + cleaning, and standardising PK data across multiple file formats, with + improved column detection, BLQ handling, and decimal formatting. + ## Bug Fixes * `normalize.data.frame()` no longer triggers a dplyr deprecation warning From d6f27f4c73fbd162ab1fc053b7e747a64f56dd05 Mon Sep 17 00:00:00 2001 From: PavanLomati Date: Wed, 6 May 2026 18:12:19 +0530 Subject: [PATCH 2/2] load_pk_data(), a new data-loading utility for the PKNCA package. It provides a standardized workflow to load, classify, clean, and preprocess pharmacokinetic (PK) data from multiple file formats prior to NCA analysis. Key Features Automatic detection of concentration and dose datasets Flexible column mapping via regex patterns Support for both separate and combined datasets BLQ handling with interpolation (linear pre-Cmax, log-linear post-Cmax) Decimal precision standardization Minimal assumptions about input data structure --- DESCRIPTION | 8 +- NEWS.md | 2 + R/load_pk_data.R | 911 +++++++++++++++++++++++++++++ data-raw/test/EX.xpt | Bin 0 -> 4800 bytes data-raw/test/PC.xpt | Bin 0 -> 44400 bytes man/auto_create_subject_id.Rd | 13 + man/clean_blq_values.Rd | 16 + man/count_decimal_places.Rd | 12 + man/create_column_mapping.Rd | 12 + man/decimal_formatter.Rd | 14 + man/detect_role.Rd | 12 + man/get_mapped_column.Rd | 25 + man/get_pk_patterns.Rd | 21 + man/interpolate_subject.Rd | 12 + man/load_and_bind_pk_files.Rd | 12 + man/load_pk_data.Rd | 49 ++ man/process_conc_data.Rd | 12 + man/process_dose_data.Rd | 12 + man/quick_validate.Rd | 12 + man/read_column_names_only.Rd | 13 + man/read_one_pk_file.Rd | 12 + man/remove_empty_data.Rd | 13 + man/resolve_pk_column_roles.Rd | 29 + man/resolve_pk_files.Rd | 13 + tests/testthat/test-load_pk_data.R | 540 +++++++++++++++++ 25 files changed, 1773 insertions(+), 2 deletions(-) create mode 100644 R/load_pk_data.R create mode 100644 data-raw/test/EX.xpt create mode 100644 data-raw/test/PC.xpt create mode 100644 man/auto_create_subject_id.Rd create mode 100644 man/clean_blq_values.Rd create mode 100644 man/count_decimal_places.Rd create mode 100644 man/create_column_mapping.Rd create mode 100644 man/decimal_formatter.Rd create mode 100644 man/detect_role.Rd create mode 100644 man/get_mapped_column.Rd create mode 100644 man/get_pk_patterns.Rd create mode 100644 man/interpolate_subject.Rd create mode 100644 man/load_and_bind_pk_files.Rd create mode 100644 man/load_pk_data.Rd create mode 100644 man/process_conc_data.Rd create mode 100644 man/process_dose_data.Rd create mode 100644 man/quick_validate.Rd create mode 100644 man/read_column_names_only.Rd create mode 100644 man/read_one_pk_file.Rd create mode 100644 man/remove_empty_data.Rd create mode 100644 man/resolve_pk_column_roles.Rd create mode 100644 man/resolve_pk_files.Rd create mode 100644 tests/testthat/test-load_pk_data.R diff --git a/DESCRIPTION b/DESCRIPTION index 1f4e5fbc..b51425f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Authors@R: c( Imports: checkmate, dplyr (>= 1.1.0), - digest, + digest, nlme, purrr, rlang, @@ -23,6 +23,7 @@ Suggests: covr, cowplot, ggplot2, + haven, knitr, labeling, pander, @@ -30,7 +31,10 @@ Suggests: spelling, testthat (>= 3.0.0), units, - withr + withr, + rio, + zoo, + janitor Description: Compute standard Non-Compartmental Analysis (NCA) parameters for typical pharmacokinetic analyses and summarize them. License: AGPL-3 diff --git a/NEWS.md b/NEWS.md index 107f4a07..bf66a9d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ the dosing including dose amount and route. # PKNCA 0.12.2 +# PKNCA (development version) + ## New features * `load_pk_data()` now provides a unified pipeline for loading, classifying, cleaning, and standardising PK data across multiple file formats, with diff --git a/R/load_pk_data.R b/R/load_pk_data.R new file mode 100644 index 00000000..dd2f7a2b --- /dev/null +++ b/R/load_pk_data.R @@ -0,0 +1,911 @@ +# ============================================================================= +# PK Data Loader +# ============================================================================= +# Loads, classifies, cleans, and standardises pharmacokinetic data from +# multiple file formats (XPT, XLSX, XLS, CSV, TXT, SAS7BDAT). +# ============================================================================= + +# ============================================================================= +# 1. Public API +# ============================================================================= + +#' Load and Process Pharmacokinetic Data +#' +#' Streamlines loading, cleaning, and standardisation of PK data from multiple +#' file formats (XPT, XLSX, XLS, CSV, TXT, SAS7BDAT). +#' +#' @param path Character. Directory containing PK files. Default: \code{getwd()}. +#' @param file_types Character vector. File extensions to search for. Default: +#' \code{c("xpt","xlsx","xls","csv","txt","sas7bdat")}. +#' @param patterns Named list. Regex patterns for PK column roles. +#' See \code{\link{get_pk_patterns}}. +#' @param decimal_control Logical. Apply smart decimal formatting? Default \code{TRUE}. +#' @param blq_handling Logical. Apply BLQ interpolation? Default \code{TRUE}. +#' @param bind_rows Logical. Bind multiple files into one data frame? Default \code{TRUE}. +#' @param verbose Logical. Print detailed progress? Default \code{TRUE}. +#' +#' @return A list of class \code{pk_data_list}: +#' \item{conc}{Concentration data frame (if found)} +#' \item{dose}{Dose data frame (if found)} +#' +#' @export +#' @examples +#' \dontrun{ +#' pk <- load_pk_data(path = "path/to/data", verbose = TRUE) +#' conc <- pk$conc +#' dose <- pk$dose +#' } +load_pk_data <- function(path = getwd(), + file_types = NULL, + patterns = get_pk_patterns(), + decimal_control = TRUE, + blq_handling = TRUE, + bind_rows = TRUE, + verbose = TRUE) { + + # ---- argument validation -------------------------------------------------- + checkmate::assert_string(path, min.chars = 1) + checkmate::assert_directory_exists(path) + checkmate::assert_character(file_types, null.ok = TRUE, min.chars = 1) + checkmate::assert_list(patterns, min.len = 1, names = "named") + checkmate::assert_flag(decimal_control) + checkmate::assert_flag(blq_handling) + checkmate::assert_flag(bind_rows) + checkmate::assert_flag(verbose) + + lapply(patterns, function(p) { + if (!is.character(p)) + rlang::abort("All entries in `patterns` must be character vectors.") + }) + + # ---- 1. resolve files ----------------------------------------------------- + file_set <- resolve_pk_files( + path = path, + file_types = file_types, + patterns = patterns, + verbose = verbose + ) + + # ---- 2. load & (optionally) bind ------------------------------------------ + result <- list() + type_map <- c(conc = "concentration", dose = "dose") + + for (role in names(type_map)) { + files <- file_set[[role]] + if (length(files) == 0) next + + result[[role]] <- load_and_bind_pk_files( + paths = files, + type = type_map[[role]], + patterns = patterns, + verbose = verbose, + bind_rows = bind_rows + ) + } + + quick_validate(result) # lightweight safety net + + # ---- 3. post-processing --------------------------------------------------- + process_map <- list( + conc = function(x) process_conc_data(x, decimal_control, blq_handling, verbose), + dose = function(x) process_dose_data(x, decimal_control, verbose) + ) + + for (role in names(process_map)) { + if (!is.null(result[[role]])) + result[[role]] <- process_map[[role]](result[[role]]) + } + + class(result) <- c("pk_data_list", class(result)) + result +} + +#' Lightweight Result Validation +#' +#' @keywords internal +quick_validate <- function(result) { + if (is.null(result$conc) && is.null(result$dose)){ + rlang::abort("No valid PK data loaded \u2014 neither conc nor dose was found.") + } + + invisible(TRUE) +} + +# ============================================================================= +# 2. File Resolution +# ============================================================================= + +#' Resolve PK Files in a Directory +#' +#' Scans \code{path} for supported files and classifies each as +#' "conc", "dose", "combined", or "unknown". +#' +#' @keywords internal +resolve_pk_files <- function(path, + file_types = NULL, + patterns, + verbose = FALSE) { + + # path validity already guaranteed by load_pk_data(); skip redundant check + if (is.null(file_types)){ + file_types <- c("xpt", "xlsx", "xls", "csv", "txt", "sas7bdat") + } + + ext_pat <- paste0("\\.(", paste(file_types, collapse = "|"), ")$") + + if (verbose){message("Supported extensions: ", paste(file_types, collapse = ", "))} + + files <- list.files(path, pattern = ext_pat, + full.names = TRUE, ignore.case = TRUE) + + if (length(files) == 0){ + rlang::abort(sprintf("No supported files found in: %s", path)) + } + + # ---- role detection ------------------------------------------------------- + roles <- vapply( + X = files, + FUN = detect_role, + FUN.VALUE = character(1), + patterns = patterns, + verbose = verbose + ) + + if (verbose) { + message("File role detection:") + for (i in seq_along(files)) + message(sprintf(" %s \u2192 %s", basename(files[i]), roles[i])) + } + + combined_files <- files[roles == "combined"] + conc_only <- files[roles == "conc"] + dose_only <- files[roles == "dose"] + + # If any combined files exist, use them exclusively for both roles + if (length(combined_files) > 0) { + if (verbose) message("Combined conc+dose file(s) found \u2192 single-file mode") + return(structure( + list(conc = combined_files, dose = combined_files), + class = "pk_file_set" + )) + } + + # Warn when one role is absent + if (length(conc_only) == 0 && length(dose_only) == 0) + rlang::abort(sprintf( + "Neither concentration nor dose file detected.\nFiles found: %s\n%s", + paste(basename(files), collapse = ", "), + "Ensure files contain required columns (conc/concentration and/or dose/amt)." + )) + + if (length(conc_only) == 0) + rlang::warn(sprintf( + "No concentration file detected. Dose file(s): %s", + paste(basename(dose_only), collapse = ", ") + )) + + if (length(dose_only) == 0) + rlang::warn(sprintf( + "No dose file detected. Concentration file(s): %s", + paste(basename(conc_only), collapse = ", ") + )) + + structure(list(conc = conc_only, dose = dose_only), class = "pk_file_set") +} + + +# ============================================================================= +# 3. File Loading & Binding +# ============================================================================= + +#' Load and Bind Multiple PK Files +#' +#' @keywords internal +load_and_bind_pk_files <- function(paths, type, patterns, verbose, bind_rows) { + + missing_files <- paths[!file.exists(paths)] + if (length(missing_files) > 0){ + rlang::abort(sprintf("File(s) do not exist:\n %s", paste(missing_files, collapse = "\n "))) + } + + dfs <- lapply(paths, read_one_pk_file, patterns = patterns, verbose = verbose) + + if (length(dfs) == 1) return(dfs[[1]]) + + if (!bind_rows) return(dfs) + + # -- check mapping consistency across files --------------------------------- + mappings <- lapply(dfs, function(d) attr(d, "column_mapping")) + reference_mapping <- mappings[[1]] + + inconsistent <- vapply(mappings[-1], function(m) { + !identical( + unlist(m[!is.na(unlist(m))]), + unlist(reference_mapping[!is.na(unlist(reference_mapping))]) + ) + }, logical(1)) + + if (any(inconsistent)) + rlang::warn( + c("!" = "Column mappings differ across files being bound together.", + "i" = "Using the mapping from the first file.", + ">" = "Verify all files share the same column structure.") + ) + + # -- bind & restore attributes ---------------------------------------------- + bound <- dplyr::bind_rows(dfs) + attr(bound, "column_mapping") <- reference_mapping + class(bound) <- c("pk_data", class(bound)) + + if (verbose){ + message(sprintf(" \u2022 Bound %d %s file(s) \u2192 %d rows", length(dfs), type, nrow(bound))) + } + + bound +} + + +#' Read a Single PK File +#' +#' @keywords internal +read_one_pk_file <- function(filepath, patterns, verbose = TRUE) { + + if (verbose){rlang::inform(sprintf("Loading: %s", basename(filepath)))} + + df <- tryCatch( + rio::import(file = filepath, which = 1), + error = function(e) + rlang::abort(sprintf("Failed to read '%s': %s", filepath, e$message)) + ) + + if (nrow(df) == 0){rlang::abort(sprintf("Empty file: %s", basename(filepath)))} + + mapping <- create_column_mapping(names(df), patterns) + attr(df, "column_mapping") <- mapping + class(df) <- c("pk_data", class(df)) + df +} + + +# ============================================================================= +# 4. Role Detection +# ============================================================================= + +#' Detect the PK Role of a File +#' +#' @keywords internal +detect_role <- function(f, patterns, verbose = FALSE) { + + col_names <- read_column_names_only(f, verbose = verbose) + if (is.null(col_names)) return("unknown") + + if (verbose){message(sprintf(" Columns in %s: %s", basename(f), paste(col_names, collapse = ", ")))} + + role_matches <- resolve_pk_column_roles( + names_vec = col_names, + patterns = patterns, + mode = "detect_all", + stop_on_ambiguous = FALSE + ) + + has_conc <- isTRUE(role_matches["conc"]) + has_dose <- isTRUE(role_matches["dose"]) + + if (has_conc && has_dose) return("combined") + if (has_conc) return("conc") + if (has_dose) return("dose") + + if (verbose){message(sprintf(" %s \u2192 no PK columns found", basename(f)))} + + "unknown" +} + + +#' Read Only Column Names from a File +#' +#' Attempts a zero-row or one-row read to obtain column names without loading +#' the full dataset. Handles formats that ignore \code{n_max}. +#' +#' @keywords internal +read_column_names_only <- function(f, verbose = FALSE) { + + ext <- tolower(tools::file_ext(f)) + + col_names <- tryCatch({ + if (ext %in% c("xpt", "sas7bdat")) { + # haven is more reliable for these formats + if (requireNamespace("haven", quietly = TRUE)) { + reader <- if (ext == "xpt") haven::read_xpt else haven::read_sas + tmp <- reader(f, n_max = 1L) + names(tmp) + } else { + tmp <- rio::import(file = f, which = 1) + names(tmp) + } + } else { + tmp <- rio::import(file = f, which = 1, n_max = 1L) + names(tmp) + } + }, error = function(e) { + if (verbose){message(sprintf(" Could not read columns from '%s': %s", basename(f), e$message))} + + NULL + }) + + col_names +} + + +# ============================================================================= +# 5. Column Mapping +# ============================================================================= + +#' Get Default PK Column Patterns +#' +#' Returns a named list of regex patterns used to identify concentration, dose, +#' subject, and time columns. +#' +#' @return Named list with patterns for \code{conc}, \code{dose}, +#' \code{subject}, and \code{time}. +#' @export +#' @examples +#' patterns <- get_pk_patterns() +#' # Add SDTM PCORRES support: +#' # patterns$conc <- c(patterns$conc, "^pcorres$", "^pcstresc$") +get_pk_patterns <- function() { + list( + # FIX #7: tightened greedy patterns with word boundaries / anchors + conc = c( + "^conc$", "^aval$", "^pcstresn$", "^dv$", "^concentration$", + # "^pcorres$", # opt-in: SDTM PCORRES (character result) + # "^pcstresc$", # opt-in: SDTM PCSTRESC (standardised char result) + "^conc_", # conc_ prefix (e.g. conc_plasma) + "_conc$", # _conc suffix + "\\bconcentration\\b", + "\\bng[_/]?ml\\b", + "\\bmg[_/]?ml\\b", + "\\bug[_/]?ml\\b" + ), + dose = c( + "^dose$", "^amount$", "^exdose$", "^amt$", + "^dose_", # dose_ prefix + "_dose$", # _dose suffix + "\\bmg$", + "\\bug$" + ), + subject = c( + "^usubjid$", "^id$", "^subject$", "^subjectid$", "^ptno$", + "^subj$", "^subj_id$", "^subject_id$" + ), + time = c( + "^time$", "^pctptnum$", "^atptn$", "^tad$", "^tafd$", "^hr$", + "^hours$", "^time_h$", "^time_hr$", + "\\btime\\s*\\(.*\\)" # e.g. "Time (h)" + ) + ) +} + + +#' Create Column Mapping from Column Names +#' +#' @keywords internal +create_column_mapping <- function(original_names, patterns) { + matches <- resolve_pk_column_roles( + names_vec = tolower(original_names), + patterns = patterns, + mode = "match", + stop_on_ambiguous = FALSE + ) + mapping <- vector("list", length(patterns)) + names(mapping) <- names(patterns) + for (role in names(patterns)) { + idx <- which(matches[[role]]) + mapping[[role]] <- if (length(idx) > 0) original_names[idx[1L]] else NA_character_ + } + mapping +} + + +#' Get Mapped Column Name +#' +#' Returns the actual column name for a given PK role. +#' +#' @param data A data frame created by \code{load_pk_data()}. +#' @param role Character. One of \code{"subject"}, \code{"time"}, +#' \code{"conc"}, or \code{"dose"}. +#' +#' @return Character string - the matched column name. +#' @export +#' @examples +#' \dontrun{ +#' time_col <- get_mapped_column(pk$conc, "time") +#' } +get_mapped_column <- function(data, role) { + + mapping <- attr(data, "column_mapping") + if (is.null(mapping)){ + rlang::abort( + message = "Missing column mapping.", + body = c( + "i" = "This data frame was not produced by load_pk_data().", + ">" = "Use load_pk_data() to load and map your data." + ) + ) + } + + col <- mapping[[role]] + if (is.na(col)) { + available <- names(mapping)[!is.na(unlist(mapping))] + rlang::abort( + message = sprintf("Role '%s' not found in column mapping.", role), + body = c( + "i" = sprintf("Available roles: %s", paste(available, collapse = ", ")), + ">" = "Check your data or adjust column patterns via get_pk_patterns()." + ) + ) + } + col +} + + +#' Resolve PK Column Roles +#' +#' Matches column names against PK role patterns. +#' +#' @param file Character. Optional file path for error messages. +#' @param names_vec Character vector (or data frame) of column names. +#' @param patterns Named list of regex patterns. +#' @param mode One of \code{"match"}, \code{"detect"}, \code{"detect_all"}. +#' @param stop_on_ambiguous Logical. Abort if multiple columns match one role? +#' +#' @keywords internal +resolve_pk_column_roles <- function(file = NULL, + names_vec, + patterns, + mode = c("match", "detect", "detect_all"), + stop_on_ambiguous = TRUE) { + + mode <- match.arg(mode) + + if (is.data.frame(names_vec)) { + if (is.null(names_vec) || nrow(names_vec) == 0) { + return(switch(mode, + detect = FALSE, + detect_all = FALSE, + lapply(patterns, function(x) logical(0)))) + } + names_vec <- names(names_vec) + } + + lower_names <- tolower(names_vec) + + # Duplicate column check + dupes <- lower_names[duplicated(lower_names)] + if (length(dupes) > 0){ + rlang::abort(sprintf( + "%sDuplicate column names (case-insensitive): %s", + if (!is.null(file)) sprintf("File '%s': ", basename(file)) else "", + paste(unique(dupes), collapse = ", ") + )) + } + + if (length(lower_names) == 0) { + return(switch(mode, + detect = FALSE, + detect_all = FALSE, + lapply(patterns, function(x) logical(0)))) + } + + # Match each role + role_hits <- lapply(patterns, function(pats) { + combined_pat <- paste0("(", paste(pats, collapse = "|"), ")") + grepl(combined_pat, lower_names, ignore.case = TRUE, perl = TRUE) + }) + + # Ambiguity check + if (stop_on_ambiguous) { + ambiguous_roles <- names(role_hits)[vapply(role_hits, sum, integer(1)) > 1L] + if (length(ambiguous_roles) > 0) { + details <- vapply(ambiguous_roles, function(r) { + hits <- which(role_hits[[r]]) + sprintf("%s \u2190 %s", r, paste(names_vec[hits], collapse = ", ")) + }, character(1)) + rlang::abort(sprintf( + "%sAmbiguous column matches:\n %s\n\nFix: Rename columns or supply custom patterns.", + if (!is.null(file)) sprintf("File '%s': ", basename(file)) else "", + paste(details, collapse = "\n ") + )) + } + } + + switch(mode, + detect = any(vapply(role_hits, any, logical(1))), + detect_all = vapply(role_hits, any, logical(1)), + role_hits # "match" \u2014 return the full logical list + ) +} + + +# ============================================================================= +# 6. Post-Processing: Concentration & Dose +# ============================================================================= + +#' Process Concentration Data +#' +#' @keywords internal +process_conc_data <- function(df, decimal_control, blq_handling, verbose) { + + df <- remove_empty_data(df, verbose) + + mapping <- attr(df, "column_mapping") + if (is.na(mapping$subject)) + df <- auto_create_subject_id(df, verbose = verbose) + + if (blq_handling) + df <- clean_blq_values(df, verbose) + + if (decimal_control) + df <- decimal_formatter( + df = df, + col_max_map = list(time = 1L, conc = 3L), + verbose = verbose + ) + + df +} + + +#' Process Dose Data +#' +#' @keywords internal +process_dose_data <- function(df, decimal_control, verbose) { + + df <- remove_empty_data(df, verbose) + + mapping <- attr(df, "column_mapping") + if (is.na(mapping$subject)) + df <- auto_create_subject_id(df, verbose = verbose) + + if (decimal_control) + df <- decimal_formatter( + df = df, + col_max_map = list(dose = 2L), + verbose = verbose + ) + + df +} + + +# ============================================================================= +# 7. Data Cleaning Utilities +# ============================================================================= + +#' Remove Empty Rows and Columns +#' +#' Thin wrapper around \code{janitor::remove_empty()} that preserves +#' custom attributes. +#' +#' @keywords internal +remove_empty_data <- function(df, verbose) { + + orig_rows <- nrow(df) + orig_cols <- ncol(df) + + cleaned <- janitor::remove_empty(dat = df, which = c("rows", "cols")) + + removed_rows <- orig_rows - nrow(cleaned) + removed_cols <- orig_cols - ncol(cleaned) + + if ((removed_rows > 0 || removed_cols > 0) && verbose){ + message(sprintf(" \u2022 Removed %d empty row(s), %d empty col(s)", removed_rows, removed_cols)) + } + + # Preserve attributes + attr(cleaned, "column_mapping") <- attr(df, "column_mapping") + class(cleaned) <- class(df) + cleaned +} + + +#' Auto-Create Subject ID Column +#' +#' Creates a default subject ID (\code{"SUBJ001"}) when no subject column is +#' detected. Warns if duplicate time values suggest multiple subjects. +#' +#' @keywords internal +auto_create_subject_id <- function(df, verbose = FALSE) { + + mapping <- attr(df, "column_mapping") + + # Already mapped, nothing to do + if (!is.null(mapping$subject) && + !is.na(mapping$subject) && + mapping$subject %in% names(df)) { + return(df) + } + + warned <- FALSE + time_col <- mapping$time + + if (!is.na(time_col) && time_col %in% names(df)) { + time_vals <- df[[time_col]] + if (any(duplicated(stats::na.omit(time_vals)))) { + warned <- TRUE + rlang::warn(c( + "!" = "No subject ID column detected.", + "i" = "Duplicate time values found \u2014 possible multiple subjects.", + ">" = "All data assigned to single subject ID = 'SUBJ001'.", + ">" = "Add a subject column if multiple subjects are present." + )) + } + } + + df$ID <- "SUBJ001" + mapping$subject <- "ID" + attr(df, "column_mapping") <- mapping + + if (verbose && !warned){rlang::inform("No subject ID detected \u2014 created default column 'ID' = 'SUBJ001'.")} + + df +} + + +# ============================================================================= +# 8. BLQ Handling +# ============================================================================= + +#' Handle BLQ (Below Limit of Quantification) Values +#' +#' Replaces BLQ strings with \code{NA} then interpolates using linear +#' interpolation pre-Cmax and log-linear interpolation post-Cmax. +#' +#' Negative-time rows are flagged with a warning before removal. +#' +#' @keywords internal +clean_blq_values <- function(data, verbose) { + + subj_col <- get_mapped_column(data, "subject") + time_col <- get_mapped_column(data, "time") + conc_col <- get_mapped_column(data, "conc") + + blq_strings <- c("blq", "bloq", "bql", "lloq", "na", "nr", "", + "nd", " 0) { + rlang::warn(c( + "!" = sprintf("%d row(s) with negative time values will be removed.", + length(neg_time_rows)), + "i" = "Negative time indicates pre-dose sampling \u2014 handle separately if needed.", + ">" = "Rows removed from BLQ processing pipeline." + )) + } + + # ---- step 1: mutate ------------------------------------------------------- + + processed <- data + + # keep original + processed$conc_original <- processed[[conc_col]] + + # transform + tmp <- trimws(tolower(as.character(processed[[conc_col]]))) + + processed[[conc_col]] <- dplyr::case_when( + tmp %in% blq_strings ~ NA_real_, + tmp == "0" ~ 0, + TRUE ~ suppressWarnings(as.numeric(tmp)) + ) + + # ---- step 2: filter ------------------------------------------------------- + processed <- dplyr::filter( + processed, + !is.na(.data[[time_col]]), + is.finite(suppressWarnings(as.numeric(as.character(.data[[time_col]])))), + suppressWarnings(as.numeric(as.character(.data[[time_col]]))) >= 0 + ) + + # ---- step 3: arrange ------------------------------------------------------ + processed <- dplyr::arrange( + processed, + .data[[subj_col]], + .data[[time_col]] + ) + + # ---- step 4: group-wise interpolation ------------------------------------ + split_data <- split(processed, processed[[subj_col]]) + + interpolated_list <- lapply(split_data, function(sub_df) { + interpolate_subject( + sub_df = sub_df, + time_col = time_col, + conc_col = conc_col, + verbose = verbose + ) + }) + + cleaned <- dplyr::bind_rows(interpolated_list) + + # ---- final arrange -------------------------------------------------------- + cleaned <- dplyr::arrange( + cleaned, + .data[[subj_col]], + .data[[time_col]] + ) + + # ---- restore attributes --------------------------------------------------- + attr(cleaned, "column_mapping") <- attr(data, "column_mapping") + class(cleaned) <- class(data) + cleaned +} + + +#' Interpolate BLQ Values for a Single Subject +#' +#' @keywords internal +interpolate_subject <- function(sub_df, time_col, conc_col, verbose) { + + if (nrow(sub_df) == 0) return(sub_df) + + sub_df <- dplyr::arrange(sub_df, .data[[time_col]]) + t_vals <- sub_df[[time_col]] + conc_vals <- sub_df[[conc_col]] # FIX #2 + method <- rep("observed", length(t_vals)) + + obs_idx <- which(!is.na(conc_vals)) + + # All BLQ set everything to zero + if (length(obs_idx) == 0) { + sub_df[[conc_col]] <- 0 + sub_df$method <- "all-blq" + return(sub_df) + } + + if (length(obs_idx) >= 2) { + cmax_idx <- obs_idx[which.max(conc_vals[obs_idx])] + cmax_time <- t_vals[cmax_idx] + + first_obs <- min(t_vals[obs_idx]) + last_obs <- max(t_vals[obs_idx]) + middle_na <- which(is.na(conc_vals) & t_vals > first_obs & t_vals < last_obs) + + if (length(middle_na) > 0) { + conc_interp <- tryCatch( + zoo::na.approx(conc_vals, x = t_vals, na.rm = FALSE), + error = function(e) conc_vals + ) + + for (i in middle_na) { + if (t_vals[i] <= cmax_time) { + conc_vals[i] <- conc_interp[i] + method[i] <- "pre-cmax-linear" + } else { + before <- max(obs_idx[obs_idx < i]) + after <- min(obs_idx[obs_idx > i]) + if (!is.na(before) && !is.na(after) && + conc_vals[before] > 0 && conc_vals[after] > 0) { + lambda <- log(conc_vals[before] / conc_vals[after]) / + (t_vals[after] - t_vals[before]) + conc_vals[i] <- conc_vals[before] * exp(-lambda * (t_vals[i] - t_vals[before])) + method[i] <- "post-cmax-loglinear" + } else { + conc_vals[i] <- conc_interp[i] + method[i] <- "post-cmax-linear" + } + } + } + } + } + + # Pre-dose zeros (before first observed value) + first_obs_idx <- min(obs_idx) + if (first_obs_idx > 1L) { + conc_vals[seq_len(first_obs_idx - 1L)] <- 0 + method[seq_len(first_obs_idx - 1L)] <- "pre-dose-zero" + } + + sub_df[[conc_col]] <- conc_vals + sub_df$method <- method + sub_df +} + + +# ============================================================================= +# 9. Decimal Formatting +# ============================================================================= + +#' Count Decimal Places in a Numeric Value +#' +#' @keywords internal +count_decimal_places <- function(x) { + if (is.na(x) || !is.finite(x)) return(0L) + x_str <- sub("0+$", "", sprintf("%.15f", x)) + if (grepl("\\.", x_str, fixed = FALSE)) + nchar(sub(".*\\.", "", x_str)) + else + 0L +} + + +#' Apply Consistent Decimal Formatting to PK Columns +#' +#' Decimal information is stored as an attribute on the *data frame* +#' (\code{"decimal_info"}) rather than on a copy of an individual column, +#' ensuring the metadata actually persists. +#' +#' @keywords internal +decimal_formatter <- function(df, col_max_map, verbose) { + + mapping <- attr(df, "column_mapping") + if (is.null(mapping)) + rlang::abort("Internal error: column_mapping attribute is missing.") + + decimal_info <- attr(df, "decimal_info") %||% list() + + for (role in names(col_max_map)) { + col <- mapping[[role]] + if (is.null(col) || is.na(col) || !col %in% names(df)) next + if (!is.numeric(df[[col]])) next + + places <- vapply(df[[col]], count_decimal_places, integer(1)) + optimal <- min(max(places, na.rm = TRUE), col_max_map[[role]]) + + # store on the data frame, not on a discarded copy of the column + decimal_info[[col]] <- optimal + + if (verbose){rlang::inform(sprintf(" \u2022 [%s] %s: using %d decimal place(s)", role, col, optimal))} + } + + attr(df, "decimal_info") <- decimal_info + df +} + + +# ============================================================================= +# 10. Usage Example (wrapped in if (FALSE) so it never auto-runs) +# ============================================================================= +if (FALSE) { + + data_dir <- "../data-raw/test" + + pk <- load_pk_data( + path = data_dir, + bind_rows = TRUE, + decimal_control = TRUE, + blq_handling = TRUE, + verbose = TRUE + ) + + print(pk) + + conc <- pk$conc + dose <- pk$dose + + # Retrieve mapped column names + time_col <- get_mapped_column(conc, "time") + conc_col <- get_mapped_column(conc, "conc") + subj_col <- get_mapped_column(conc, "subject") + + # Ready for PKNCA + conc_obj <- PKNCAconc( + conc, + formula = as.formula( + sprintf("%s ~ %s | %s", conc_col, time_col, subj_col) + ) + ) + dose_obj <- PKNCAdose( + dose, + formula = as.formula( + sprintf("%s ~ %s | %s", + get_mapped_column(dose, "dose"), + get_mapped_column(dose, "time"), + get_mapped_column(dose, "subject")) + ) + ) + data_obj <- PKNCAdata(conc_obj, dose_obj) + result <- pk.nca(data_obj) +} \ No newline at end of file diff --git a/data-raw/test/EX.xpt b/data-raw/test/EX.xpt new file mode 100644 index 0000000000000000000000000000000000000000..a94595c6b4752e40b16b63f34cb63f9906097a7e GIT binary patch literal 4800 zcmd6p(QeZ)7=^ttMmIJ=O zJynDMb$DJznOBSj&C7XSbjYK^fA{Hq=P)R~*+<(?1~LeHe$)%Qy|A-7x9_U&E}f}5 z?^ulcc@OHH*N@Nvzr$DdRQ2L)kecCZHZYX&T%*gEz9IJu%#;d80DR`=*vF4EoL&6DCnSIVM{fC z0^dnhU$(_8PBMXt^|fS~eR=Mb0gKhMd#B7Yt)BraQD$Y{hVP_BmTlH5v%F5ns99IR z!uo0+!go^Dmz^q$^WzKFN|qP!owUfZ#dgIkO{UWQfdOkJ%S-r9T4dQ_cg0yU7^0wq zf`u*BoZvfYk>wHF%kLK%%K=K(R8Bhz+!d3+$;NXfz#F3 zvW$7ZT%o*^sx0@*zN|0H7pDx>;`$vhSK8kHlD>LpUHYx=`)={x!nt4kN9x-_KBEUJ zYW(r@rAMBm{;80Ihu5Y1;Zn`d<0Pe1nF^9)F_8fe+6O#3@I%SFJ_Rf7=~eZdK4u-!r^qE?(@JHxZURpBlEb-Tu_WLfEdArfdDG6 z5ZpvD5+jC~ge7c}O**CMWK8Iw|$b%~3vMY3vQk(8mL7@x#)@s&+#zjM036-FzW z(=}h!P9@d+kN2L_=XbmN{Lj~~nYC(G>-BnTx~Dfg*Zg&MsNQPtJa6XF|Lt`17yQXT z&+Du9dHLT_DV)ypB0q@o;|o_U5(LD^r?P0OOs59HK>hyT8vM~??#bS7 zSFK9{gTdV|i;w^l!GQ1B^JRUyx8CR--1+cqMfxtcx;FO&9J6Yp`v00=ceSh5_h;^x zr^no=6`of)zEY_)RgUXx%*p0;WP|@9?I+>e#UFkVwNsks<@n0+Sx;BBqZ`jS_!O^+ z->|v~kC#gGyqr?`z?{B0>Y}0Ja&oCW;D?Wce>7E2sO!GkY`nP=X>l4j2Y$t#whd2T zD$Ti^P&u)#8&KwS=>mh-8jzO>c)V1a=f$fGGkL)?4nDez!h^9`gx2eW}!N8C&FK=<3n4rSgCu zHf48QUDrArrE9e3;c4)9wfbc|UMe+ZcYML6w{{_(bFe913qNo55j zOP2RnyQ`h^8a4B=NXC8|GOz!tDa$%h=yN7KH^2IYq&Vn#y7=l+uXDk|<#3ASE7#UL zt9@P7PCcco5B{gPxp=Mp>f#@mnQXyYkAc~)Ho9uv4Ic1tau7{svJoUx12I)5_(AC{ zThi~XSiWlYu$w=G^(Zv=jjgfqRYcg_s2nss$_cYu4ZHadE`@$Lt3 z@EkdC8N@z|qT-fO=x^L}=i#0A1cV%zgqR<4U;{u*pKk6Z=Ky&s!y$0wAXpOlkzmCw zqsW>s{ciiV)juZW5C%ta2!mq_#Pm(?a*#nO4$~buL_7$5me_h>Mb@79t?LfXB;$|= zfREz9yu~YzsumuWlX`>s5Kt^r9_p?f0v7oqmc=bcR%D|#t=+zT``=J;U^#@NIPifS zJaaeo1~1EDz9WYa!nwW(P5XdCzrOeF18402Eg1)XEC(@=!*t{z%5qrY$bqvs@p)+4 z2Ne3IiT6xCb#4WE`mcBv&nt99SCV?-$ljTA|n72w{+wyT~|D`AM!uIC2oFgm_eB{3HrJ z;g9)y)^k)G$owQ%t&SYfLKe4-g4R#Qbbc~aBON&~8FF9R@_<5r@{uEZZoQ1CeJ~tI z{A8%?7@*RAGK|4TVf%nWzch)keJD84_{mU>ax4!rlJI^Jns^k7tgY=yC=ch*v=0Rb z8b2B08(Qh|0L3BkA?{GzGKy^cW2c?J=+g0I9H{(cs75=Mha`}xA0@_5qR38Lzsq}g z$KO+NAoG)<8so@8O3wW>HXKl7BftF4<#O6*$T(2>$xw}TaNVRA z);viRhlv}E%1?%BoGXVY<$i46H$yA*p~ndOvw#DYpA6M_M-D84cy(g?H&N)tFO&KQ zGCvtAJ8rl1^MZkm;-QVxqEMfodT_?wt!rr7hlI{g3U!)mc?i=yE@$7ftk4%PgQ$H6 zQU9jlK;kEb@*FvEh`spC)(aH+ABN|)`YpopK;tKcI^B_jU@64qViTuDku_ib%!AiH zaxWnV8b2x28IBwx9)~^`#!sTi+ONOh_4{gnN5+B5PYN~BnS;#VvuvD}8 z`!*^LWPVbpGaWey9_N7=^PYtwYyRiMmz{m&-^e&n`AMP9a^%31F!Wh$>IDk@gO=T& zYw77;j#I^d@>GHep09pJ90>%do$(sxrsu@-9Ns1 zfRUDJn=`u z@<8J!rTT~?hbWZ5!Ny@wWbIEpL>Hfg@<8P$rJC%>A%-z9J_}8M7K&_K<#c%OUQbmX z$o!;Kwuvtte~@7wrxj-WB#Nx{@EtJD+*Fn!A7`Gz=&xhP%MBPFA zDURgI964~wVBjE*kFrAVoSP2|Y9^}}RDMz_=Exz9 zLO&HYE{8&YbQ%nw{LU|^IFR{CiBmz!w-0Fuj|`2XuZiQlVpcU_1`| z|BKUFp?elRmj98I2g`xPPew{Oau8BVKVoLwAc}1GqkjuwIdgERpBIJ$jh~E^bmS0) z5Qm8Z^V~#{wLiZDhQrXjcN-2gelk*#BZru$%#XPF=}~0k)_*%6Fh^D|sQhH4Vn+^9 z4E{k#(|>^?8*}5THy?j`6cq>4pjSla&W5KN+d1jvRPC?*RT^+%gJ%=f2mz|NG}xka3{$laV^# zkppz?Mt+!@S1J^`_t2%|?rNf$#|rg=%uhz@qmCRfBw76QC`jXzoYDEoSY6=Afw2Up zKt<-ci9&ZAglQ<(K0;G33#a=*WR5G1zJxFK2}w{s#13%=!rp2TtcF z6E(w;0}DbJFK5?>L7}HFgW|C9ElLg~elk%r9XY_rFsK)S86Sm0?|gL~45)1<!e2nq2A`@U&~{-6DSfN{m&qG}(={A8lK9XaqcjbU7neO_3he{|WW;EyQ& z0Of%yKA9-H9(d{J1(W%DAT~~mLNB-*dU3{(l?O6EnW(yBc}QW{Ad6G;DuF^Ld-8R1 z9;B%kA)TL0)oe!&Nf5#G3p>9Xh35VFxIvovINWd`@sp|Qb>zU4H1Ef;?SmEiUjoAR zq2NH{CsWnu$U(rEL0{tflU8IcTW;R?_JN6n^@7GvrmEq{0hZu|cy((0B#LZw%cB|s zb7UN-{A8-;IC6+#Z5%&jraucsHvW>?c|eMa1DT&p)m%po5Uz&t50RahWJNamwC~5W z_7c5kg>-&0Rdx@L(*8lf+82I;e$t9;c>6E!YGNNID-Trt-Ko07u{?mEg!wq3iC3f0 zKT3Ao@tZ!PI1H2rDnFU3d5#=d2=lYT#P)Amq5I~-DAI+URONxpPp0ZpM-JGO_=7sXDb;8B0mWn7CLg^Ap~Ne|E9QQ z6ng67hRgRnf%_(`S~J92<2tO9(sd0wE% z&TP6b58RV+pz@PUEpg-kOYFgXUS`SzifqE67ao9x7pXXq`AMeC4qlbg@0(E)gO9>` zVMR9P=Nsj;4X=`Mpz@PU{gq>RkQ~MpC$>DG$XeI$CtHt{OS18k@Vr>+$RSKZ=zUoD~^@4x{nV)2;-;qN;Es00=Jqv|y zT|_v3l5T$-#?^914h#we^k0~`JqkVZmEZKe`S>qr+DXHKBtFU23P%p1Nb-5CWtLYB#yoT=9|%Ilfy_^Gwc3$`Nb~V9 zf$`NSvJsmr4?_+aHI8YON!OqKg~HnWND2XVC3K%IWsU0S@aNIfNaM<9;A>sKt7>OBAiz4&RSP1Lezd_SZ0tYHT z8LH1XbC57!&&FXA*hD@TnK$(b2td3;#evLEhH9fD2Pt9Qq`>aqjUpTQ)Jc@}f@(ev z)QitLa)2R0u^-^RFjnYSW)sfeq}v|{IBas{zy+*}lG<@WDD)2hYha|zOX!gIchl{U z101e*EtBem-;qR85wxf3>yx`>Pem7f%9vnz)* z7Cy7{vtXWkA+k|(&!n5LmypfxhW6nmM-D;+(obxh7DYB(=F9Z{CslbM^OHh-!I1-W z?ZUe5k(sB4LLYu7UtaEhG7ePnNuh3bIGGNQm8GC z99RGgn#a;uXcSs)fZ-2!JWEv`$o!;GUv%XFVSAR^eIJ+=Y6FAiAH0WYIMD5n!=$>! zkwXIA-6^i4g+fO@OxK(AA|(fs{%)zh&wADD8((K(0A>FVdl3I`D(iTaiBch z>c~L^5T9iBy&Hx8!%y?&Csz=b2O2*q)mBFi62fq>U$c2$pvc;O@^=2BMaF^3PfB&0 zD~EhYlt?TGE3&o;8}f+{Jz@L1t~C w?#O`)7;Z1@IyorRYY)Ne#l1vv+LUhmq*PyZ= 0)) +}) + +test_that("clean_blq_values() preserves pk_data class", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- clean_blq_values(df, verbose = FALSE) + expect_s3_class(out, "pk_data") +}) + + +# ============================================================================= +# 12. process_conc_data() & process_dose_data() +# ============================================================================= + +test_that("process_conc_data() returns a data frame", { + df <- with_mapping(make_conc_df(), std_mapping()) + out <- process_conc_data(df, decimal_control = TRUE, + blq_handling = TRUE, verbose = FALSE) + expect_s3_class(out, "data.frame") +}) + +test_that("process_conc_data() preserves column_mapping", { + m <- std_mapping() + df <- with_mapping(make_conc_df(), m) + out <- process_conc_data(df, decimal_control = FALSE, + blq_handling = FALSE, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + +test_that("process_dose_data() returns a data frame", { + df <- with_mapping(make_dose_df(), + list(subject = "USUBJID", time = "TIME", + conc = NA_character_, dose = "DOSE")) + out <- process_dose_data(df, decimal_control = TRUE, verbose = FALSE) + expect_s3_class(out, "data.frame") +}) + +test_that("process_dose_data() preserves column_mapping", { + m <- list(subject = "USUBJID", time = "TIME", + conc = NA_character_, dose = "DOSE") + df <- with_mapping(make_dose_df(), m) + out <- process_dose_data(df, decimal_control = FALSE, verbose = FALSE) + expect_equal(attr(out, "column_mapping"), m) +}) + + +# ============================================================================= +# 13. quick_validate() edge cases +# ============================================================================= + +test_that("quick_validate() is invisible", { + result <- list(conc = make_conc_df(), dose = NULL) + expect_invisible(quick_validate(result)) +}) + +test_that("quick_validate() passes when both conc and dose present", { + result <- list(conc = make_conc_df(), dose = make_dose_df()) + expect_true(quick_validate(result)) +}) \ No newline at end of file