From 2a527a78583d57e8875f8aa79f81aea9adac6f6b Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 19 Mar 2026 12:19:54 -0700 Subject: [PATCH 01/18] Adds 'spk repo index' subcommand for index generation and updates. Adds --use-indexes and --no-indexes flags to repository. Updates resolvo solver to get global variables data from an indexed repository. Signed-off-by: David Gilligan-Cook --- Cargo.lock | 1 + crates/spk-cli/cmd-repo/Cargo.toml | 1 + crates/spk-cli/cmd-repo/src/cmd_repo.rs | 113 ++++++++++++++++-- crates/spk-cli/common/src/flags.rs | 106 +++++++++++++++- crates/spk-cli/common/src/flags_test.rs | 2 + crates/spk-cli/group4/src/cmd_view.rs | 5 + crates/spk-solve/src/solvers/resolvo/mod.rs | 28 ++++- .../src/solvers/resolvo/spk_provider.rs | 26 ++-- .../src/storage/flatbuffer_index.rs | 19 +-- crates/spk-storage/src/storage/indexed.rs | 8 ++ 10 files changed, 273 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eeb34181be..4cca2b3e68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4611,6 +4611,7 @@ dependencies = [ "clap 4.5.47", "miette", "spk-cli-common", + "spk-schema", "spk-storage", "tracing", ] diff --git a/crates/spk-cli/cmd-repo/Cargo.toml b/crates/spk-cli/cmd-repo/Cargo.toml index 74aef594c4..5811c760f4 100644 --- a/crates/spk-cli/cmd-repo/Cargo.toml +++ b/crates/spk-cli/cmd-repo/Cargo.toml @@ -17,5 +17,6 @@ miette = { workspace = true, features = ["fancy"] } async-trait = { workspace = true } clap = { workspace = true } spk-cli-common = { workspace = true } +spk-schema = { workspace = true } spk-storage = { workspace = true } tracing = { workspace = true } diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index 3183051845..c945193482 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -2,10 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/spkenv/spk +use std::str::FromStr; +use std::time::Instant; + use clap::{Args, Subcommand}; use miette::{Context, Result}; -use spk_cli_common::{CommandArgs, Run}; -use spk_storage as storage; +use spk_cli_common::{CommandArgs, Run, flags}; +use spk_schema::VersionIdent; +use spk_storage::{self as storage, FlatBufferRepoIndex, RepositoryIndexMut}; use storage::Repository; /// Perform repository-level actions and maintenance @@ -45,19 +49,104 @@ pub enum RepoCommand { #[clap(name = "REPO")] repo: String, }, + /// Generate an index for a repository + Index { + #[clap(flatten)] + repos: flags::Repositories, + + /// Package/version of a published package to update in an + /// existing index. + /// + /// Other packages in the index will remain unchanged. Without + /// this the full index will be constructed from scratch. If + /// the repo does not have an index, a full index will be + /// constructed from scratch if possible. + /// + /// This option is only supported for flatbuffer indexes. + #[clap(long, name = "PACKAGE/VERSION")] + update: Option, + }, } impl RepoCommand { pub async fn run(&mut self) -> Result { - let repo = match &self { - Self::Upgrade { repo } => repo, - }; - let repo = match repo.as_str() { - "local" => storage::local_repository().await?, - _ => storage::remote_repository(repo).await?, - }; - let status = repo.upgrade().await.wrap_err("Upgrade failed")?; - tracing::info!("{}", status); - Ok(1) + match &self { + // spk repo upgrade ... + Self::Upgrade { repo: repo_name } => { + let repo = match repo_name.as_str() { + "local" => storage::local_repository().await?, + _ => storage::remote_repository(repo_name).await?, + }; + + let status = repo.upgrade().await.wrap_err("Upgrade failed")?; + tracing::info!("{}", status); + Ok(1) + } + + // spk repo index ... + Self::Index { repos, update } => { + // Generate or update an index a repo. The repo must + // be the underlying repo and not an indexed repo. So + // this disables index use for this command regardless + // of config or command line flags. + flags::disable_index_use(); + let repos = repos.get_repos_for_non_destructive_operation().await?; + if repos.len() != 1 { + tracing::error!( + "{} repos specified, Can only index one repo at a time. Please specify a single repo.", + repos.len() + ); + return Ok(2); + } + + if let Some(package_version) = update { + // Update the existing index for the given package/version + let start = Instant::now(); + let version_ident = VersionIdent::from_str(package_version)?; + let mut was_full_index = String::from(""); + + // Load the current index for this repo now + let repo = &repos[0].1; + match FlatBufferRepoIndex::from_repo_file(repo).await { + Ok(current_index) => { + let repo = &repos[0].1; + current_index + .update_repo_with_package_version(repo, &version_ident) + .await? + } + Err(err) => { + // There isn't an existing index, so generate one from scratch that + // will also include the update package version. + // TODO: could also just error out and say run a full index first, + // or ask the user "are you sure?" before continuing. + tracing::warn!("Failed to load flatbuffer index: {err}"); + tracing::warn!("No current index to update. Creating a full index ..."); + FlatBufferRepoIndex::index_repo(&repos).await?; + was_full_index = + " [no previous index, so a full index was created]".to_string() + } + }; + + tracing::info!( + "Index update for '{package_version}' in '{}' repo completed in: {} secs{was_full_index}", + repo.name(), + start.elapsed().as_secs_f64() + ); + } else { + // Generate a full index from scratch + let start = Instant::now(); + FlatBufferRepoIndex::index_repo(&repos).await?; + + let repo = &repos[0].1; + tracing::info!( + "Index generation for '{}' repo completed in: {} secs", + repo.name(), + start.elapsed().as_secs_f64() + ); + } + + Ok(0) + } + } } } diff --git a/crates/spk-cli/common/src/flags.rs b/crates/spk-cli/common/src/flags.rs index e0d9255d75..dec7d86eef 100644 --- a/crates/spk-cli/common/src/flags.rs +++ b/crates/spk-cli/common/src/flags.rs @@ -7,9 +7,11 @@ mod variant; use std::collections::HashSet; use std::convert::From; use std::sync::Arc; +use std::sync::atomic::AtomicBool; use clap::{Args, ValueEnum, ValueHint}; use miette::{Context, IntoDiagnostic, Result, bail, miette}; +use once_cell::sync::Lazy; use solve::{ DEFAULT_SOLVER_RUN_FILE_PREFIX, DecisionFormatter, @@ -45,6 +47,7 @@ use spk_solve as solve; #[cfg(feature = "statsd")] use spk_solve::{SPK_RUN_TIME_METRIC, get_metrics_client}; use spk_storage as storage; +use spk_storage::IndexedRepository; use spk_workspace::{FindOrLoadPackageTemplateError, FindPackageTemplateError}; pub use variant::{Variant, VariantBuildStatus, VariantLocation}; @@ -61,6 +64,12 @@ static SPK_SOLVER_OUTPUT_TO_DIR: &str = "SPK_SOLVER_OUTPUT_TO_DIR"; static SPK_SOLVER_OUTPUT_TO_DIR_MIN_VERBOSITY: &str = "SPK_SOLVER_OUTPUT_TO_DIR_MIN_VERBOSITY"; static SPK_SOLVER_OUTPUT_FILE_PREFIX: &str = "SPK_SOLVER_OUTPUT_FILE_PREFIX"; +static DISABLE_INDEX_USE: Lazy = Lazy::new(|| AtomicBool::new(false)); + +pub fn disable_index_use() { + DISABLE_INDEX_USE.store(true, std::sync::atomic::Ordering::Release); +} + #[derive(Args, Clone)] pub struct Runtime { /// Reconfigure the current spfs runtime (useful for speed and debugging) @@ -1052,6 +1061,20 @@ pub struct Repositories { /// per-job or per-show repos. #[clap(long)] pub wrap_origin: Option, + + /// Get the package data from the repo indexes instead of the + /// repos. This only applies to non-destructive repo operations. + /// This can be configured as the default in spk's config file, or + /// on a per-repo basis in spk's config file. + #[clap(long)] + pub use_indexes: bool, + + /// Do not get the package data from the repo index, always use + /// the repo instead. This only applies to non-destructive repo + /// operations. This option can be configured as the default in + /// spk's config file. + #[clap(long, conflicts_with = "use_indexes")] + pub no_indexes: bool, } impl Repositories { @@ -1152,8 +1175,39 @@ impl Repositories { repos.push(("local".into(), repo.into())); } if self.local_repo_only { + // Local repo only case does not use indexes because they + // are typically small and not indexed. If local repos + // became large and were indexed, this might change. return Ok(repos); } + + // Check whether using the indexes for the repos is enabled or not + let disable_all_index_use = DISABLE_INDEX_USE.load(std::sync::atomic::Ordering::Relaxed); + + // Get the overrides from the command line flags + let use_index_cli_override = if self.use_indexes || self.no_indexes { + if self.no_indexes { + // Don't use any indexes + Some(false) + } else { + // Use indexes, if the command line flag is given to use them + Some(self.use_indexes) + } + } else { + // No command line index flags given, so there's no index override + None + }; + + // Get the overall config setting, which includes env var + // setting for it, but not the per repo settings. Those are + // checked below as repos are created. + let config = spk_config::get_config()?; + let all_use_index_from_config = config.solver.use_indexes; + tracing::debug!( + "Disable all indexes: {disable_all_index_use}, Cli use all indexes: {use_index_cli_override:?}, Config use all indexes: {all_use_index_from_config}" + ); + + // Add the enabled repos for (name, ts, is_default_origin) in enabled .into_iter() .map(|(name, ts)| (name, ts, false)) @@ -1190,7 +1244,57 @@ impl Repositories { if let Some(ts) = ts.as_ref().or(self.when.as_ref()) { repo.pin_at_time(ts); } - repos.push((name.into(), repo.into())); + + // Decide whether to use an index for this repository based on + // the various settings: all repos, per repo, and overrides. + let use_index = if disable_all_index_use { + // Disable all take precedence over everything + tracing::debug!("All index use disabled"); + false + } else if let Some(use_indexes) = use_index_cli_override { + // Otherwise the all indexes command line flag takes + // precedence over what was in the config file + tracing::debug!("A cli-based use index override was given as: {use_indexes}"); + use_indexes + } else { + // Otherwise use this specific repository's config setting, if any + match config.repositories.get(name) { + Some(r_config) => { + tracing::debug!("Using index for '{name}' repo"); + r_config.use_index + } + // And last use the all indexes env var and config + // file setting. + None => { + tracing::debug!("Using config settings for all indexes"); + all_use_index_from_config + } + } + }; + tracing::debug!("Using index for '{name}': {use_index}"); + + if use_index { + // Use the index for this repo, if there is one, + // otherwise use the repo itself. + tracing::debug!("Using a repo index for '{name}'"); + let indexed_repo = match IndexedRepository::load_from_repo(Arc::new( + repo.clone().into(), + )) + .await + { + Ok(ir) => ir.into(), + Err(_err) => { + tracing::warn!( + "Failed to load index for '{name}' repo, falling back to the repo itself" + ); + repo.into() + } + }; + repos.push((name.to_string(), indexed_repo)); + } else { + tracing::debug!("Not using a repo index for '{name}' repo"); + repos.push((name.to_string(), repo.into())); + } } Ok(repos) } diff --git a/crates/spk-cli/common/src/flags_test.rs b/crates/spk-cli/common/src/flags_test.rs index 5f91d6c249..5c05205ca8 100644 --- a/crates/spk-cli/common/src/flags_test.rs +++ b/crates/spk-cli/common/src/flags_test.rs @@ -67,6 +67,8 @@ async fn test_get_solver_with_host_options( disable_repo: Default::default(), when: None, wrap_origin: None, + use_indexes: false, + no_indexes: false, }, decision_formatter_settings: DecisionFormatterSettings { time: Default::default(), diff --git a/crates/spk-cli/group4/src/cmd_view.rs b/crates/spk-cli/group4/src/cmd_view.rs index b51369d8fe..d07a67d150 100644 --- a/crates/spk-cli/group4/src/cmd_view.rs +++ b/crates/spk-cli/group4/src/cmd_view.rs @@ -781,6 +781,11 @@ impl View { /// Display the contents of a package spec fn print_build_spec(&self, package_spec: Arc) -> Result { + // TODO: does not handle packages from indexes. This will + // crash if --use-indexes was used. Those packages would need + // to be converted to something that could be serialized, or + // pieces extracted individually from the index packages, for + // this to work. match &self.format.clone().unwrap_or_default() { OutputFormat::Yaml => serde_yaml::to_writer(std::io::stdout(), &*package_spec) .into_diagnostic() diff --git a/crates/spk-solve/src/solvers/resolvo/mod.rs b/crates/spk-solve/src/solvers/resolvo/mod.rs index edea693714..07d3a3ad20 100644 --- a/crates/spk-solve/src/solvers/resolvo/mod.rs +++ b/crates/spk-solve/src/solvers/resolvo/mod.rs @@ -21,7 +21,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::sync::Arc; use std::time::Instant; -use pkg_request_version_set::{SpkSolvable, SyntheticComponent}; +use pkg_request_version_set::{SpkSolvable, SyntheticComponent, VarValue}; use spk_provider::SpkProvider; use spk_schema::ident::{ InclusionPolicy, @@ -36,7 +36,7 @@ use spk_schema::ident::{ VarRequest, }; use spk_schema::ident_component::Component; -use spk_schema::name::PkgNameBuf; +use spk_schema::name::{OptNameBuf, PkgNameBuf}; use spk_schema::prelude::{HasVersion, Named, Versioned}; use spk_schema::version_range::VersionFilter; use spk_schema::{OptionMap, Package, Spec}; @@ -146,7 +146,28 @@ impl Solver { self.build_from_source_trail = trail; } - pub async fn solve(&self) -> Result { + pub async fn solve(&mut self) -> Result { + let mut known_global_vars: HashMap> = Default::default(); + + // Gather the global vars from any indexed repos and use them + // to prime the known global vars cache. + for repo in self.repos.iter() { + if let RepositoryHandle::Indexed(indexed_repo) = &**repo { + let start_gv = Instant::now(); + for (name, values) in indexed_repo.get_global_var_values().into_iter() { + let entry = known_global_vars.entry(name).or_default(); + for v in values { + entry.insert(VarValue::Owned(v)); + } + } + tracing::debug!( + "Resolvo: gathered global var from '{}' in: {} secs", + repo.name(), + start_gv.elapsed().as_secs_f64() + ); + } + } + let repos = self.repos.clone(); let requests = self.requests.clone(); let binary_only = self.binary_only; @@ -155,6 +176,7 @@ impl Solver { let solvables = tokio::task::spawn_blocking(move || { let mut provider = Some(SpkProvider::new( repos.clone(), + known_global_vars.clone(), binary_only, build_from_source_trail, )); diff --git a/crates/spk-solve/src/solvers/resolvo/spk_provider.rs b/crates/spk-solve/src/solvers/resolvo/spk_provider.rs index 3bd0eec16e..550dd7325a 100644 --- a/crates/spk-solve/src/solvers/resolvo/spk_provider.rs +++ b/crates/spk-solve/src/solvers/resolvo/spk_provider.rs @@ -316,10 +316,10 @@ impl ResolvoPackageName { // Filter builds that don't conform to global options // XXX: This find runtime will add up. let repo = provider - .repos - .iter() - .find(|repo| repo.name() == ident.repository_name()) - .expect("Expected solved package's repository to be in the list of repositories"); + .repos + .iter() + .find(|repo| repo.name() == ident.repository_name()) + .expect("Expected solved package's repository to be in the list of repositories"); if requires_build_from_source { match provider.can_build_from_source(&ident).await { @@ -609,16 +609,19 @@ impl SpkProvider { pub fn new( repos: Vec>, + known_global_vars: HashMap>, binary_only: bool, build_from_source_trail: HashSet, ) -> Self { + let known_global_var_values = RefCell::new(known_global_vars); + Self { pool: Pool::new(), repos, global_pkg_requests: Default::default(), global_var_requests: Default::default(), interned_solvables: Default::default(), - known_global_var_values: Default::default(), + known_global_var_values, queried_global_var_values: Default::default(), cancel_solving: Default::default(), binary_only, @@ -1093,12 +1096,13 @@ impl DependencyProvider for SpkProvider { ); // XXX: This find runtime will add up. let repo = self - .repos - .iter() - .find(|repo| repo.name() == located_build_ident_with_component.ident.repository_name()) - .expect( - "Expected solved package's repository to be in the list of repositories", - ); + .repos + .iter() + .find(|repo| repo.name() == located_build_ident_with_component.ident.repository_name()) + .expect( + "Expected solved package's repository to be in the list of repositories", + ); + if let Ok(package) = repo .read_package(located_build_ident_with_component.ident.target()) .await diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 15ddce4cd9..b8536cb533 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -144,7 +144,7 @@ impl FlatBufferRepoIndex { let start_check_fb = Instant::now(); index.check_fb_index()?; tracing::debug!( - "'{name}' repo index checked as flatb RepositoryIndex: {} secs", + "'{name}' repo index verified before use : {} secs", start_check_fb.elapsed().as_secs_f64() ); } else { @@ -162,7 +162,7 @@ impl FlatBufferRepoIndex { } tracing::debug!( - "'{name}' repo index flatbuffer total time : {} secs", + "'{name}' repo index flatbuffer from bytes in: {} secs", start.elapsed().as_secs_f64() ); @@ -324,20 +324,24 @@ impl FlatBufferRepoIndex { let mut num_versions = 0; let mut num_builds = 0; + let mut num_erroring_builds = 0; let mut traversal = repo_walker.walk(); while let Some(item) = traversal.try_next().await? { match item { RepoWalkerItem::Version(version) => { + num_versions += 1; + let name = version.ident.name(); let v = version.ident.version().clone(); let pkg_info = packages.entry(name.into()).or_default(); pkg_info.versions.push(v.clone()); let _ver_info = pkg_info.version_builds.entry(v).or_default(); - num_versions += 1; } RepoWalkerItem::Build(build) => { + num_builds += 1; + // Add a build spec and related things let build_ident = build.spec.ident(); @@ -351,6 +355,7 @@ impl FlatBufferRepoIndex { let component_map = match repo.read_components(build.spec.ident()).await { Ok(c) => c, Err(err) => { + num_erroring_builds += 1; tracing::warn!( "Problem reading published components for '{}': {err}. Skipping it.", build.spec.ident() @@ -367,8 +372,6 @@ impl FlatBufferRepoIndex { ver_info.build_specs.push(build_info); global_vars.extract_global_vars(&build.spec, &package_names)?; - - num_builds += 1; } // Ignore everything else @@ -379,14 +382,12 @@ impl FlatBufferRepoIndex { // Debugging and logging let mut vars: Vec = global_vars.keys().map(|k| k.to_string()).collect(); vars.sort(); - tracing::info!("Globals found:\n\t{}", vars.into_iter().join("\n\t")); + tracing::debug!("Globals found:\n\t{}", vars.into_iter().join("\n\t")); tracing::info!( - "Index for '{}' repo consists of {} packages, {} versions, {} builds, with {} global vars", + "Index for '{}' repo consists of {} packages, {num_versions} versions, {num_builds} builds ({num_erroring_builds} errors), with {} global vars", repo.name(), packages.len(), - num_versions, - num_builds, global_vars.keys().len() ); diff --git a/crates/spk-storage/src/storage/indexed.rs b/crates/spk-storage/src/storage/indexed.rs index 796d867318..46c49ed29b 100644 --- a/crates/spk-storage/src/storage/indexed.rs +++ b/crates/spk-storage/src/storage/indexed.rs @@ -4,6 +4,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use std::time::Instant; use arc_swap::ArcSwap; use spk_schema::foundation::ident_build::Build; @@ -71,6 +72,7 @@ impl IndexedRepository { pub async fn load_from_repo( repo_to_wrap: Arc, ) -> Result { + let start = Instant::now(); let index_kind = IndexedRepository::get_index_kind_from_config()?; let index = match index_kind.as_ref() { @@ -92,6 +94,12 @@ impl IndexedRepository { } }; + tracing::debug!( + "'{}' repo index flatbuffer total load in: {} secs", + repo_to_wrap.name(), + start.elapsed().as_secs_f64() + ); + Ok(IndexedRepository { index: ArcSwap::new(Arc::new(index)), wrapped_repo: repo_to_wrap, From d8ecb17da060bd0e94f141357a4a76928ce13d4f Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 27 Mar 2026 15:45:50 -0700 Subject: [PATCH 02/18] Adds the indexing settings to the config file docs Signed-off-by: David Gilligan-Cook --- docs/admin/config.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/admin/config.md b/docs/admin/config.md index 4da68fd689..63d11f5a2e 100644 --- a/docs/admin/config.md +++ b/docs/admin/config.md @@ -300,4 +300,15 @@ host_filtering = false # Set a default compat rule for this distro. For example, on Rocky Linux # packages built on 9.3 would be usable on 9.4. compat_rule = "x.ab" + +# SPK supports using pre-generated repository indexes to speed up solves. +use_indexes = false + +# The kind of repository index format. SPK supports a flatbuffer based index. +# index_kind = "flatb" + +# SPK supports validating flatbuffers index data before using it. +# This can be disabled, but validating is safer even though it adds +# some overhead at the start of solves using an index. +verify_flatbuffers_index_before_use = true ``` From bca8defb82602b8ff06e02add1ac1b48c8f4b251 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 27 Mar 2026 16:04:45 -0700 Subject: [PATCH 03/18] Adds deprecated builds count to index generation output Signed-off-by: David Gilligan-Cook --- crates/spk-storage/src/storage/flatbuffer_index.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index b8536cb533..a8313fbcfa 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -325,6 +325,7 @@ impl FlatBufferRepoIndex { let mut num_versions = 0; let mut num_builds = 0; let mut num_erroring_builds = 0; + let mut num_deprecated_builds = 0; let mut traversal = repo_walker.walk(); while let Some(item) = traversal.try_next().await? { @@ -341,6 +342,9 @@ impl FlatBufferRepoIndex { } RepoWalkerItem::Build(build) => { num_builds += 1; + if build.spec.is_deprecated() { + num_deprecated_builds += 1; + } // Add a build spec and related things let build_ident = build.spec.ident(); @@ -385,7 +389,7 @@ impl FlatBufferRepoIndex { tracing::debug!("Globals found:\n\t{}", vars.into_iter().join("\n\t")); tracing::info!( - "Index for '{}' repo consists of {} packages, {num_versions} versions, {num_builds} builds ({num_erroring_builds} errors), with {} global vars", + "Index for '{}' repo consists of {} packages, {num_versions} versions, {num_builds} builds ({num_deprecated_builds} deprecated, {num_erroring_builds} errors), with {} global vars", repo.name(), packages.len(), global_vars.keys().len() From 5fb7e104dc17465946e27fd2fe871714848dd0a7 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Mon, 30 Mar 2026 09:38:30 -0700 Subject: [PATCH 04/18] Disables index use for spk info command Signed-off-by: David Gilligan-Cook --- crates/spk-cli/cmd-repo/src/cmd_repo.rs | 2 -- crates/spk-cli/group4/src/cmd_view.rs | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index c945193482..f8da2683f9 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -117,8 +117,6 @@ impl RepoCommand { Err(err) => { // There isn't an existing index, so generate one from scratch that // will also include the update package version. - // TODO: could also just error out and say run a full index first, - // or ask the user "are you sure?" before continuing. tracing::warn!("Failed to load flatbuffer index: {err}"); tracing::warn!("No current index to update. Creating a full index ..."); FlatBufferRepoIndex::index_repo(&repos).await?; diff --git a/crates/spk-cli/group4/src/cmd_view.rs b/crates/spk-cli/group4/src/cmd_view.rs index d07a67d150..b68cf3d04f 100644 --- a/crates/spk-cli/group4/src/cmd_view.rs +++ b/crates/spk-cli/group4/src/cmd_view.rs @@ -143,6 +143,12 @@ impl Run for View { type Output = i32; async fn run(&mut self) -> Result { + // This command does not work correctly with IndexedRepo + // objects. The repos must be the underlying repo and not an + // indexed repo, so disable index use regardless of config or + // command line flags. + flags::disable_index_use(); + if self.variants || self.variants_with_tests { let options = self.options.get_options()?; let mut workspace = self From 575e1554eae16c2f39e8988b190bba83648f1a63 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 2 Apr 2026 10:26:10 -0700 Subject: [PATCH 05/18] Updates config docs Signed-off-by: David Gilligan-Cook --- docs/admin/config.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/admin/config.md b/docs/admin/config.md index 63d11f5a2e..beadd15376 100644 --- a/docs/admin/config.md +++ b/docs/admin/config.md @@ -307,8 +307,8 @@ use_indexes = false # The kind of repository index format. SPK supports a flatbuffer based index. # index_kind = "flatb" -# SPK supports validating flatbuffers index data before using it. -# This can be disabled, but validating is safer even though it adds -# some overhead at the start of solves using an index. -verify_flatbuffers_index_before_use = true +# SPK supports validating index data before using it. +# This can be disabled, but validating is safer even though it can add +# some overhead at the start of a solve, when using an index. +verify_index_before_use = true ``` From d341505a1ab232f4f0c619181163c98430f66fe1 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 2 Apr 2026 12:31:12 -0700 Subject: [PATCH 06/18] Adds index docs to reference seciton Signed-off-by: David Gilligan-Cook --- docs/ref/indexes.md | 254 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 docs/ref/indexes.md diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md new file mode 100644 index 0000000000..79d5b4cdde --- /dev/null +++ b/docs/ref/indexes.md @@ -0,0 +1,254 @@ +--- +title: Indexes +summary: Indexes for improving solve times +weight: 120 +--- + +This explains indexes, indexing, index controls, and index requirements in `spk`. + +## Spk Repository Indexes + +`SPK` supports generating a packages index for a repository to help +with solves. An index must have been generated separately for a +repository before it can be used. Using an index speeds up solves +against that use that repository. + +The index is designed to help the solvers with solves (package +look-ups, deprecation checks, pre-loading non-package variable +references, getting install requirements, etc.). It does not contain +the full package data. So it does not have the information needed to +help other `spk` operations, e.g. building or testing a package. + +If indexing is enabled, you have to generate an index before to trying +to use it in a solve. They are not generated on the fly (outside of +automated tests). + +If index use is enabled, but no index has been generated for a +repository, `spk` will fallback to using the underlying repository's +packages directly. This typically results in slower solves, +particularly with larger repositories. + +If a solve uses multiple repositories and indexes exist for some or +all of them, `spk` will use the indexes that exist. + + +### Enabling/Disabling Indexes + +`spk` index use can be enabled or disabled in the `spk` config file. +It is disabled by default because index generation and updating needs +to be set up on per repository basis. Setting this up is recommended +for repositories with a large number of packages. + +Most `spk` commands can use the `--use-indexes` and `--no-indexes` +flags to override repository index use.`spk repo index ...` command +always has index use disabled because it operates on the indexes +themselves. `spk info ...` also disables indexes because to display +full package information requires reading the package from the +repository directly, not from an index. + +If index use is enabled in the config file, it can be disabled with +the `--no-indexes` command line flag. + +If index use is disabled in the config file, it can be enabled with the +`--use-indexes` flag. + +### Generating an Index + +To generate an index for a repository (e.g. origin), run: +`spk repo index --disable-repo local` + +This will generate a flatbuffer schema based index file. The index +file is stored in the underlying spfs repo (e.g. origin repo) in a +`index/spk/` sub-directory. + +Index generation only works on one repository at a time (hence the +`--disable-repo local` when working on the default `origin` repo). If +you have multiple repositories to index you have to run `spk repo index +...` once for each repository. + +See `spk repo index -h` for more details. + +You don't have to generate a full index every time a package changes, +you can also update a package in an existing index, see the next +section. + + +### Updating an existing Index + +Updating a package in an existing index, such as after a new build is +published or a package is deprecated, is not automatic. + +A site using `spk` has to set up a system to trigger index updates +when they want them to happen. The recommendation is after a new +package is published, deprecated, undeprecated, or deleted. But +regular periodic complete index generation may also work for a site, +depending on the frequency of package changes and the periodic updates. + +To update an existing index, e.g. after a new `python` package was +published, run: +`spk repo index --disable-repo local --update python` + +This will read in the existing index for the repository and update the +versions and builds of the named package in the index. It is faster +than generating an index from scratch. It has to be run once per +repository and once per package. + +See `spk repo index -h` for more details. + + +## Index vs Repository mismatches - updates are important + +When index use is enabled, it is important to update the indexes when +changes happen to the repository and its packages. Otherwise, the +index and real package data will get out of sync. An index that is old +won't have the same data as the repositories it is based one. This can +lead to discrepancies in solves. + + +## Index Details + +### Index file location + +An index is stored in a file inside the repository it indexes. Only +spk filesystem repositories (based on spfs fs repositories) support +indexes. The file will be kept in the `index/spk/` sub-directory of an +spfs fs repository. + + +### Structure and types in SPK + +An index is implemented by a set of types in spk-storage and +spk-schema that work together to wrap a repository in its index and +produce indexed packages that act as packages for the solver. + +The hierarchy of index related types: + +``` +in the 'spk-storage' crate: + +RepositoryHandle enum + Indexed struct (implements Repository and Storage traits) + RepoIndex enum (implements RepoIndex and RepoIndexMut traits) + FlatBufferRepoIndex struct (implements RepoIndex and RepoIndexMut traits) + flatbuffers schema + +in the 'spk-schema' crate: + IndexedPackage (implements Package traits, produced by a FlatBufferRepoIndex) + flatbuffers schema + +``` + + + +### Flatbuffer index schema + +`spk` uses flatbuffers for the index data format on disk. This is fast +to read and use, but slow to generate. Updates to an existing index +require an index to be read in completely and written out again with +the updated data. It does not support updating complex structures +in-place. + +The `spk-proto` crate contains spk's flatbuffers schema for an +index. It requires `flatc` to be installed to generate the rust code +for the index schema. + +The schema is versioned internally, and in the index file name contains +the schema version number as well. + +The index structure does not match the rust object structures +1-to-1. The is partially due to only keeping things needs for solves, +and partially due to what flatbuffers require, support, and recommend +(lists not sets or mappings, removing duplication and intermediate +structures). + +The broad top-level schema structure is (capital letters indicate a schema table name): +``` +RepositoryIndex + schema version number + list of PackageIndex + name + list of VersionIndex + version number + list of BuildIndex + build digest/id + ... other fields needed for solving + list of GlobalVar (used to prime the 'resolvo' solver to avoid restarts) + name + list of valid values +``` + +The structure is quite deep to cover the data for options, +requirements, compat rules, version numbers, build ids, and +components. See the schema itself for more detailed information, +including which fields of rust objects are being ignored, omitted from +an index. + + +### How to evolve the index schema + +The flatbuffers data format supports adding new fields and structures +without breaking existing code, provided fields are not deleted. New +fields should be added only to the ends of tables. + +The index schema's version number lets `spk` check that the index data +is compatible with the current spk being run. That is, that the code +understands this version of the schema. There will be situations where +multiple versions of spk are being used in a site and they may not all +be able to use index data generated by other versions of spk. + +Spk includes the schema version number in the index file name to allow +multiple index schema to co-exist during a transition between index +versions. + +#### Adding a new field + +When a new field needs to be added to the index schema, the developer should: +- Increment the schema version number +- Add the new field to the schema +- Increment spk's compatible schema version number +- Add spk code to read and write the new field +- Make a new release of spk +- Generate a new index using the new spk build +- Transition the site to the new spk release +- Once the transition is complete, retire the older index data file and generation + +During the transition the older version of spk will use the older +index format(s) and the newer spk will use the newer one. Both indexes +will have to be generated and kept up to date during the transition. +Once the older spk version is fully retired, the older index data can +be retired too. + + +#### Stop using an existing field + +When an older field should not be used anymore, the developer should: +- Add spk code to stop reading the old field +- Make a new release of spk that no longer reads the field +- Transition the site to the new spk release + +There's no need to change the index schema version for to stop reading +a field, and index generation remains unchanged (even though it +includes data that isn't read by spk anymore), + + +#### Removing an old field + +When a new field needs to be removed from the index schema, the developer should: +- Double check the reason for removing the field, it may not be worth the trouble. This is a 2 stage process: +- Stage 1: + - Add spk code to stop reading the old field + - Make a new release of spk that no longer reads the field + - Transition the site to the new spk release +- Stage 2: + - Increment the schema version number + - Increment spk's compatible schema version number + - Add spk code to stop writing the old field + - Make another new release of spk that no longer writes the field + - Transition the site to the new spk release + - Once that transition is complete, retire the older index data file and generation + +During the transition the older version of spk will use the older +index format(s) and the newer spk will use the newer one. Both indexes +will have to be generated and kept up to date during the transition. +Once the older spk version is fully retired, the older index data can +be retired too. From ae677a876ebe232c6c0fb82f818f7aae9e33af35 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Tue, 7 Apr 2026 11:32:13 -0700 Subject: [PATCH 07/18] Moves index location path creation and access into spfs repository handles. Signed-off-by: David Gilligan-Cook --- crates/spfs/src/error.rs | 2 + crates/spfs/src/storage/handle.rs | 46 +++++++++++ crates/spfs/src/storage/index_path.rs | 15 ++++ crates/spfs/src/storage/mod.rs | 2 + .../src/storage/flatbuffer_index.rs | 79 ++----------------- crates/spk-storage/src/storage/handle.rs | 35 ++++++++ crates/spk-storage/src/storage/indexed.rs | 7 ++ crates/spk-storage/src/storage/spfs.rs | 13 ++- 8 files changed, 125 insertions(+), 74 deletions(-) create mode 100644 crates/spfs/src/storage/index_path.rs diff --git a/crates/spfs/src/error.rs b/crates/spfs/src/error.rs index 8ac760bd44..44b82a63c4 100644 --- a/crates/spfs/src/error.rs +++ b/crates/spfs/src/error.rs @@ -104,6 +104,8 @@ pub enum Error { #[source] source: storage::OpenRepositoryError, }, + #[error("Repository does not support index storage location: {0:?}")] + NoIndexStorageLocation(url::Url), #[error("No remote named '{0}' configured")] #[diagnostic( diff --git a/crates/spfs/src/storage/handle.rs b/crates/spfs/src/storage/handle.rs index 846b763078..c69d1b6d8b 100644 --- a/crates/spfs/src/storage/handle.rs +++ b/crates/spfs/src/storage/handle.rs @@ -3,6 +3,7 @@ // https://github.com/spkenv/spk use std::borrow::Cow; +use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; @@ -15,9 +16,13 @@ use super::prelude::*; use super::tag::TagSpecAndTagStream; use super::{TagNamespace, TagNamespaceBuf, TagStorageMut}; use crate::graph::ObjectProto; +use crate::storage::IndexPath; use crate::tracking::{self, BlobRead}; use crate::{Error, Result, graph}; +// Index sub-directory inside a repository +const INDEX_SUB_DIR: &str = "index"; + #[derive(Debug)] #[allow(clippy::large_enum_variant)] pub enum RepositoryHandle { @@ -150,6 +155,47 @@ impl Address for RepositoryHandle { } } +#[async_trait::async_trait] +impl IndexPath for RepositoryHandle { + async fn index_path(&self) -> Result { + // Only FS repositories have a location for indexes at this time. + match self { + RepositoryHandle::FS(repo) => { + // Makes the spfs fs repository specific index + // sub-directory, if it does not exist, and returns + // the path to it. + let mut index_path = PathBuf::new(); + index_path.push(repo.root()); + index_path.push(INDEX_SUB_DIR); + + crate::runtime::makedirs_with_perms(&index_path, 0o777).map_err(|source| { + Error::String(format!( + "Unable to make '{INDEX_SUB_DIR}' sub-directory in spfs filesystem repo: {source}" + )) + })?; + + Ok(index_path) + } + + RepositoryHandle::Tar(repo) => { + Err(Error::NoIndexStorageLocation(repo.address().into_owned())) + } + RepositoryHandle::Rpc(repo) => { + Err(Error::NoIndexStorageLocation(repo.address().into_owned())) + } + RepositoryHandle::FallbackProxy(repo) => { + Err(Error::NoIndexStorageLocation(repo.address().into_owned())) + } + RepositoryHandle::Proxy(repo) => { + Err(Error::NoIndexStorageLocation(repo.address().into_owned())) + } + RepositoryHandle::Pinned(repo) => { + Err(Error::NoIndexStorageLocation(repo.address().into_owned())) + } + } + } +} + #[async_trait::async_trait] impl TagStorage for RepositoryHandle { #[inline] diff --git a/crates/spfs/src/storage/index_path.rs b/crates/spfs/src/storage/index_path.rs new file mode 100644 index 0000000000..7cee8e4a3e --- /dev/null +++ b/crates/spfs/src/storage/index_path.rs @@ -0,0 +1,15 @@ +// Copyright (c) Contributors to the SPK project. +// SPDX-License-Identifier: Apache-2.0 +// https://github.com/spkenv/spk + +use std::path::PathBuf; + +use crate::Result; + +/// The index location path of a repository. +#[async_trait::async_trait] +pub trait IndexPath { + /// Get the index location path of this repository, will create it + /// if it does not exist. + async fn index_path(&self) -> Result; +} diff --git a/crates/spfs/src/storage/mod.rs b/crates/spfs/src/storage/mod.rs index b719f2c2b5..696108cb00 100644 --- a/crates/spfs/src/storage/mod.rs +++ b/crates/spfs/src/storage/mod.rs @@ -5,6 +5,7 @@ mod address; mod blob; mod error; +mod index_path; mod layer; mod manifest; pub mod payload; @@ -27,6 +28,7 @@ pub use address::Address; pub use blob::{BlobStorage, BlobStorageExt}; pub use error::OpenRepositoryError; pub use handle::RepositoryHandle; +pub use index_path::IndexPath; pub use layer::{LayerStorage, LayerStorageExt}; pub use manifest::ManifestStorage; pub use payload::PayloadStorage; diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index a8313fbcfa..3793558d3d 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -13,7 +13,6 @@ use std::time::Instant; use futures::TryStreamExt; use itertools::Itertools; -use spfs::prelude::FromUrl; use spk_schema::foundation::ident_component::Component; use spk_schema::foundation::name::{PkgName, PkgNameBuf}; use spk_schema::foundation::version::Version; @@ -45,7 +44,6 @@ use spk_schema::{ version_to_fb_version, }; -use super::repository::Repository; use crate::storage::{RepositoryIndex, RepositoryIndexMut}; use crate::{Error, RepoWalkerBuilder, RepoWalkerItem, Result}; @@ -59,12 +57,10 @@ const COMPATIBLE_INDEX_SCHEMA_VERSION: u32 = 1; // Index name and kind constants pub const FLATBUFFER_INDEX: &str = "flatb"; +const SPK_INDEX_SUB_DIR_NAME: &str = "spk"; const INDEX_FILE_PREFIX: &str = "index"; const INDEX_FILE_EXT: &str = "fb"; -const INDEX_SUB_DIR: &str = "index"; -const SPK_INDEX_SUB_DIR_NAME: &str = "spk"; - // Flatbuffer builder constants const DEFAULT_CAPACITY: usize = 1024; @@ -576,78 +572,17 @@ impl FlatBufferRepoIndex { Ok(builder) } - /// This will create the index path inside the repo, for spk - /// indexes, if it does not exist. - async fn get_index_path_from_repo_address( - repo_name: &str, - address_url: &url::Url, - ) -> Result { - // Only handles urls that can parse as fs repo configs. Other - // repository types do not support storing index files. - let spfs_repo_config = match spfs::storage::fs::Config::from_url(address_url).await { - Ok(c) => c, - Err(err) => { - return Err(Error::IndexNoRepoPathError( - repo_name.to_string(), - err.to_string(), - )); - } - }; + async fn repo_index_location(repo: &crate::RepositoryHandle) -> Result { + let base_path = repo.index_location_path().await?; - // TODO: consider making the base index path configurable, - // with the default being the repo base path + /index/spk. + // Make the spk specific sub-directory within the repo's index + // location. let mut index_path = PathBuf::new(); - index_path.push(spfs_repo_config.path); - - index_path.push(INDEX_SUB_DIR); - spfs::runtime::makedirs_with_perms(&index_path, 0o777).map_err(|source| { - Error::String(format!( - "Unable to make '{INDEX_SUB_DIR}' sub-dir in '{repo_name}' repo: {source}" - )) - })?; - + index_path.push(base_path); index_path.push(SPK_INDEX_SUB_DIR_NAME); - spfs::runtime::makedirs_with_perms(&index_path, 0o777) - .map_err(|source| Error::String(format!("Unable to make {SPK_INDEX_SUB_DIR_NAME} sub-dir in '{repo_name}'s index directory: {source}")))?; - Ok(index_path) - } + spfs::runtime::makedirs_with_perms(&index_path, 0o777); - async fn repo_index_location(repo: &crate::RepositoryHandle) -> Result { - let base_path = match repo { - crate::RepositoryHandle::SPFS(spfs_repo) => { - Self::get_index_path_from_repo_address(spfs_repo.name(), spfs_repo.address()) - .await? - } - - crate::RepositoryHandle::Mem(mem_repo) => { - // A mem repo doesn't have a usable location for files - return Err(Error::IndexNoRepoLocationError( - mem_repo.name().to_string(), - "Spk Mem".to_string(), - )); - } - - crate::RepositoryHandle::Runtime(runtime_repo) => { - // A spfs runtime repo doesn't have a usable location - // for files. - return Err(Error::IndexNoRepoLocationError( - runtime_repo.name().to_string(), - "Spk Runtime".to_string(), - )); - } - - crate::RepositoryHandle::Indexed(indexed_repo) => { - // Indexed repositories are store their index data - // based on the repo they wrap so use the underlying - // repo's location for indexes. - Self::get_index_path_from_repo_address(indexed_repo.name(), indexed_repo.address()) - .await? - } - }; - - let mut index_path = PathBuf::new(); - index_path.push(base_path); // Index file name contains the index schema version for ease // of identifying a compatible index. The index version is // also checked later when the bytes are turned into an index diff --git a/crates/spk-storage/src/storage/handle.rs b/crates/spk-storage/src/storage/handle.rs index 1be215714d..ab3a5242de 100644 --- a/crates/spk-storage/src/storage/handle.rs +++ b/crates/spk-storage/src/storage/handle.rs @@ -2,10 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // https://github.com/spkenv/spk +use std::path::PathBuf; + use spk_schema::{Spec, SpecRecipe}; use variantly::Variantly; use super::Repository; +use crate::{Error, Result}; type Handle = dyn Repository; @@ -40,6 +43,38 @@ impl RepositoryHandle { Self::Indexed(repo) => Box::new(repo), } } + + pub async fn index_location_path(&self) -> Result { + match self { + Self::SPFS(spfs_repo) => spfs_repo.get_or_create_index_path().await, + + Self::Mem(mem_repo) => { + // A mem repo does not have a usable location for + // index files. + Err(Error::IndexNoRepoLocationError( + mem_repo.name().to_string(), + "Spk Mem".to_string(), + )) + } + + Self::Runtime(runtime_repo) => { + // A spk runtime repo does not have a usable location + // index files. + Err(Error::IndexNoRepoLocationError( + runtime_repo.name().to_string(), + "Spk Runtime".to_string(), + )) + } + + Self::Indexed(indexed_repo) => { + // Indexed repositories store their index data based + // on the repo they wrap, so use the underlying repo's + // location. This is mildly recursive because the + // wrapped repo is a spk RepositoryHandle. + Box::pin(indexed_repo.wrapped_repo_index_location_path()).await + } + } + } } impl std::ops::Deref for RepositoryHandle { diff --git a/crates/spk-storage/src/storage/indexed.rs b/crates/spk-storage/src/storage/indexed.rs index 46c49ed29b..396af95c9f 100644 --- a/crates/spk-storage/src/storage/indexed.rs +++ b/crates/spk-storage/src/storage/indexed.rs @@ -3,6 +3,7 @@ // https://github.com/spkenv/spk use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; @@ -60,6 +61,12 @@ impl IndexedRepository { Ok(index_kind) } + /// Get the index path from the underlying spfs filesystem + /// repository, which may create it if needed. + pub async fn wrapped_repo_index_location_path(&self) -> Result { + self.wrapped_repo.index_location_path().await + } + /// Set whether to update the internal index after any publish or /// write operation on this repository. This is meant for use by /// automated testing. Updating the index continuously may be costly. diff --git a/crates/spk-storage/src/storage/spfs.rs b/crates/spk-storage/src/storage/spfs.rs index 6e7a7629dd..bd82f35cb8 100644 --- a/crates/spk-storage/src/storage/spfs.rs +++ b/crates/spk-storage/src/storage/spfs.rs @@ -4,7 +4,7 @@ use std::collections::{HashMap, HashSet, hash_map}; use std::convert::{TryFrom, TryInto}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; @@ -16,7 +16,7 @@ use once_cell::sync::Lazy; use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use spfs::prelude::{RepositoryExt as SpfsRepositoryExt, *}; -use spfs::storage::EntryType; +use spfs::storage::{EntryType, IndexPath}; use spfs::tracking::{self, TagSpec}; use spk_schema::foundation::ident_build::{Build, parse_build}; use spk_schema::foundation::ident_component::Component; @@ -151,6 +151,15 @@ impl SpfsRepository { &self.inner } + /// Get the index path from the underlying spfs repository, which + /// may create it if needed. + pub async fn get_or_create_index_path(&self) -> Result { + self.inner + .index_path() + .await + .map_err(|err| Error::IndexNoRepoPathError(self.name.to_string(), err.to_string())) + } + /// Pin this repository to a specific point in time, limiting /// all queries and making it read-only pub fn pin_at_time(&mut self, ts: &spfs::tracking::TimeSpec) { From 79059b0a73584a91562a65fc0e07cc60af03e5b4 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Wed, 8 Apr 2026 09:25:42 -0700 Subject: [PATCH 08/18] Updates docs on indexes Signed-off-by: David Gilligan-Cook --- docs/ref/indexes.md | 86 +++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md index 79d5b4cdde..706f4b79e6 100644 --- a/docs/ref/indexes.md +++ b/docs/ref/indexes.md @@ -11,7 +11,7 @@ This explains indexes, indexing, index controls, and index requirements in `spk` `SPK` supports generating a packages index for a repository to help with solves. An index must have been generated separately for a repository before it can be used. Using an index speeds up solves -against that use that repository. +using that repository. The index is designed to help the solvers with solves (package look-ups, deprecation checks, pre-loading non-package variable @@ -19,9 +19,9 @@ references, getting install requirements, etc.). It does not contain the full package data. So it does not have the information needed to help other `spk` operations, e.g. building or testing a package. -If indexing is enabled, you have to generate an index before to trying -to use it in a solve. They are not generated on the fly (outside of -automated tests). +If indexing is enabled, you have to generate an index before trying to +use it in a solve. They are not generated on the fly (outside of tiny +repositories for automated tests). If index use is enabled, but no index has been generated for a repository, `spk` will fallback to using the underlying repository's @@ -36,15 +36,15 @@ all of them, `spk` will use the indexes that exist. `spk` index use can be enabled or disabled in the `spk` config file. It is disabled by default because index generation and updating needs -to be set up on per repository basis. Setting this up is recommended +to be set up on a per repository basis. Setting this up is recommended for repositories with a large number of packages. Most `spk` commands can use the `--use-indexes` and `--no-indexes` -flags to override repository index use.`spk repo index ...` command -always has index use disabled because it operates on the indexes -themselves. `spk info ...` also disables indexes because to display -full package information requires reading the package from the -repository directly, not from an index. +flags to override repository index use. The `spk repo index ...` +command always has index use disabled because it operates on the +indexes themselves. `spk info ...` also disables indexes because +displaying complete package information requires reading the package +from the repository directly, not from an index. If index use is enabled in the config file, it can be disabled with the `--no-indexes` command line flag. @@ -68,7 +68,7 @@ you have multiple repositories to index you have to run `spk repo index See `spk repo index -h` for more details. -You don't have to generate a full index every time a package changes, +You do not have to generate a full index every time a package changes, you can also update a package in an existing index, see the next section. @@ -101,8 +101,8 @@ See `spk repo index -h` for more details. When index use is enabled, it is important to update the indexes when changes happen to the repository and its packages. Otherwise, the index and real package data will get out of sync. An index that is old -won't have the same data as the repositories it is based one. This can -lead to discrepancies in solves. +will not have the same data as the repositories it is based on. This +can lead to discrepancies in solves. ## Index Details @@ -117,11 +117,12 @@ spfs fs repository. ### Structure and types in SPK -An index is implemented by a set of types in spk-storage and -spk-schema that work together to wrap a repository in its index and -produce indexed packages that act as packages for the solver. +An index is implemented by a set of types in the spk-storage and +spk-schema crates that work together to wrap a repository with its +index and produce indexed package objects that act as packages for the +solver. -The hierarchy of index related types: +The hierarchy of index related types is: ``` in the 'spk-storage' crate: @@ -152,14 +153,14 @@ The `spk-proto` crate contains spk's flatbuffers schema for an index. It requires `flatc` to be installed to generate the rust code for the index schema. -The schema is versioned internally, and in the index file name contains +The schema is versioned internally, and the index file name contains the schema version number as well. The index structure does not match the rust object structures -1-to-1. The is partially due to only keeping things needs for solves, -and partially due to what flatbuffers require, support, and recommend -(lists not sets or mappings, removing duplication and intermediate -structures). +1-to-1. This is partially due to only keeping things needed for +solves, and partially due to what flatbuffers require, support, and +recommend (lists not sets or mappings, removing duplication and +intermediate structures). The broad top-level schema structure is (capital letters indicate a schema table name): ``` @@ -197,7 +198,7 @@ multiple versions of spk are being used in a site and they may not all be able to use index data generated by other versions of spk. Spk includes the schema version number in the index file name to allow -multiple index schema to co-exist during a transition between index +multiple index schemas to co-exist during a transition between index versions. #### Adding a new field @@ -219,20 +220,43 @@ Once the older spk version is fully retired, the older index data can be retired too. -#### Stop using an existing field +#### Stop reading an existing field -When an older field should not be used anymore, the developer should: +When an older field does not need to be read anymore, the developer should: - Add spk code to stop reading the old field - Make a new release of spk that no longer reads the field - Transition the site to the new spk release -There's no need to change the index schema version for to stop reading +There is no need to change the index schema version for to stop reading a field, and index generation remains unchanged (even though it -includes data that isn't read by spk anymore), +includes data that is not read by spk anymore). + + +#### Stop populating an existing field + +If an older field no longer needs to be populated, the developer should: +- Check the field is no longer read (see above). If it is still being read then continuing may cause unexpected results. +- Increment the schema version number +- Increment spk's compatible schema version number +- Add spk code to write None (or the default enum entry depending on the field type) into the index +- Make a new release of spk that contains this change +- Transition the site to the new spk release + +Technically, the field will still be present in the index schema, and +be set in index generation because it is needed in the index's object +constructors. But the None values will reduce the space the field +takes up. And while this kind of change in backwards compatible, the +empty field may be treated a default value by older versions of spk +when read from the index - thus the need to version up the index +schema to ensure this does not cause unexpected behaviour when +multiple versions of spk are in use. #### Removing an old field +Removing an old field will break backwards compatibility of the +flatbuffers format. It is not something that should be done lightly. + When a new field needs to be removed from the index schema, the developer should: - Double check the reason for removing the field, it may not be worth the trouble. This is a 2 stage process: - Stage 1: @@ -242,13 +266,13 @@ When a new field needs to be removed from the index schema, the developer should - Stage 2: - Increment the schema version number - Increment spk's compatible schema version number - - Add spk code to stop writing the old field + - Add spk code to stop writing the old field at all - Make another new release of spk that no longer writes the field - Transition the site to the new spk release - - Once that transition is complete, retire the older index data file and generation + - Once that transition is complete, retire the older index data file and generation process During the transition the older version of spk will use the older index format(s) and the newer spk will use the newer one. Both indexes will have to be generated and kept up to date during the transition. -Once the older spk version is fully retired, the older index data can -be retired too. +Once the older spk version is fully retired, the older index data and +its generation can be retired too. From 401c9bb2317438c24bd8a4dd9eb4cddba6054d09 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Wed, 8 Apr 2026 16:59:41 -0700 Subject: [PATCH 09/18] Updates to spk config file, command line options, and defaults for index use. Signed-off-by: David Gilligan-Cook --- crates/spk-cli/common/src/flags.rs | 130 +++++++++++------- crates/spk-cli/common/src/flags_test.rs | 3 +- crates/spk-config/src/config.rs | 28 +++- .../src/storage/flatbuffer_index.rs | 17 +-- crates/spk-storage/src/storage/indexed.rs | 59 ++++---- crates/spk-storage/src/storage/mod.rs | 2 +- 6 files changed, 145 insertions(+), 94 deletions(-) diff --git a/crates/spk-cli/common/src/flags.rs b/crates/spk-cli/common/src/flags.rs index dec7d86eef..61b20fa17d 100644 --- a/crates/spk-cli/common/src/flags.rs +++ b/crates/spk-cli/common/src/flags.rs @@ -1006,6 +1006,23 @@ where Ok((found, configured.template.file_path().to_owned())) } +/// The index use command line setting options that can be used to +/// override index usage set in the spk config file. The default for a +/// repository in the spk config is to use an index if one exists, +/// unless the repository is called 'local'. The setting for an +/// individual repository can be changed in the config file, or by +/// using the matching environment variables. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +pub enum IndexUse { + /// Use the index use settings from the repository configurations + /// in the spk config file (or their environment variable + /// overrides, if any are set). + ConfigFile, + /// Disable all index use globally regardless of any setting in + /// the spk config file (or environment variables). + Disabled, +} + #[derive(Args, Clone)] pub struct Repositories { /// This option will enable the local repository only. @@ -1062,19 +1079,11 @@ pub struct Repositories { #[clap(long)] pub wrap_origin: Option, - /// Get the package data from the repo indexes instead of the - /// repos. This only applies to non-destructive repo operations. - /// This can be configured as the default in spk's config file, or - /// on a per-repo basis in spk's config file. + /// Override how indexes will be used to get package data from the + /// repo indexes instead of the repos. This only applies to + /// non-destructive repository operations. #[clap(long)] - pub use_indexes: bool, - - /// Do not get the package data from the repo index, always use - /// the repo instead. This only applies to non-destructive repo - /// operations. This option can be configured as the default in - /// spk's config file. - #[clap(long, conflicts_with = "use_indexes")] - pub no_indexes: bool, + pub index_use: Option, } impl Repositories { @@ -1181,31 +1190,35 @@ impl Repositories { return Ok(repos); } - // Check whether using the indexes for the repos is enabled or not + // Check whether using the indexes for the repos is globally + // disabled by the spk command, such as 'spk repo index' or + // 'spk info'. let disable_all_index_use = DISABLE_INDEX_USE.load(std::sync::atomic::Ordering::Relaxed); - // Get the overrides from the command line flags - let use_index_cli_override = if self.use_indexes || self.no_indexes { - if self.no_indexes { - // Don't use any indexes - Some(false) - } else { - // Use indexes, if the command line flag is given to use them - Some(self.use_indexes) + // Check the override from the command line flag, if any + let use_index_cli_override = match self.index_use { + Some(index_use) => { + match index_use { + IndexUse::ConfigFile => { + // Use the config file/env var settings. + None + } + IndexUse::Disabled => { + // This override disables all index use. + Some(false) + } + } + } + _ => { + // There was no command line override, so use the + // config file/env var settings + None } - } else { - // No command line index flags given, so there's no index override - None }; - // Get the overall config setting, which includes env var - // setting for it, but not the per repo settings. Those are - // checked below as repos are created. + // Get the spk config, which includes env vars settings, for + // later use in the loop block. let config = spk_config::get_config()?; - let all_use_index_from_config = config.solver.use_indexes; - tracing::debug!( - "Disable all indexes: {disable_all_index_use}, Cli use all indexes: {use_index_cli_override:?}, Config use all indexes: {all_use_index_from_config}" - ); // Add the enabled repos for (name, ts, is_default_origin) in enabled @@ -1245,33 +1258,52 @@ impl Repositories { repo.pin_at_time(ts); } - // Decide whether to use an index for this repository based on - // the various settings: all repos, per repo, and overrides. + // Decide whether to use an index for this repository let use_index = if disable_all_index_use { - // Disable all take precedence over everything - tracing::debug!("All index use disabled"); + // The disable all setting takes precedence over everything else + tracing::debug!("All index use disabled. '{name}' will not use an index."); false - } else if let Some(use_indexes) = use_index_cli_override { - // Otherwise the all indexes command line flag takes - // precedence over what was in the config file - tracing::debug!("A cli-based use index override was given as: {use_indexes}"); - use_indexes + } else if let Some(use_index) = use_index_cli_override { + // Otherwise the command line flag takes precedence + // over what was in the config file. + tracing::debug!("A cli-based use index override was given: {use_index}"); + use_index } else { - // Otherwise use this specific repository's config setting, if any + // Otherwise use this repository's spk config setting match config.repositories.get(name) { - Some(r_config) => { - tracing::debug!("Using index for '{name}' repo"); - r_config.use_index + Some(repo_config) => { + tracing::debug!( + "Using the '{name'} repo's use index setting, which is: {} ({} index use)", + repo_config.use_index, + if repo_config.use_index { + "enable" + } else { + "disable" + } + ); + repo_config.use_index } - // And last use the all indexes env var and config - // file setting. + // The fallback default if there's no configuration for this repo. None => { - tracing::debug!("Using config settings for all indexes"); - all_use_index_from_config + // Enable index use on all repositories, except the 'local' repo. + let default_index_use = name != "local"; + tracing::debug!( + "Using default use index setting for '{name}' repo, which is: {} ({} index use)", + default_index_use, + if default_index_use { + "enable" + } else { + "disable" + } + ); + default_index_use } } }; - tracing::debug!("Using index for '{name}': {use_index}"); + tracing::debug!( + "Using index for '{name}': is {}", + if use_index { "enabled" } else { "disabled" } + ); if use_index { // Use the index for this repo, if there is one, diff --git a/crates/spk-cli/common/src/flags_test.rs b/crates/spk-cli/common/src/flags_test.rs index 5c05205ca8..6a63ae9687 100644 --- a/crates/spk-cli/common/src/flags_test.rs +++ b/crates/spk-cli/common/src/flags_test.rs @@ -67,8 +67,7 @@ async fn test_get_solver_with_host_options( disable_repo: Default::default(), when: None, wrap_origin: None, - use_indexes: false, - no_indexes: false, + index_use: Default::default(), }, decision_formatter_settings: DecisionFormatterSettings { time: Default::default(), diff --git a/crates/spk-config/src/config.rs b/crates/spk-config/src/config.rs index c7224f69f2..135ca49eff 100644 --- a/crates/spk-config/src/config.rs +++ b/crates/spk-config/src/config.rs @@ -12,6 +12,8 @@ use spfs::Sentry; use crate::Result; +pub const FLATBUFFER_INDEX_TOKEN: &str = "flatb"; + #[cfg(test)] #[path = "./config_test.rs"] mod config_test; @@ -97,8 +99,8 @@ pub struct Solver { pub indexes: Index, } -/// The settings for one or more indexes -#[derive(Clone, Default, Debug, Deserialize, Serialize)] +/// The settings for a spk repository index +#[derive(Clone, Debug, Deserialize, Serialize)] #[serde(default)] pub struct Index { /// Whether to validate the index data before using it. @@ -112,8 +114,19 @@ pub struct Index { pub kind: String, } +impl Default for Index { + fn default() -> Self { + Self { + // Default to verifying indexes before using them. This is + // safer but can add some overhead. + verify_before_use: true, + kind: String::from(FLATBUFFER_INDEX_TOKEN), + } + } +} + /// The settings for a single repository -#[derive(Clone, Default, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] #[serde(default)] pub struct Repository { /// Whether to use an index with this repository, if one is @@ -124,6 +137,15 @@ pub struct Repository { pub index: Index, } +impl Default for Repository { + fn default() -> Self { + Self { + use_index: true, + index: Default::default(), + } + } +} + #[derive(Clone, Default, Debug, Deserialize, Serialize)] #[serde(default)] pub struct Statsd { diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 3793558d3d..ae0658b173 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -55,8 +55,6 @@ mod flatbuffer_index_test; const COMPATIBLE_INDEX_SCHEMA_VERSION: u32 = 1; // Index name and kind constants -pub const FLATBUFFER_INDEX: &str = "flatb"; - const SPK_INDEX_SUB_DIR_NAME: &str = "spk"; const INDEX_FILE_PREFIX: &str = "index"; const INDEX_FILE_EXT: &str = "fb"; @@ -178,13 +176,16 @@ impl FlatBufferRepoIndex { // Based on the configuration setting, decide whether to // verify the flatbuffer data before use. let config = spk_config::get_config()?; + let verify_before_use = + if let Some(repo_config) = config.repositories.get(&repo.name().to_string()) { + repo_config.index.verify_before_use + } else { + // Default is to verify indexes before using them. This is + // safer but can add some overhead. + true + }; - FlatBufferRepoIndex::read_index_from_file( - name, - &filepath, - config.solver.indexes.verify_before_use, - ) - .await + FlatBufferRepoIndex::read_index_from_file(name, &filepath, verify_before_use).await } async fn read_index_from_file( diff --git a/crates/spk-storage/src/storage/indexed.rs b/crates/spk-storage/src/storage/indexed.rs index 396af95c9f..98be1dcd65 100644 --- a/crates/spk-storage/src/storage/indexed.rs +++ b/crates/spk-storage/src/storage/indexed.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use std::time::Instant; use arc_swap::ArcSwap; +use spk_config::FLATBUFFER_INDEX_TOKEN; use spk_schema::foundation::ident_build::Build; use spk_schema::foundation::ident_component::Component; use spk_schema::foundation::name::{PkgName, PkgNameBuf, RepositoryName}; @@ -18,7 +19,7 @@ use spk_schema::name::OptNameBuf; use spk_schema::{BuildIdent, Spec, SpecRecipe}; use super::repository::{PublishPolicy, Repository, Storage}; -use crate::storage::{FLATBUFFER_INDEX, FlatBufferRepoIndex, RepoIndex, RepositoryIndex}; +use crate::storage::{FlatBufferRepoIndex, RepoIndex, RepositoryIndex}; use crate::{Error, Result}; /// A spk repository that wraps another repository with that @@ -48,13 +49,13 @@ impl Clone for IndexedRepository { impl IndexedRepository { /// Get the name of the kind of index from spk's config. This is /// used to work out what kind of index to load or save. - fn get_index_kind_from_config() -> Result { + fn get_index_kind_from_config(repo_name: &RepositoryName) -> Result { let config = spk_config::get_config()?; - - let index_kind = if config.solver.indexes.kind != String::default() { - config.solver.indexes.kind.clone() + let index_kind = if let Some(repo_config) = config.repositories.get(&repo_name.to_string()) + { + repo_config.index.kind.clone() } else { - String::from(FLATBUFFER_INDEX) + String::from(FLATBUFFER_INDEX_TOKEN) }; tracing::debug!("Index kind from config: '{index_kind}'"); @@ -80,25 +81,22 @@ impl IndexedRepository { repo_to_wrap: Arc, ) -> Result { let start = Instant::now(); - let index_kind = IndexedRepository::get_index_kind_from_config()?; + let index_kind = IndexedRepository::get_index_kind_from_config(repo_to_wrap.name())?; - let index = match index_kind.as_ref() { - FLATBUFFER_INDEX => { - tracing::debug!("Flatbuffer index selected"); + let index = if index_kind == FLATBUFFER_INDEX_TOKEN { + tracing::debug!("Flatbuffer index selected"); - match FlatBufferRepoIndex::from_repo_file(&repo_to_wrap).await { - Ok(i) => RepoIndex::Flat(i), - Err(err) => { - return Err(Error::IndexFailedToLoad(err.to_string())); - } + match FlatBufferRepoIndex::from_repo_file(&repo_to_wrap).await { + Ok(i) => RepoIndex::Flat(i), + Err(err) => { + return Err(Error::IndexFailedToLoad(err.to_string())); } } - _ => { - return Err(Error::IndexUnknownKind( - index_kind, - "load from file".to_string(), - )); - } + } else { + return Err(Error::IndexUnknownKind( + index_kind, + "load from file".to_string(), + )); }; tracing::debug!( @@ -118,21 +116,20 @@ impl IndexedRepository { async fn generate_in_memory_index_from_repo( repo_to_wrap: &Arc, ) -> Result { - let index_kind = IndexedRepository::get_index_kind_from_config()?; + let index_kind = IndexedRepository::get_index_kind_from_config(repo_to_wrap.name())?; - match index_kind.as_ref() { - FLATBUFFER_INDEX => { - tracing::debug!("Flatbuffer index selected"); + if index_kind == FLATBUFFER_INDEX_TOKEN { + tracing::debug!("Flatbuffer index selected"); - match FlatBufferRepoIndex::from_repo_in_memory(repo_to_wrap).await { - Ok(i) => Ok(RepoIndex::Flat(i)), - Err(err) => Err(Error::IndexFailedToGenerate(err.to_string())), - } + match FlatBufferRepoIndex::from_repo_in_memory(repo_to_wrap).await { + Ok(i) => Ok(RepoIndex::Flat(i)), + Err(err) => Err(Error::IndexFailedToGenerate(err.to_string())), } - _ => Err(Error::IndexUnknownKind( + } else { + Err(Error::IndexUnknownKind( index_kind, "create in memory".to_string(), - )), + )) } } diff --git a/crates/spk-storage/src/storage/mod.rs b/crates/spk-storage/src/storage/mod.rs index 25c9d7a3c6..b757f77f9d 100644 --- a/crates/spk-storage/src/storage/mod.rs +++ b/crates/spk-storage/src/storage/mod.rs @@ -13,7 +13,7 @@ mod runtime; mod spfs; pub use archive::export_package; -pub use flatbuffer_index::{FLATBUFFER_INDEX, FlatBufferRepoIndex}; +pub use flatbuffer_index::FlatBufferRepoIndex; pub use handle::RepositoryHandle; pub use indexed::IndexedRepository; pub use mem::MemRepository; From 94119f44433a0190136d6d6e0e7af4abe643685f Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 9 Apr 2026 09:45:20 -0700 Subject: [PATCH 10/18] Updates spk repo index command to just allow -r reponame command line option Signed-off-by: David Gilligan-Cook --- crates/spk-cli/cmd-repo/src/cmd_repo.rs | 51 ++++++++++++++----------- crates/spk-cli/common/src/flags.rs | 2 +- docs/ref/indexes.md | 15 ++++---- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index f8da2683f9..1057851ca0 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -9,7 +9,7 @@ use clap::{Args, Subcommand}; use miette::{Context, Result}; use spk_cli_common::{CommandArgs, Run, flags}; use spk_schema::VersionIdent; -use spk_storage::{self as storage, FlatBufferRepoIndex, RepositoryIndexMut}; +use spk_storage::{self as storage, FlatBufferRepoIndex, RepositoryHandle, RepositoryIndexMut}; use storage::Repository; /// Perform repository-level actions and maintenance @@ -51,8 +51,18 @@ pub enum RepoCommand { }, /// Generate an index for a repository Index { - #[clap(flatten)] - repos: flags::Repositories, + /// Repositories to enable for the command + /// + /// Any configured spfs repository can be named here as well as "local" or + /// a path on disk or a full remote repository url. Repositories can also + /// be limited to a specific time by appending a relative or absolute time + /// specifier (eg: origin~10m, origin~5weeks, origin@2022-10-11, + /// origin@2022-10-11T13:00.12). This time affects all interactions and + /// queries in the repository, effectively making it look like it did in the past. + /// It will cause errors for any operation that attempts to make changes to + /// the repository, even if the time is in the future. + #[clap(long, short = 'r')] + repo: String, /// Package/version of a published package to update in an /// existing index. @@ -84,20 +94,20 @@ impl RepoCommand { } // spk repo index ... - Self::Index { repos, update } => { + Self::Index { repo, update } => { // Generate or update an index a repo. The repo must - // be the underlying repo and not an indexed repo. So - // this disables index use for this command regardless - // of config or command line flags. + // be the underlying repo and not an indexed repo. So as + // a safety measure, this disables index use for this + // command regardless of config or command line flags. flags::disable_index_use(); - let repos = repos.get_repos_for_non_destructive_operation().await?; - if repos.len() != 1 { - tracing::error!( - "{} repos specified, Can only index one repo at a time. Please specify a single repo.", - repos.len() - ); - return Ok(2); - } + + // Construct the repo handle to operate on, and repo + // list that contains it. + let repo_to_index: RepositoryHandle = match repo.as_str() { + "local" => storage::local_repository().await?.into(), + name => storage::remote_repository(name).await?.into(), + }; + let repos = vec![(repo_to_index.name().to_string(), repo_to_index.clone())]; if let Some(package_version) = update { // Update the existing index for the given package/version @@ -106,12 +116,10 @@ impl RepoCommand { let mut was_full_index = String::from(""); // Load the current index for this repo now - let repo = &repos[0].1; - match FlatBufferRepoIndex::from_repo_file(repo).await { + match FlatBufferRepoIndex::from_repo_file(&repo_to_index).await { Ok(current_index) => { - let repo = &repos[0].1; current_index - .update_repo_with_package_version(repo, &version_ident) + .update_repo_with_package_version(&repo_to_index, &version_ident) .await? } Err(err) => { @@ -127,7 +135,7 @@ impl RepoCommand { tracing::info!( "Index update for '{package_version}' in '{}' repo completed in: {} secs{was_full_index}", - repo.name(), + repo_to_index.name(), start.elapsed().as_secs_f64() ); } else { @@ -135,10 +143,9 @@ impl RepoCommand { let start = Instant::now(); FlatBufferRepoIndex::index_repo(&repos).await?; - let repo = &repos[0].1; tracing::info!( "Index generation for '{}' repo completed in: {} secs", - repo.name(), + repo_to_index.name(), start.elapsed().as_secs_f64() ); } diff --git a/crates/spk-cli/common/src/flags.rs b/crates/spk-cli/common/src/flags.rs index 61b20fa17d..9b6d6ef5bd 100644 --- a/crates/spk-cli/common/src/flags.rs +++ b/crates/spk-cli/common/src/flags.rs @@ -1273,7 +1273,7 @@ impl Repositories { match config.repositories.get(name) { Some(repo_config) => { tracing::debug!( - "Using the '{name'} repo's use index setting, which is: {} ({} index use)", + "Using the '{name}' repo's use index setting, which is: {} ({} index use)", repo_config.use_index, if repo_config.use_index { "enable" diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md index 706f4b79e6..7a963b098f 100644 --- a/docs/ref/indexes.md +++ b/docs/ref/indexes.md @@ -55,16 +55,15 @@ If index use is disabled in the config file, it can be enabled with the ### Generating an Index To generate an index for a repository (e.g. origin), run: -`spk repo index --disable-repo local` +`spk repo index -r origin` This will generate a flatbuffer schema based index file. The index file is stored in the underlying spfs repo (e.g. origin repo) in a `index/spk/` sub-directory. -Index generation only works on one repository at a time (hence the -`--disable-repo local` when working on the default `origin` repo). If -you have multiple repositories to index you have to run `spk repo index -...` once for each repository. +Index generation only works on one repository at a time. If you have +multiple repositories to index you have to run `spk repo index ...` +once for each repository. See `spk repo index -h` for more details. @@ -84,9 +83,9 @@ package is published, deprecated, undeprecated, or deleted. But regular periodic complete index generation may also work for a site, depending on the frequency of package changes and the periodic updates. -To update an existing index, e.g. after a new `python` package was -published, run: -`spk repo index --disable-repo local --update python` +To update an existing index, e.g. the `origin` repo's index after a +new `python` package was published or deprecated, run: +`spk repo index -r origin --update python` This will read in the existing index for the repository and update the versions and builds of the named package in the index. It is faster From f942870f7ec170815b6653bc862578e42cfd1b32 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 9 Apr 2026 10:44:40 -0700 Subject: [PATCH 11/18] Updates index docs Signed-off-by: David Gilligan-Cook --- docs/ref/indexes.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md index 7a963b098f..57f09c538c 100644 --- a/docs/ref/indexes.md +++ b/docs/ref/indexes.md @@ -35,22 +35,21 @@ all of them, `spk` will use the indexes that exist. ### Enabling/Disabling Indexes `spk` index use can be enabled or disabled in the `spk` config file. -It is disabled by default because index generation and updating needs -to be set up on a per repository basis. Setting this up is recommended -for repositories with a large number of packages. +It is enabled by default but require index generation and index +updating to be set up, on a per repository basis, before it will have +an impact. Setting this up is recommended for repositories with a +large number of packages. -Most `spk` commands can use the `--use-indexes` and `--no-indexes` -flags to override repository index use. The `spk repo index ...` +Most `spk` commands can use the `--index-use ` option to override +repository index use on a per command basis. The `spk repo index ...` command always has index use disabled because it operates on the indexes themselves. `spk info ...` also disables indexes because displaying complete package information requires reading the package from the repository directly, not from an index. If index use is enabled in the config file, it can be disabled with -the `--no-indexes` command line flag. +the `--index-use disabled` command line option. -If index use is disabled in the config file, it can be enabled with the -`--use-indexes` flag. ### Generating an Index @@ -68,8 +67,7 @@ once for each repository. See `spk repo index -h` for more details. You do not have to generate a full index every time a package changes, -you can also update a package in an existing index, see the next -section. +you can update a package in an existing index, see the next section. ### Updating an existing Index @@ -153,10 +151,10 @@ index. It requires `flatc` to be installed to generate the rust code for the index schema. The schema is versioned internally, and the index file name contains -the schema version number as well. +the index schema version number as well. The index structure does not match the rust object structures -1-to-1. This is partially due to only keeping things needed for +one-to-one. This is partially due to only keeping things needed for solves, and partially due to what flatbuffers require, support, and recommend (lists not sets or mappings, removing duplication and intermediate structures). From bb029ffa03e2bc070958423650e1fe6789443b9e Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Thu, 9 Apr 2026 17:49:09 -0700 Subject: [PATCH 12/18] Updates spk repo index --update option so it can be specified multiple times, and fixes bugs when using it to update a specific package version. Signed-off-by: David Gilligan-Cook --- Cargo.lock | 1 + crates/spk-cli/cmd-repo/Cargo.toml | 1 + crates/spk-cli/cmd-repo/src/cmd_repo.rs | 64 ++++++---- crates/spk-cli/group2/src/cmd_stats.rs | 4 + crates/spk-cli/group2/src/cmd_stats_test.rs | 5 +- .../src/storage/flatbuffer_index.rs | 118 +++++++++++++----- .../src/storage/repository_index.rs | 14 +-- docs/ref/indexes.md | 19 ++- 8 files changed, 157 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4cca2b3e68..c549e48f60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4609,6 +4609,7 @@ version = "0.44.0" dependencies = [ "async-trait", "clap 4.5.47", + "itertools 0.14.0", "miette", "spk-cli-common", "spk-schema", diff --git a/crates/spk-cli/cmd-repo/Cargo.toml b/crates/spk-cli/cmd-repo/Cargo.toml index 5811c760f4..b02c8c1488 100644 --- a/crates/spk-cli/cmd-repo/Cargo.toml +++ b/crates/spk-cli/cmd-repo/Cargo.toml @@ -16,6 +16,7 @@ workspace = true miette = { workspace = true, features = ["fancy"] } async-trait = { workspace = true } clap = { workspace = true } +itertools = { workspace = true } spk-cli-common = { workspace = true } spk-schema = { workspace = true } spk-storage = { workspace = true } diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index 1057851ca0..615d52187b 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use std::time::Instant; use clap::{Args, Subcommand}; +use itertools::Itertools; use miette::{Context, Result}; use spk_cli_common::{CommandArgs, Run, flags}; use spk_schema::VersionIdent; @@ -51,30 +52,22 @@ pub enum RepoCommand { }, /// Generate an index for a repository Index { - /// Repositories to enable for the command - /// - /// Any configured spfs repository can be named here as well as "local" or - /// a path on disk or a full remote repository url. Repositories can also - /// be limited to a specific time by appending a relative or absolute time - /// specifier (eg: origin~10m, origin~5weeks, origin@2022-10-11, - /// origin@2022-10-11T13:00.12). This time affects all interactions and - /// queries in the repository, effectively making it look like it did in the past. - /// It will cause errors for any operation that attempts to make changes to - /// the repository, even if the time is in the future. + /// Repository to generate or update an index from. #[clap(long, short = 'r')] repo: String, - /// Package/version of a published package to update in an - /// existing index. + /// Package or package/version of a published package to + /// update in an existing index. /// - /// Other packages in the index will remain unchanged. Without - /// this the full index will be constructed from scratch. If - /// the repo does not have an index, a full index will be - /// constructed from scratch if possible. + /// Can be specified multiple times. Other packages in the + /// index will not be updated. Without this the full index + /// will be constructed from scratch. If the repo does not + /// have an index, a full index will be constructed from + /// scratch if the repository supports an index. /// /// This option is only supported for flatbuffer indexes. #[clap(long, name = "PACKAGE/VERSION")] - update: Option, + update: Vec, }, } @@ -109,17 +102,43 @@ impl RepoCommand { }; let repos = vec![(repo_to_index.name().to_string(), repo_to_index.clone())]; - if let Some(package_version) = update { + if !update.is_empty() { // Update the existing index for the given package/version let start = Instant::now(); - let version_ident = VersionIdent::from_str(package_version)?; - let mut was_full_index = String::from(""); + let idents: Vec = update + .iter() + .filter_map(|pv| match VersionIdent::from_str(pv) { + Ok(i) => Some(i), + Err(err) => { + tracing::warn!( + "Skipping '{pv}': Unable to parse it as a package/version: {err}" + ); + None + } + }) + .collect(); + + tracing::debug!( + "Command line update option: [{}]", + update.iter().map(ToString::to_string).join(", ") + ); + tracing::info!( + "Package/versions to update: [{}]", + idents.iter().map(ToString::to_string).join(", ") + ); + if idents.is_empty() { + tracing::error!( + "No valid package/versions given, nothing to update. Stopping." + ); + return Ok(2); + } // Load the current index for this repo now + let mut was_full_index = String::from(""); match FlatBufferRepoIndex::from_repo_file(&repo_to_index).await { Ok(current_index) => { current_index - .update_repo_with_package_version(&repo_to_index, &version_ident) + .update_packages(&repo_to_index, &idents) .await? } Err(err) => { @@ -134,7 +153,8 @@ impl RepoCommand { }; tracing::info!( - "Index update for '{package_version}' in '{}' repo completed in: {} secs{was_full_index}", + "Index update for '{}' in '{}' repo completed in: {} secs{was_full_index}", + idents.iter().map(ToString::to_string).join(", "), repo_to_index.name(), start.elapsed().as_secs_f64() ); diff --git a/crates/spk-cli/group2/src/cmd_stats.rs b/crates/spk-cli/group2/src/cmd_stats.rs index 20278d5334..dbb1e72ed6 100644 --- a/crates/spk-cli/group2/src/cmd_stats.rs +++ b/crates/spk-cli/group2/src/cmd_stats.rs @@ -9,6 +9,7 @@ use clap::Args; use futures::TryStreamExt; use miette::Result; use spfs::io::Pluralize; +use spk_cli_common::flags::IndexUse; use spk_cli_common::{CommandArgs, Run, flags}; use spk_schema::foundation::name::PkgNameBuf; use spk_schema::version::Version; @@ -26,6 +27,7 @@ mod cmd_stats_test; pub const ONE_PACKAGE_WAIT_MESSAGE: &str = "This may take a few seconds, please wait ..."; pub const ALL_PACKAGES_WAIT_MESSAGE: &str = "This may take a few minutes, please wait ..."; +pub const INDEX_USED_WAIT_MESSAGE: &str = "This may take a few seconds, please wait ..."; // Counters for stats about a version's builds #[derive(Default, Debug)] @@ -336,6 +338,8 @@ impl Run for Stats { )); if self.package.is_some() { self.output.println(ONE_PACKAGE_WAIT_MESSAGE.to_string()); + } else if self.repos.index_use != Some(IndexUse::Disabled) { + self.output.println(INDEX_USED_WAIT_MESSAGE.to_string()); } else { self.output.println(ALL_PACKAGES_WAIT_MESSAGE.to_string()); } diff --git a/crates/spk-cli/group2/src/cmd_stats_test.rs b/crates/spk-cli/group2/src/cmd_stats_test.rs index 7bd2849808..04da05ab56 100644 --- a/crates/spk-cli/group2/src/cmd_stats_test.rs +++ b/crates/spk-cli/group2/src/cmd_stats_test.rs @@ -49,7 +49,8 @@ async fn test_stats_on_empty_repo() { ) .unwrap(); - let mut opt = Opt::try_parse_from(["stats", "--show-top", "15"]).unwrap(); + let mut opt = + Opt::try_parse_from(["stats", "--show-top", "15", "--index-use", "disabled"]).unwrap(); let result = opt.stats.run().await.unwrap(); assert_eq!(result, 0); @@ -90,7 +91,7 @@ async fn test_stats() { .await .unwrap(); - let mut opt = Opt::try_parse_from(["stats"]).unwrap(); + let mut opt = Opt::try_parse_from(["stats", "--index-use", "disabled"]).unwrap(); let result = opt.stats.run().await.unwrap(); assert_eq!(result, 0); diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index ae0658b173..5077abe988 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -396,17 +396,35 @@ impl FlatBufferRepoIndex { } /// Internal method to get the current information on the builds - /// of package version and any global vars they provide. Useful - /// when updating an existing index. + /// of the given package versions and any global vars they + /// provide. Used when updating an existing index. async fn gather_updates_from_repo( &self, repo: &crate::RepositoryHandle, - package_version: &VersionIdent, + package_versions: &[VersionIdent], ) -> miette::Result<(HashMap, GlobalVarsInfo)> { let start = Instant::now(); - let package_name_to_update = package_version.name().to_owned(); - let arc_version_to_update = Arc::new(package_version.version().clone()); + let package_names_to_update: HashSet = package_versions + .iter() + .map(|package_version| package_version.name().to_owned()) + .collect(); + + // The 0.0.0 version number, which is the default and is used + // if no version number is found when parsing a string into + // VersionIdent. + let all_versions_version = Version::default(); + + let mut versions_to_update: HashMap> = HashMap::new(); + for package_version in package_versions { + let entry = versions_to_update + .entry(package_version.name().to_owned()) + .or_default(); + if *(package_version.version()) != all_versions_version { + tracing::info!("adding to versions: {package_version}"); + entry.insert(package_version.version().clone()); + } + } let mut packages: HashMap = HashMap::new(); @@ -422,26 +440,23 @@ impl FlatBufferRepoIndex { let mut package_names = self.list_packages().await?; // Check update package name and make sure it is in the names list. - if !package_names.contains(&package_name_to_update) { - tracing::debug!("'{package_name_to_update}' not in package_names, injecting it",); - package_names.push(package_name_to_update.clone()); + for package_name_to_update in &package_names_to_update { + if !package_names.contains(package_name_to_update) { + tracing::debug!("'{package_name_to_update}' not in package_names, injecting it",); + package_names.push(package_name_to_update.clone()); + } } // Now all the names are present, make a set to help with global variables let package_names_set = HashSet::from_iter(package_names.clone()); - // Process the packages, checking for the one to update and + // Process the packages, checking for the ones to update and // pulling from the correct data source for each. for name in &package_names { - let p_v = self.list_package_versions(name).await?; - let mut package_versions = (*p_v).clone(); - - if package_name_to_update == *name && !package_versions.contains(&arc_version_to_update) - { - tracing::debug!("{arc_version_to_update} not in package_versions, injecting it",); - package_versions.push(arc_version_to_update.clone()); - package_versions.sort_by_cached_key(|v| std::cmp::Reverse(v.clone())); - } + // Get the current versions from the repo to account for + // any deleted or newly published versions. + let p_v = repo.list_package_versions(name).await?; + let package_versions = (*p_v).clone(); let pkg_info = packages.entry(name.clone()).or_default(); @@ -456,20 +471,45 @@ impl FlatBufferRepoIndex { let version_ident = VersionIdent::new(name.clone(), (**version).clone()); - // Check if this is the version we want to update - if package_name_to_update == *name && arc_version_to_update == *version { - tracing::info!("Reached the {version} of the package to update"); + // Check if this is the package and versions we want + // to update. An empty list of versions to update at + // this point means update all the package's versions. + if package_names_to_update.contains(name) + && let v2u = versions_to_update + .get(name) + .expect("a package to update should have a version set, even an empty one") + && (v2u.is_empty() || v2u.contains(version)) + { + tracing::info!("Reached version {version} of the {name} package to update"); // Get the updated data from the repo - let version_builds = repo.list_package_builds(&version_ident).await?; + let version_builds = match repo.list_package_builds(&version_ident).await { + Ok(builds) => builds, + Err(err) => { + tracing::warn!("Problem listing package builds: {err}. Skipping it."); + continue; + } + }; for build_ident in version_builds { - let build_spec = repo.read_package_from_storage(&build_ident).await?; + let build_spec = match repo.read_package_from_storage(&build_ident).await { + Ok(build) => build, + Err(err) => { + tracing::warn!("Problem reading package: {err}. Skipping it."); + continue; + } + }; let spec = build_spec.clone(); - let component_map = repo - .read_components_from_storage(build_spec.ident()) - .await?; - let published_components = component_map.keys().cloned().collect(); + let published_components = + match repo.read_components_from_storage(build_spec.ident()).await { + Ok(component_map) => component_map.keys().cloned().collect(), + Err(err) => { + tracing::warn!( + "Problem reading published components: {err}. Skipping it." + ); + continue; + } + }; let build_info = BuildInfo { spec, @@ -483,6 +523,11 @@ impl FlatBufferRepoIndex { global_vars.extract_global_vars(&build_spec, &package_names_set)?; } } else { + if package_names_to_update.contains(name) { + tracing::debug!( + "Using indexed version {version} of the {name} package to update" + ); + } // Use what's there in the flatbuffer index let version_builds = self.list_package_builds(&version_ident).await?; @@ -541,7 +586,10 @@ impl FlatBufferRepoIndex { for name in package_names { if let Some(pkg_info) = repo_packages.get(&name) { - tracing::debug!("package: {name} [{} versions]", pkg_info.versions.len()); + tracing::debug!( + "Adding package to index: {name} [{} versions]", + pkg_info.versions.len() + ); let package = pkg_info.to_fb_package_index(&mut builder, &name); packages.push(package); }; @@ -999,15 +1047,17 @@ impl RepositoryIndexMut for FlatBufferRepoIndex { Ok(()) } - // Index and the package version to update within it. The package - // version to update will have its data gathered from the - // repository rather than the current index. - async fn update_repo_with_package_version( + /// Update the existing index for the given the package versions. + /// The package versions to update will have their data gathered + /// from the repository, rather than the current index. + async fn update_packages( &self, repo: &crate::RepositoryHandle, - package_version: &VersionIdent, + package_versions: &[VersionIdent], ) -> miette::Result<()> { - let (packages, global_vars) = self.gather_updates_from_repo(repo, package_version).await?; + let (packages, global_vars) = self + .gather_updates_from_repo(repo, package_versions) + .await?; // Assemble the data into a flatbuffer index and save it let builder = diff --git a/crates/spk-storage/src/storage/repository_index.rs b/crates/spk-storage/src/storage/repository_index.rs index 453bd8663f..770abbd507 100644 --- a/crates/spk-storage/src/storage/repository_index.rs +++ b/crates/spk-storage/src/storage/repository_index.rs @@ -43,15 +43,15 @@ pub trait RepositoryIndexMut { #[allow(clippy::ptr_arg)] async fn index_repo(repos: &Vec<(String, crate::RepositoryHandle)>) -> miette::Result<()>; - // TODO: possible rename, should it be something like - // update_package_entry() instead? - /// Update an existing index for the given package/version. For - /// use when a package has been published to a repo to add it to - /// the index without generating the entire index from scratch. - async fn update_repo_with_package_version( + /// Update the existing index for the given package versions. The + /// package versions to update will have their data gathered from + /// the repository, rather than the current index. This is useful + /// when a package has been published to a repo to add it to the + /// index without generating the entire index from scratch. + async fn update_packages( &self, repo: &crate::RepositoryHandle, - package_version: &VersionIdent, + package_versions: &[VersionIdent], ) -> miette::Result<()>; } diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md index 57f09c538c..c862f4c381 100644 --- a/docs/ref/indexes.md +++ b/docs/ref/indexes.md @@ -85,10 +85,21 @@ To update an existing index, e.g. the `origin` repo's index after a new `python` package was published or deprecated, run: `spk repo index -r origin --update python` -This will read in the existing index for the repository and update the -versions and builds of the named package in the index. It is faster -than generating an index from scratch. It has to be run once per -repository and once per package. +The `--update` option can be given multiple times to update several +packages at a once, e.g.: +`spk repo index -r origin --update python --update zlib` + +The `--update` option take a package/version as well. This lets the +update be restricted to a specific version of a package. This can make +for shorter update times for packages with large numbers of versions, +or builds per version, e.g.: +`spk repo index -r origin --update python/3.10.8 --update zlib/1.2.12` + +Those commands will read in the existing index for the repository and +update the versions and builds of the named package in the index. It +is faster than generating an index from scratch. It has to be run once +per repository to update the given package or packages in that +repository's index. See `spk repo index -h` for more details. From 37e0d238e9a3135cb8759a57a61f3a10d4ffa0bd Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Mon, 13 Apr 2026 12:08:36 -0700 Subject: [PATCH 13/18] Fixed bug caused by reading all version lists from the repository, instead of just the ones for the packages being updated. Signed-off-by: David Gilligan-Cook --- .../src/storage/flatbuffer_index.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 5077abe988..95840d44d2 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -453,16 +453,19 @@ impl FlatBufferRepoIndex { // Process the packages, checking for the ones to update and // pulling from the correct data source for each. for name in &package_names { - // Get the current versions from the repo to account for - // any deleted or newly published versions. - let p_v = repo.list_package_versions(name).await?; - let package_versions = (*p_v).clone(); + // Get the current versions of the packages to update from + // the repository not the index, to account for any + // deleted, updated, or newly published versions. + let package_versions = if package_names_to_update.contains(name) { + repo.list_package_versions(name).await? + } else { + self.list_package_versions(name).await? + }; let pkg_info = packages.entry(name.clone()).or_default(); for version in package_versions.iter() { pkg_info.versions.push((**version).clone()); - let ver_info = pkg_info .version_builds .entry((**version).clone()) @@ -471,9 +474,9 @@ impl FlatBufferRepoIndex { let version_ident = VersionIdent::new(name.clone(), (**version).clone()); - // Check if this is the package and versions we want - // to update. An empty list of versions to update at - // this point means update all the package's versions. + // Check if this is the package and version we want to + // update. An empty list of versions to update means + // update all the package's versions. if package_names_to_update.contains(name) && let v2u = versions_to_update .get(name) From 2acaeeaaef09c35b39aa89f34ebaecfec58b24fb Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 17 Apr 2026 11:58:56 -0700 Subject: [PATCH 14/18] Changes index updates to use OptVersionIdent Signed-off-by: David Gilligan-Cook --- crates/spk-cli/cmd-repo/src/cmd_repo.rs | 6 +++--- crates/spk-storage/src/storage/flatbuffer_index.rs | 14 ++++++-------- crates/spk-storage/src/storage/repository_index.rs | 4 ++-- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index 615d52187b..29bcedbdd4 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -9,7 +9,7 @@ use clap::{Args, Subcommand}; use itertools::Itertools; use miette::{Context, Result}; use spk_cli_common::{CommandArgs, Run, flags}; -use spk_schema::VersionIdent; +use spk_schema::ident::OptVersionIdent; use spk_storage::{self as storage, FlatBufferRepoIndex, RepositoryHandle, RepositoryIndexMut}; use storage::Repository; @@ -105,9 +105,9 @@ impl RepoCommand { if !update.is_empty() { // Update the existing index for the given package/version let start = Instant::now(); - let idents: Vec = update + let idents: Vec = update .iter() - .filter_map(|pv| match VersionIdent::from_str(pv) { + .filter_map(|pv| match OptVersionIdent::from_str(pv) { Ok(i) => Some(i), Err(err) => { tracing::warn!( diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 95840d44d2..b7b4de6892 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -16,7 +16,7 @@ use itertools::Itertools; use spk_schema::foundation::ident_component::Component; use spk_schema::foundation::name::{PkgName, PkgNameBuf}; use spk_schema::foundation::version::Version; -use spk_schema::ident::VersionIdent; +use spk_schema::ident::{OptVersionIdent, VersionIdent}; use spk_schema::name::{OptNameBuf, RepositoryName}; use spk_schema::prelude::Versioned; use spk_schema::{ @@ -401,7 +401,7 @@ impl FlatBufferRepoIndex { async fn gather_updates_from_repo( &self, repo: &crate::RepositoryHandle, - package_versions: &[VersionIdent], + package_versions: &[OptVersionIdent], ) -> miette::Result<(HashMap, GlobalVarsInfo)> { let start = Instant::now(); @@ -413,16 +413,14 @@ impl FlatBufferRepoIndex { // The 0.0.0 version number, which is the default and is used // if no version number is found when parsing a string into // VersionIdent. - let all_versions_version = Version::default(); - let mut versions_to_update: HashMap> = HashMap::new(); for package_version in package_versions { let entry = versions_to_update .entry(package_version.name().to_owned()) .or_default(); - if *(package_version.version()) != all_versions_version { + if let Some(version) = package_version.target() { tracing::info!("adding to versions: {package_version}"); - entry.insert(package_version.version().clone()); + entry.insert(version.clone()); } } @@ -480,7 +478,7 @@ impl FlatBufferRepoIndex { if package_names_to_update.contains(name) && let v2u = versions_to_update .get(name) - .expect("a package to update should have a version set, even an empty one") + .expect("a package to update should have a versions set, even an empty one") && (v2u.is_empty() || v2u.contains(version)) { tracing::info!("Reached version {version} of the {name} package to update"); @@ -1056,7 +1054,7 @@ impl RepositoryIndexMut for FlatBufferRepoIndex { async fn update_packages( &self, repo: &crate::RepositoryHandle, - package_versions: &[VersionIdent], + package_versions: &[OptVersionIdent], ) -> miette::Result<()> { let (packages, global_vars) = self .gather_updates_from_repo(repo, package_versions) diff --git a/crates/spk-storage/src/storage/repository_index.rs b/crates/spk-storage/src/storage/repository_index.rs index 770abbd507..a7d02318d8 100644 --- a/crates/spk-storage/src/storage/repository_index.rs +++ b/crates/spk-storage/src/storage/repository_index.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use spk_schema::foundation::ident_component::Component; use spk_schema::foundation::name::{PkgName, PkgNameBuf}; use spk_schema::foundation::version::Version; -use spk_schema::ident::VersionIdent; +use spk_schema::ident::{OptVersionIdent, VersionIdent}; use spk_schema::name::OptNameBuf; use spk_schema::{BuildIdent, Spec}; @@ -51,7 +51,7 @@ pub trait RepositoryIndexMut { async fn update_packages( &self, repo: &crate::RepositoryHandle, - package_versions: &[VersionIdent], + package_versions: &[OptVersionIdent], ) -> miette::Result<()>; } From ea21e03a016c0fe70ff008dd72cc20103ad71ec3 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 17 Apr 2026 15:51:19 -0700 Subject: [PATCH 15/18] Cleans the index update variables and comments Signed-off-by: David Gilligan-Cook --- .../src/storage/flatbuffer_index.rs | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index b7b4de6892..8e89d4cdf1 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -405,14 +405,9 @@ impl FlatBufferRepoIndex { ) -> miette::Result<(HashMap, GlobalVarsInfo)> { let start = Instant::now(); - let package_names_to_update: HashSet = package_versions - .iter() - .map(|package_version| package_version.name().to_owned()) - .collect(); - - // The 0.0.0 version number, which is the default and is used - // if no version number is found when parsing a string into - // VersionIdent. + // Gather the version sets for each package to update. A set + // can be empty, and that indicates all the versions of that + // package should be updated. let mut versions_to_update: HashMap> = HashMap::new(); for package_version in package_versions { let entry = versions_to_update @@ -424,19 +419,16 @@ impl FlatBufferRepoIndex { } } - let mut packages: HashMap = HashMap::new(); - - let mut num_versions = 0; - let mut num_builds = 0; - - // Gather the existing global vars. Any new ones from the updated - // package version will be added when its builds are processed. - let mut global_vars = GlobalVarsInfo(self.get_global_var_values()); - - // Used to work out the order of processing and where to pull - // in the updates from. + // Get the packages from the index and add the names of + // packages to update. These are used to process all the + // packages later and to work out where to get the data from. let mut package_names = self.list_packages().await?; + let package_names_to_update: HashSet = package_versions + .iter() + .map(|package_version| package_version.name().to_owned()) + .collect(); + // Check update package name and make sure it is in the names list. for package_name_to_update in &package_names_to_update { if !package_names.contains(package_name_to_update) { @@ -445,9 +437,18 @@ impl FlatBufferRepoIndex { } } - // Now all the names are present, make a set to help with global variables + // Now all the names are present, make a set to help with + // global variables later. let package_names_set = HashSet::from_iter(package_names.clone()); + // Gather the existing global vars. Any new ones from the updated + // package version will be added when its builds are processed. + let mut global_vars = GlobalVarsInfo(self.get_global_var_values()); + + let mut packages: HashMap = HashMap::new(); + let mut num_versions = 0; + let mut num_builds = 0; + // Process the packages, checking for the ones to update and // pulling from the correct data source for each. for name in &package_names { @@ -468,6 +469,7 @@ impl FlatBufferRepoIndex { .version_builds .entry((**version).clone()) .or_default(); + num_versions += 1; let version_ident = VersionIdent::new(name.clone(), (**version).clone()); @@ -481,8 +483,8 @@ impl FlatBufferRepoIndex { .expect("a package to update should have a versions set, even an empty one") && (v2u.is_empty() || v2u.contains(version)) { - tracing::info!("Reached version {version} of the {name} package to update"); // Get the updated data from the repo + tracing::info!("Reached version {version} of the {name} package to update"); let version_builds = match repo.list_package_builds(&version_ident).await { Ok(builds) => builds, Err(err) => { @@ -524,12 +526,12 @@ impl FlatBufferRepoIndex { global_vars.extract_global_vars(&build_spec, &package_names_set)?; } } else { + // Use what's already in the flatbuffer index if package_names_to_update.contains(name) { tracing::debug!( "Using indexed version {version} of the {name} package to update" ); } - // Use what's there in the flatbuffer index let version_builds = self.list_package_builds(&version_ident).await?; for build_ident in version_builds { From dddc9ea74cce1e70cf35f1b0efa2bc40f44a85f7 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Mon, 20 Apr 2026 13:08:18 -0700 Subject: [PATCH 16/18] Updates index/repository configuration file docs Signed-off-by: David Gilligan-Cook --- crates/spk-solve/src/solvers/solver_test.rs | 1 - crates/spk-storage/src/error.rs | 2 +- .../src/storage/flatbuffer_index.rs | 2 +- docs/admin/config.md | 23 ++++++++++++------- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/crates/spk-solve/src/solvers/solver_test.rs b/crates/spk-solve/src/solvers/solver_test.rs index 12f77907be..0be69a1ba7 100644 --- a/crates/spk-solve/src/solvers/solver_test.rs +++ b/crates/spk-solve/src/solvers/solver_test.rs @@ -2430,7 +2430,6 @@ async fn test_solver_option_compat_intersection( #[rstest] #[case::step(step_solver())] -// #[case::resolvo(resolvo_solver())] #[tokio::test] async fn test_solver_components( #[case] mut solver: SolverImpl, diff --git a/crates/spk-storage/src/error.rs b/crates/spk-storage/src/error.rs index d41d00875d..e2f464b423 100644 --- a/crates/spk-storage/src/error.rs +++ b/crates/spk-storage/src/error.rs @@ -75,7 +75,7 @@ pub enum Error { IndexOpenError(#[source] std::io::Error), #[error("Unable to memory map flatbuffer index from repo file: {0}")] IndexMemMapError(#[source] std::io::Error), - #[error("Unable to write '{0}' repo's index, '{1}': {2}")] + #[error("Unable to write '{0}' repo's index file, '{1}': {2}")] IndexWriteError(String, String, #[source] std::io::Error), #[error( "Cannot generate an index from this repo: It is not a spk MemoryRepository or SpfsRepository" diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 8e89d4cdf1..9995f75a21 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -633,7 +633,7 @@ impl FlatBufferRepoIndex { index_path.push(base_path); index_path.push(SPK_INDEX_SUB_DIR_NAME); - spfs::runtime::makedirs_with_perms(&index_path, 0o777); + spfs::runtime::makedirs_with_perms(&index_path, 0o777).map_err(|source| Error::String(format!("Unable to make {SPK_INDEX_SUB_DIR_NAME} sub-dir in '{}'s index directory: {source}", repo.name())))?; // Index file name contains the index schema version for ease // of identifying a compatible index. The index version is diff --git a/docs/admin/config.md b/docs/admin/config.md index beadd15376..e18b7788eb 100644 --- a/docs/admin/config.md +++ b/docs/admin/config.md @@ -302,13 +302,20 @@ host_filtering = false compat_rule = "x.ab" # SPK supports using pre-generated repository indexes to speed up solves. -use_indexes = false - -# The kind of repository index format. SPK supports a flatbuffer based index. -# index_kind = "flatb" - -# SPK supports validating index data before using it. -# This can be disabled, but validating is safer even though it can add -# some overhead at the start of a solve, when using an index. +# The index must be created separately. If the index does not exist for a +# repository SPK will continue to solve without it. +# +# Index use can be enabled for each named repository. For example, +# for the 'origin' repository. +[repositories.origin] +use_index = true +# Once enabled, the index settings can be configured for each named repository +[repositories.origin.index] +# SPK supports validating index data before using it. This can be disabled, +# but validating is safer even though it can add some overhead to a solve +# when using an index. verify_index_before_use = true +# The kind of index format to use for this repository. SPK currently supports +# a flatbuffer based index. +# index_kind = "flatb" ``` From f7a72889933e7c51e57dd54183f74e410666e073 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 8 May 2026 16:01:24 -0700 Subject: [PATCH 17/18] Updates docs, comments, change DISABLED_USE_INDEX to a mutex, and empty version set handling. Signed-off-by: David Gilligan-Cook --- crates/spk-cli/cmd-repo/src/cmd_repo.rs | 7 +- crates/spk-cli/common/src/flags.rs | 31 ++--- .../src/storage/flatbuffer_index.rs | 16 ++- docs/admin/config.md | 2 +- docs/ref/indexes.md | 114 +++++++++--------- 5 files changed, 93 insertions(+), 77 deletions(-) diff --git a/crates/spk-cli/cmd-repo/src/cmd_repo.rs b/crates/spk-cli/cmd-repo/src/cmd_repo.rs index 29bcedbdd4..4de3e3550a 100644 --- a/crates/spk-cli/cmd-repo/src/cmd_repo.rs +++ b/crates/spk-cli/cmd-repo/src/cmd_repo.rs @@ -88,7 +88,7 @@ impl RepoCommand { // spk repo index ... Self::Index { repo, update } => { - // Generate or update an index a repo. The repo must + // Generate or update an index in a repo. The repo must // be the underlying repo and not an indexed repo. So as // a safety measure, this disables index use for this // command regardless of config or command line flags. @@ -141,7 +141,7 @@ impl RepoCommand { .update_packages(&repo_to_index, &idents) .await? } - Err(err) => { + Err(storage::Error::IndexOpenError(err)) => { // There isn't an existing index, so generate one from scratch that // will also include the update package version. tracing::warn!("Failed to load flatbuffer index: {err}"); @@ -150,6 +150,9 @@ impl RepoCommand { was_full_index = " [no previous index, so a full index was created]".to_string() } + Err(err) => { + return Err(err.into()); + } }; tracing::info!( diff --git a/crates/spk-cli/common/src/flags.rs b/crates/spk-cli/common/src/flags.rs index 9b6d6ef5bd..9dc506d0ac 100644 --- a/crates/spk-cli/common/src/flags.rs +++ b/crates/spk-cli/common/src/flags.rs @@ -6,8 +6,7 @@ mod variant; use std::collections::HashSet; use std::convert::From; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; use clap::{Args, ValueEnum, ValueHint}; use miette::{Context, IntoDiagnostic, Result, bail, miette}; @@ -64,10 +63,13 @@ static SPK_SOLVER_OUTPUT_TO_DIR: &str = "SPK_SOLVER_OUTPUT_TO_DIR"; static SPK_SOLVER_OUTPUT_TO_DIR_MIN_VERBOSITY: &str = "SPK_SOLVER_OUTPUT_TO_DIR_MIN_VERBOSITY"; static SPK_SOLVER_OUTPUT_FILE_PREFIX: &str = "SPK_SOLVER_OUTPUT_FILE_PREFIX"; -static DISABLE_INDEX_USE: Lazy = Lazy::new(|| AtomicBool::new(false)); +static DISABLE_INDEX_USE: Lazy> = Lazy::new(|| Mutex::new(false)); pub fn disable_index_use() { - DISABLE_INDEX_USE.store(true, std::sync::atomic::Ordering::Release); + let mut lock = DISABLE_INDEX_USE + .lock() + .expect("Should be able to get DISABLE_INDEX_USE value to update it"); + *lock = true; } #[derive(Args, Clone)] @@ -1006,17 +1008,16 @@ where Ok((found, configured.template.file_path().to_owned())) } -/// The index use command line setting options that can be used to -/// override index usage set in the spk config file. The default for a -/// repository in the spk config is to use an index if one exists, -/// unless the repository is called 'local'. The setting for an -/// individual repository can be changed in the config file, or by -/// using the matching environment variables. +/// The command line options that can be used to override the index +/// usage configured in the spk config file. The default is to use an +/// index if one exists, unless the repository is called 'local'. The +/// setting for an individual repository can be changed in the config +/// file, or by using the matching environment variables. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] pub enum IndexUse { - /// Use the index use settings from the repository configurations - /// in the spk config file (or their environment variable - /// overrides, if any are set). + /// Use the settings from the repository configurations in the spk + /// config file (or their environment variable overrides, if any + /// are set). ConfigFile, /// Disable all index use globally regardless of any setting in /// the spk config file (or environment variables). @@ -1193,7 +1194,9 @@ impl Repositories { // Check whether using the indexes for the repos is globally // disabled by the spk command, such as 'spk repo index' or // 'spk info'. - let disable_all_index_use = DISABLE_INDEX_USE.load(std::sync::atomic::Ordering::Relaxed); + let disable_all_index_use = *DISABLE_INDEX_USE + .lock() + .expect("Should be able to get a lock on DISABLE_INDEX_USE setting"); // Check the override from the command line flag, if any let use_index_cli_override = match self.index_use { diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 9995f75a21..3a6cb54415 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -410,9 +410,16 @@ impl FlatBufferRepoIndex { // package should be updated. let mut versions_to_update: HashMap> = HashMap::new(); for package_version in package_versions { + // This sets the entry to an empty set if the + // package_version is really just a package without a + // target version number. let entry = versions_to_update .entry(package_version.name().to_owned()) .or_default(); + + // A package_version with a version number (target) will + // have that version number added to the existing set of + // versions. if let Some(version) = package_version.target() { tracing::info!("adding to versions: {package_version}"); entry.insert(version.clone()); @@ -449,6 +456,8 @@ impl FlatBufferRepoIndex { let mut num_versions = 0; let mut num_builds = 0; + let empty_set = HashSet::new(); + // Process the packages, checking for the ones to update and // pulling from the correct data source for each. for name in &package_names { @@ -478,9 +487,10 @@ impl FlatBufferRepoIndex { // update. An empty list of versions to update means // update all the package's versions. if package_names_to_update.contains(name) - && let v2u = versions_to_update - .get(name) - .expect("a package to update should have a versions set, even an empty one") + && let v2u = match versions_to_update.get(name) { + Some(version_set) => version_set, + None => &empty_set, + } && (v2u.is_empty() || v2u.contains(version)) { // Get the updated data from the repo diff --git a/docs/admin/config.md b/docs/admin/config.md index e18b7788eb..37ce9e7382 100644 --- a/docs/admin/config.md +++ b/docs/admin/config.md @@ -317,5 +317,5 @@ use_index = true verify_index_before_use = true # The kind of index format to use for this repository. SPK currently supports # a flatbuffer based index. -# index_kind = "flatb" +index_kind = "flatb" ``` diff --git a/docs/ref/indexes.md b/docs/ref/indexes.md index c862f4c381..a504393a19 100644 --- a/docs/ref/indexes.md +++ b/docs/ref/indexes.md @@ -4,9 +4,9 @@ summary: Indexes for improving solve times weight: 120 --- -This explains indexes, indexing, index controls, and index requirements in `spk`. +This explains indexes, indexing, index controls, and index requirements in `SPK`. -## Spk Repository Indexes +## SPK Repository Indexes `SPK` supports generating a packages index for a repository to help with solves. An index must have been generated separately for a @@ -17,30 +17,30 @@ The index is designed to help the solvers with solves (package look-ups, deprecation checks, pre-loading non-package variable references, getting install requirements, etc.). It does not contain the full package data. So it does not have the information needed to -help other `spk` operations, e.g. building or testing a package. +help other `SPK` operations, e.g. building or testing a package. If indexing is enabled, you have to generate an index before trying to use it in a solve. They are not generated on the fly (outside of tiny repositories for automated tests). If index use is enabled, but no index has been generated for a -repository, `spk` will fallback to using the underlying repository's +repository, `SPK` will fallback to using the underlying repository's packages directly. This typically results in slower solves, particularly with larger repositories. If a solve uses multiple repositories and indexes exist for some or -all of them, `spk` will use the indexes that exist. +all of them, `SPK` will use the indexes that exist. ### Enabling/Disabling Indexes -`spk` index use can be enabled or disabled in the `spk` config file. +`SPK` index use can be enabled or disabled in the `SPK` config file. It is enabled by default but require index generation and index updating to be set up, on a per repository basis, before it will have an impact. Setting this up is recommended for repositories with a large number of packages. -Most `spk` commands can use the `--index-use ` option to override +Most `SPK` commands can use the `--index-use ` option to override repository index use on a per command basis. The `spk repo index ...` command always has index use disabled because it operates on the indexes themselves. `spk info ...` also disables indexes because @@ -70,12 +70,12 @@ You do not have to generate a full index every time a package changes, you can update a package in an existing index, see the next section. -### Updating an existing Index +### Updating an Existing Index Updating a package in an existing index, such as after a new build is published or a package is deprecated, is not automatic. -A site using `spk` has to set up a system to trigger index updates +A site using `SPK` has to set up a system to trigger index updates when they want them to happen. The recommendation is after a new package is published, deprecated, undeprecated, or deleted. But regular periodic complete index generation may also work for a site, @@ -89,22 +89,22 @@ The `--update` option can be given multiple times to update several packages at a once, e.g.: `spk repo index -r origin --update python --update zlib` -The `--update` option take a package/version as well. This lets the +The `--update` option takes a package/version as well. This lets the update be restricted to a specific version of a package. This can make -for shorter update times for packages with large numbers of versions, +for shorter update times for packages with a large number of versions, or builds per version, e.g.: `spk repo index -r origin --update python/3.10.8 --update zlib/1.2.12` Those commands will read in the existing index for the repository and update the versions and builds of the named package in the index. It is faster than generating an index from scratch. It has to be run once -per repository to update the given package or packages in that +per repository to update the given package or packages in each repository's index. See `spk repo index -h` for more details. -## Index vs Repository mismatches - updates are important +## Index vs Repository Mismatches - Updates are Important When index use is enabled, it is important to update the indexes when changes happen to the repository and its packages. Otherwise, the @@ -115,15 +115,15 @@ can lead to discrepancies in solves. ## Index Details -### Index file location +### Index File Location An index is stored in a file inside the repository it indexes. Only -spk filesystem repositories (based on spfs fs repositories) support +SPK filesystem repositories (based on spfs fs repositories) support indexes. The file will be kept in the `index/spk/` sub-directory of an spfs fs repository. -### Structure and types in SPK +### Structure and Types in SPK An index is implemented by a set of types in the spk-storage and spk-schema crates that work together to wrap a repository with its @@ -149,15 +149,15 @@ in the 'spk-schema' crate: -### Flatbuffer index schema +### Flatbuffer Index Schema -`spk` uses flatbuffers for the index data format on disk. This is fast +`SPK` uses flatbuffers for the index data format on disk. This is fast to read and use, but slow to generate. Updates to an existing index require an index to be read in completely and written out again with the updated data. It does not support updating complex structures in-place. -The `spk-proto` crate contains spk's flatbuffers schema for an +The `spk-proto` crate contains SPK's flatbuffers schema for an index. It requires `flatc` to be installed to generate the rust code for the index schema. @@ -193,74 +193,74 @@ including which fields of rust objects are being ignored, omitted from an index. -### How to evolve the index schema +### How to Evolve the Index Schema The flatbuffers data format supports adding new fields and structures without breaking existing code, provided fields are not deleted. New fields should be added only to the ends of tables. -The index schema's version number lets `spk` check that the index data -is compatible with the current spk being run. That is, that the code +The index schema's version number lets `SPK` check that the index data +is compatible with the current `SPK` being run. That is, that the code understands this version of the schema. There will be situations where -multiple versions of spk are being used in a site and they may not all -be able to use index data generated by other versions of spk. +multiple versions of `SPK` are being used in a site and they may not all +be able to use index data generated by other versions of `SPK`. -Spk includes the schema version number in the index file name to allow +`SPK` includes the schema version number in the index file name to allow multiple index schemas to co-exist during a transition between index versions. -#### Adding a new field +#### Adding a New Field When a new field needs to be added to the index schema, the developer should: - Increment the schema version number - Add the new field to the schema -- Increment spk's compatible schema version number -- Add spk code to read and write the new field -- Make a new release of spk -- Generate a new index using the new spk build -- Transition the site to the new spk release +- Increment `SPK`'s compatible schema version number +- Add SPK code to read and write the new field +- Make a new release of SPK +- Generate a new index using the new SPK +- Transition the site to the new SPK release - Once the transition is complete, retire the older index data file and generation -During the transition the older version of spk will use the older -index format(s) and the newer spk will use the newer one. Both indexes +During the transition the older version of SPK will use the older +index format(s) and the newer SPK will use the newer one. Both indexes will have to be generated and kept up to date during the transition. -Once the older spk version is fully retired, the older index data can +Once the older SPK version is fully retired, the older index data can be retired too. -#### Stop reading an existing field +#### Stop Reading an Existing Field When an older field does not need to be read anymore, the developer should: -- Add spk code to stop reading the old field -- Make a new release of spk that no longer reads the field -- Transition the site to the new spk release +- Add SPK code to stop reading the old field +- Make a new release of SPK that no longer reads the field +- Transition the site to the new SPK release There is no need to change the index schema version for to stop reading a field, and index generation remains unchanged (even though it -includes data that is not read by spk anymore). +includes data that is not read by SPK anymore). -#### Stop populating an existing field +#### Stop Populating an Existing Field If an older field no longer needs to be populated, the developer should: - Check the field is no longer read (see above). If it is still being read then continuing may cause unexpected results. - Increment the schema version number -- Increment spk's compatible schema version number -- Add spk code to write None (or the default enum entry depending on the field type) into the index -- Make a new release of spk that contains this change -- Transition the site to the new spk release +- Increment SPK's compatible schema version number +- Add SPK code to write None (or the default enum entry depending on the field type) into the index +- Make a new release of SPK that contains this change +- Transition the site to the new SPK release Technically, the field will still be present in the index schema, and be set in index generation because it is needed in the index's object constructors. But the None values will reduce the space the field takes up. And while this kind of change in backwards compatible, the -empty field may be treated a default value by older versions of spk +empty field may be treated a default value by older versions of SPK when read from the index - thus the need to version up the index schema to ensure this does not cause unexpected behaviour when -multiple versions of spk are in use. +multiple versions of SPK are in use. -#### Removing an old field +#### Removing an Old Field Removing an old field will break backwards compatibility of the flatbuffers format. It is not something that should be done lightly. @@ -268,19 +268,19 @@ flatbuffers format. It is not something that should be done lightly. When a new field needs to be removed from the index schema, the developer should: - Double check the reason for removing the field, it may not be worth the trouble. This is a 2 stage process: - Stage 1: - - Add spk code to stop reading the old field - - Make a new release of spk that no longer reads the field - - Transition the site to the new spk release + - Add SPK code to stop reading the old field + - Make a new release of SPK that no longer reads the field + - Transition the site to the new SPK release - Stage 2: - Increment the schema version number - - Increment spk's compatible schema version number - - Add spk code to stop writing the old field at all - - Make another new release of spk that no longer writes the field - - Transition the site to the new spk release + - Increment SPK's compatible schema version number + - Add SPK code to stop writing the old field at all + - Make another new release of SPK that no longer writes the field + - Transition the site to the new SPK release - Once that transition is complete, retire the older index data file and generation process -During the transition the older version of spk will use the older -index format(s) and the newer spk will use the newer one. Both indexes +During the transition the older version of SPK will use the older +index format(s) and the newer SPK will use the newer one. Both indexes will have to be generated and kept up to date during the transition. -Once the older spk version is fully retired, the older index data and +Once the older SPK version is fully retired, the older index data and its generation can be retired too. From 77486abc842abaeaf9ec4729852a14c98282adc3 Mon Sep 17 00:00:00 2001 From: David Gilligan-Cook Date: Fri, 8 May 2026 16:44:28 -0700 Subject: [PATCH 18/18] Swaps match and empty set out for is_some_and in package version to update check. Signed-off-by: David Gilligan-Cook --- crates/spk-storage/src/storage/flatbuffer_index.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/crates/spk-storage/src/storage/flatbuffer_index.rs b/crates/spk-storage/src/storage/flatbuffer_index.rs index 3a6cb54415..5bb3597958 100644 --- a/crates/spk-storage/src/storage/flatbuffer_index.rs +++ b/crates/spk-storage/src/storage/flatbuffer_index.rs @@ -456,8 +456,6 @@ impl FlatBufferRepoIndex { let mut num_versions = 0; let mut num_builds = 0; - let empty_set = HashSet::new(); - // Process the packages, checking for the ones to update and // pulling from the correct data source for each. for name in &package_names { @@ -487,11 +485,9 @@ impl FlatBufferRepoIndex { // update. An empty list of versions to update means // update all the package's versions. if package_names_to_update.contains(name) - && let v2u = match versions_to_update.get(name) { - Some(version_set) => version_set, - None => &empty_set, - } - && (v2u.is_empty() || v2u.contains(version)) + && versions_to_update + .get(name) + .is_some_and(|v2u| v2u.is_empty() || v2u.contains(version)) { // Get the updated data from the repo tracing::info!("Reached version {version} of the {name} package to update");