diff --git a/src/Cargo.lock b/src/Cargo.lock index c19d692..f2c8d0f 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1685,7 +1685,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -2030,6 +2030,7 @@ dependencies = [ "bitflags", "const_format", "criterion", + "dirs", "globset", "grep-regex", "grep-searcher", @@ -2049,6 +2050,8 @@ dependencies = [ "serde", "serde_json", "serial_test", + "shellexpand", + "soft-canonicalize", "temp-env", "tempfile", "thiserror 2.0.18", @@ -2450,6 +2453,12 @@ dependencies = [ "syn", ] +[[package]] +name = "proc-canonicalize" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6127461ef34c8119591fc98e4046230f4f953bec84aaebfe326e3d82e8f383da" + [[package]] name = "proc-macro-crate" version = "3.5.0" @@ -2494,7 +2503,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls 0.23.36", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -2532,7 +2541,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -3419,6 +3428,15 @@ dependencies = [ "digest", ] +[[package]] +name = "shellexpand" +version = "3.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" +dependencies = [ + "dirs", +] + [[package]] name = "shlex" version = "1.3.0" @@ -3473,6 +3491,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "soft-canonicalize" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaf99d1bbb279dfc59a8642f42fa4b6137935e506c97d1e2e57d282f417f163d" +dependencies = [ + "proc-canonicalize", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" diff --git a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index f499217..e3b717c 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -44,6 +44,12 @@ serde_json = "1.0" # Zero overhead compile time bitflag generation bitflags = "2.11.0" +# Shell-like expansion for home directory paths (~/ and $HOME/) +shellexpand = "3.1.2" + +# Cross-platform canonicalization for non-existent paths +soft-canonicalize = "0.5.5" + # Fast binary serialization for catalog cache types bitcode = "0.6.9" @@ -102,6 +108,7 @@ llm-coding-tools-bubblewrap = { version = "0.1.0", path = "../llm-coding-tools-b [dev-dependencies] serial_test = "3" tempfile = "3.27" +dirs = "6" # For tests (async and blocking with wiremock) tokio = { version = "1.51", features = ["rt-multi-thread", "macros"] } wiremock = "0.6" diff --git a/src/llm-coding-tools-core/README.md b/src/llm-coding-tools-core/README.md index 63a8dd9..09c2fe4 100644 --- a/src/llm-coding-tools-core/README.md +++ b/src/llm-coding-tools-core/README.md @@ -69,10 +69,14 @@ Path-based tools are generic over [`PathResolver`], so wrappers can choose unres - [`AbsolutePathResolver`] enforces absolute-path inputs (unrestricted mode). - [`AllowedPathResolver`] constrains operations to configured directories (sandbox mode). +- [`AllowedGlobResolver`] constrains to directories with glob pattern filtering (fine-grained sandbox mode). - Failed resolution rejects traversal and out-of-sandbox paths before tool execution. ```rust,no_run -use llm_coding_tools_core::{AbsolutePathResolver, AllowedPathResolver, PathResolver, ToolResult}; +use llm_coding_tools_core::{ + path::{AllowedGlobResolver, GlobPolicy, RuleAction}, + AbsolutePathResolver, AllowedPathResolver, PathResolver, ToolResult, +}; fn demo() -> ToolResult<()> { // Unrestricted mode: any absolute path is allowed. @@ -82,6 +86,19 @@ fn demo() -> ToolResult<()> { // Sandboxed mode: only configured directories are allowed. let sandbox = AllowedPathResolver::new(["/workspace/project", "/tmp"])?; let _lib = sandbox.resolve("src/lib.rs")?; + + // Fine-grained sandbox (last-match-wins). + let glob = AllowedGlobResolver::new(["/workspace/project"])? + .with_policy( + GlobPolicy::builder() + .add("src/**", RuleAction::Allow)? // Matches src/lib.rs + .add("*.rs", RuleAction::Allow)? // Also matches src/lib.rs + .add("target/**", RuleAction::Deny)? // Blocks target/ even if *.rs matches + .build()? + ); + let _lib = glob.resolve("src/lib.rs")?; + let _main = glob.resolve("main.rs")?; + // glob.resolve("target/debug/app")?; // Denied Ok(()) } ``` @@ -313,6 +330,7 @@ let key = resolver.resolve("OPENAI_API_KEY"); [`ToolContext`]: crate::context::ToolContext [`PathResolver`]: crate::PathResolver [`AbsolutePathResolver`]: crate::AbsolutePathResolver +[`AllowedGlobResolver`]: crate::path::AllowedGlobResolver [`AllowedPathResolver`]: crate::AllowedPathResolver [`permissions`]: crate::permissions [`Rule`]: crate::permissions::Rule diff --git a/src/llm-coding-tools-core/src/lib.rs b/src/llm-coding-tools-core/src/lib.rs index 8459858..49c6d3d 100644 --- a/src/llm-coding-tools-core/src/lib.rs +++ b/src/llm-coding-tools-core/src/lib.rs @@ -26,7 +26,7 @@ pub use context::ToolContext; pub use credentials::{CredentialLookup, CredentialResolver}; pub use error::{ToolError, ToolResult}; pub use output::ToolOutput; -pub use path::{AbsolutePathResolver, AllowedPathResolver, PathResolver}; +pub use path::{AbsolutePathResolver, AllowedGlobResolver, AllowedPathResolver, PathResolver}; pub use system_prompt::SystemPromptBuilder; // Re-export tools (always available, sync or async based on runtime feature) diff --git a/src/llm-coding-tools-core/src/path/allowed.rs b/src/llm-coding-tools-core/src/path/allowed.rs index 395ffb9..f3430e2 100644 --- a/src/llm-coding-tools-core/src/path/allowed.rs +++ b/src/llm-coding-tools-core/src/path/allowed.rs @@ -3,6 +3,7 @@ use super::PathResolver; use crate::context::PathMode; use crate::error::{ToolError, ToolResult}; +use soft_canonicalize::soft_canonicalize; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -37,17 +38,24 @@ pub struct AllowedPathResolver { impl AllowedPathResolver { /// Creates a new resolver with the given allowed directories. /// - /// Each directory is canonicalized during construction to ensure - /// consistent path comparison. Returns an error if any directory - /// doesn't exist or can't be canonicalized. + /// Each directory is resolved during construction to ensure consistent path + /// comparison. Returns an error if any directory doesn't exist or can't be + /// resolved. pub fn new(allowed_paths: impl IntoIterator>) -> ToolResult { let canonicalized: Result, _> = allowed_paths .into_iter() .map(|p| { let path = p.as_ref(); - path.canonicalize().map_err(|e| { + if !path.is_dir() { + return Err(ToolError::InvalidPath(format!( + "failed to resolve allowed path '{}': path is not an existing directory", + path.display() + ))); + } + + soft_canonicalize(path).map_err(|e| { ToolError::InvalidPath(format!( - "failed to canonicalize allowed path '{}': {}", + "failed to resolve allowed path '{}': {}", path.display(), e )) @@ -60,10 +68,15 @@ impl AllowedPathResolver { }) } - /// Creates a resolver from already-canonicalized paths. + /// Creates a resolver from already-canonicalized paths, skipping + /// filesystem validation. + /// + /// A canonical path is absolute, with all symlinks resolved and all + /// `.` and `..` components normalized. Use [`std::fs::canonicalize`] or + /// [`std::path::Path::canonicalize`] to canonicalize paths. /// - /// Use this when paths are known to be valid and canonicalized, - /// skipping the filesystem check. + /// Use this when paths are known to be valid and canonicalized, skipping + /// the filesystem check. /// /// # Safety /// @@ -104,16 +117,11 @@ impl PathResolver for AllowedPathResolver { continue; } - // For non-existent paths (write operations), validate parent - if let Some(parent) = candidate.parent() { - if let Ok(canonical_parent) = parent.canonicalize() { - if canonical_parent.starts_with(base) { - // Parent is valid, construct the final path - let file_name = candidate.file_name().ok_or_else(|| { - ToolError::InvalidPath("path has no file name".into()) - })?; - return Ok(canonical_parent.join(file_name)); - } + // Non-existent paths still need a resolved absolute target so we can + // validate containment consistently across platforms. + if let Ok(resolved) = soft_canonicalize(&candidate) { + if resolved.starts_with(base) { + return Ok(resolved); } } } @@ -147,6 +155,7 @@ mod tests { #[case::nested_existing_file("subdir/nested.txt", "nested.txt")] // exists: created by setup_test_dir() #[case::new_file_in_root("new_file.txt", "new_file.txt")] // does NOT exist: tests write path resolution #[case::new_file_in_subdir("subdir/new_file.txt", "new_file.txt")] // does NOT exist: tests write path resolution + #[case::new_file_in_missing_directories("new_dir/nested/new_file.txt", "new_file.txt")] fn resolves_valid_paths_successfully( #[case] input_path: &str, #[case] expected_filename: &str, @@ -167,6 +176,7 @@ mod tests { #[rstest] #[case::parent_traversal("../../../etc/passwd")] #[case::nested_parent_traversal("subdir/../../../new_file.txt")] + #[case::missing_dir_parent_traversal("new_dir/../../new_file.txt")] fn rejects_paths_that_escape_allowed_directory(#[case] input_path: &str) { let dir = setup_test_dir(); let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); @@ -179,6 +189,16 @@ mod tests { ); } + #[test] + fn resolves_existing_file_through_missing_directory_parent_traversal() { + let dir = setup_test_dir(); + let resolver = AllowedPathResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("subdir/new_dir/../../file.txt"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("file.txt")); + } + #[test] fn tries_multiple_allowed_paths() { let dir1 = setup_test_dir(); diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs new file mode 100644 index 0000000..36300a1 --- /dev/null +++ b/src/llm-coding-tools-core/src/path/allowed_glob/mod.rs @@ -0,0 +1,464 @@ +//! Glob-aware allowed directory path resolver implementation. +//! +//! Provides [`AllowedGlobResolver`] which restricts path access to allowed +//! directories with glob pattern filtering. + +mod normalize; +mod policy; + +use super::PathResolver; +use crate::context::PathMode; +use crate::error::{ToolError, ToolResult}; +use normalize::expand_shell; +use soft_canonicalize::soft_canonicalize; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +pub use policy::{GlobPolicy, GlobPolicyBuilder, RuleAction}; + +/// Path resolver that restricts access to allowed directories with glob pattern filtering. +/// +/// Resolves paths relative to configured base directories, validating they stay within +/// allowed boundaries. Prevents path traversal attacks and applies glob policy filtering. +/// +/// # Path Semantics +/// +/// - **Slash normalization**: Paths normalized to `/` for consistent cross-platform matching +/// - **Shell expansion (directories only)**: Base directories support `~/`, +/// `$HOME/`, and other shell patterns. Patterns are always relative to base +/// directory without shell expansion. +/// - **Relative matching**: Patterns match relative paths within base +/// directories (e.g., `src/lib.rs`) +/// - **Last-match-wins**: Last matching rule wins, enabling override patterns via reverse +/// iteration for O(k) short-circuit. +/// +/// # Security +/// +/// Path traversal is prevented by resolving symlinks and normalizing the +/// resolved path, verifying it stays within allowed base directories, and +/// applying glob policy. Patterns match normalized relative paths with +/// last-match-wins semantics. Unmatched paths are denied. +#[derive(Debug, Clone)] +pub struct AllowedGlobResolver { + /// Allowed base directories. + base_directories: Arc<[Arc]>, + /// Optional glob policy for file filtering. + policy: Option>, +} + +impl AllowedGlobResolver { + /// Creates a new resolver with the given allowed directories. + /// + /// Directories are resolved (symlinks expanded, made absolute, and + /// normalized) during construction. Shell patterns (`~/`, `$HOME/`, `$VAR`, + /// etc.) are expanded before resolution. + /// + /// Returns `ToolError::InvalidPath` if any directory doesn't exist or cannot + /// be resolved. + /// + /// # Example + /// + /// ``` + /// use llm_coding_tools_core::path::AllowedGlobResolver; + /// use std::path::PathBuf; + /// + /// # fn example() -> Result<(), Box> { + /// let directories = vec![PathBuf::from("/home/user/project")]; + /// let resolver = AllowedGlobResolver::new(directories)?; + /// # Ok(()) + /// # } + /// ``` + pub fn new(directories: impl IntoIterator>) -> ToolResult { + let resolved: Result]>, _> = directories + .into_iter() + .map(|p| { + let path = p.as_ref(); + let expanded = expand_shell(&path.to_string_lossy())?; + if !expanded.is_dir() { + return Err(ToolError::InvalidPath(format!( + "failed to resolve base directory '{}': path is not an existing directory", + path.display() + ))); + } + + soft_canonicalize(&expanded) + .map(|pb| Arc::from(pb.into_boxed_path())) + .map_err(|e| { + ToolError::InvalidPath(format!( + "failed to resolve base directory '{}': {}", + path.display(), + e + )) + }) + }) + .collect(); + + Ok(Self { + base_directories: resolved?, + policy: None, + }) + } + + /// Creates a resolver from already-resolved paths, skipping filesystem + /// validation. + /// + /// A resolved path is absolute, with all symlinks expanded and all `.` and + /// `..` components normalized. Use [`std::fs::canonicalize`] or + /// [`std::path::Path::canonicalize`] to resolve paths. + /// + /// Caller must ensure paths are resolved. Using non-resolved paths may + /// allow path traversal attacks. + pub fn from_canonical(directories: impl IntoIterator>) -> Self { + Self { + base_directories: directories + .into_iter() + .map(|p| Arc::from(p.as_ref())) + .collect(), + policy: None, + } + } + + /// Sets the glob policy for this resolver. + /// + /// Returns self for method chaining. + pub fn with_policy(mut self, policy: GlobPolicy) -> Self { + self.policy = Some(Arc::new(policy)); + self + } + + /// Returns the allowed base directories. + pub fn base_directories(&self) -> &[Arc] { + &self.base_directories + } + + /// Returns the current glob policy, if any. + pub fn policy(&self) -> Option<&GlobPolicy> { + self.policy.as_deref() + } +} + +impl PathResolver for AllowedGlobResolver { + const PATH_MODE: PathMode = PathMode::Allowed; + + fn resolve(&self, path: &str) -> ToolResult { + let input_path = PathBuf::from(path); + + for base_dir in self.base_directories.iter() { + // Relative input joins base_dir; absolute input overrides it. + let candidate = base_dir.join(&input_path); + + // Existing file/dir: canonicalize resolves symlinks and normalizes. + if let Ok(resolved) = candidate.canonicalize() { + // Reject if symlink escapes outside base_dir. + if !resolved.starts_with(base_dir) { + continue; + } + + // Apply glob policy to the relative path. + let relative_path = resolved.strip_prefix(base_dir).unwrap_or(Path::new("")); + let normalized_relative = normalize::normalize_path(relative_path); + + if let Some(policy) = &self.policy { + if !policy.is_allowed(&normalized_relative) { + continue; + } + } + + return Ok(resolved); + } + + // Non-existent paths still need a resolved absolute target so we can + // validate containment and glob policy consistently across platforms. + if let Ok(target_path) = soft_canonicalize(&candidate) { + if !target_path.starts_with(base_dir) { + continue; + } + + // Apply glob policy to the target relative path. + let relative_path = target_path.strip_prefix(base_dir).unwrap_or(Path::new("")); + let normalized_relative = normalize::normalize_path(relative_path); + + if let Some(policy) = &self.policy { + if !policy.is_allowed(&normalized_relative) { + continue; + } + } + + return Ok(target_path); + } + } + + Err(ToolError::InvalidPath(format!( + "path '{}' is not within allowed directories", + path + ))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + use std::fs; + use tempfile::TempDir; + + fn setup_test_dir() -> TempDir { + let dir = TempDir::new().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::create_dir_all(dir.path().join("target/debug")).unwrap(); + fs::write(dir.path().join("src/lib.rs"), "content").unwrap(); + fs::write(dir.path().join("src/main.rs"), "content").unwrap(); + fs::write(dir.path().join("Cargo.toml"), "content").unwrap(); + fs::write(dir.path().join("target/debug/app"), "binary").unwrap(); + dir + } + + // Keeps policy-focused tests small and readable. + fn resolver_with_policy(dir: &TempDir, pattern: &str) -> AllowedGlobResolver { + let policy = GlobPolicy::builder() + .allow(pattern) + .unwrap() + .build() + .unwrap(); + + AllowedGlobResolver::new(vec![dir.path().to_path_buf()]) + .unwrap() + .with_policy(policy) + } + + // Builds a deeper tree for globstar matching cases. + fn setup_src_globstar_dir() -> TempDir { + let dir = TempDir::new().unwrap(); + fs::create_dir_all(dir.path().join("src/deep/nested")).unwrap(); + fs::write(dir.path().join("src/deep/nested/mod.rs"), "content").unwrap(); + fs::write(dir.path().join("src/other.rs"), "content").unwrap(); + fs::write(dir.path().join("src/main.rs"), "content").unwrap(); + fs::write(dir.path().join("src/lib.rs"), "content").unwrap(); + dir + } + + #[test] + fn constructs_with_valid_directories() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]); + assert!(resolver.is_ok()); + } + + #[test] + fn constructs_with_non_resolved_directory_and_stores_resolved() { + let dir = setup_test_dir(); + let resolved = dir.path().canonicalize().unwrap(); + + fs::create_dir_all(dir.path().join("subdir")).unwrap(); + + // Build a path that resolves back to the temp dir. + let non_resolved = dir.path().join("subdir").join(".."); + + // Construction should canonicalize before storing the base directory. + let resolver = AllowedGlobResolver::new(vec![&non_resolved]).unwrap(); + + let stored = resolver.base_directories()[0].as_ref(); + assert_eq!( + stored, + resolved.as_path(), + "stored directory should be resolved" + ); + + // The stored path should not retain unresolved components. + assert!( + !stored.to_string_lossy().contains("/subdir/../"), + "stored path should not contain unresolved components" + ); + } + + #[test] + fn fails_to_construct_with_nonexistent_directory() { + let result = AllowedGlobResolver::new(vec![PathBuf::from("/nonexistent/path/xyz")]); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("failed to resolve base directory")); + } + + #[test] + fn from_canonical_skips_validation() { + let resolver = AllowedGlobResolver::from_canonical(vec![PathBuf::from("/some/path")]); + assert_eq!(resolver.base_directories().len(), 1); + } + + #[test] + fn resolves_existing_file_within_base_directory() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + // lib.rs exists from setup_test_dir call, should pass. + let result = resolver.resolve("src/lib.rs"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("lib.rs")); + } + + #[test] + fn resolves_new_file_within_base_directory() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + // new_file.rs does not exist initially but is in allowed directory, should pass. + let result = resolver.resolve("src/new_file.rs"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("new_file.rs")); + } + + #[test] + fn resolves_new_file_when_intermediate_directories_do_not_exist() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + // write targets should still resolve when some parent directories do not exist yet. + let result = resolver.resolve("src/new_dir/nested/new_file.rs"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("src/new_dir/nested/new_file.rs")); + } + + #[test] + fn resolves_new_file_with_missing_directories_under_matching_policy() { + let dir = setup_test_dir(); + let resolver = resolver_with_policy(&dir, "src/**/*.rs"); + + let result = resolver.resolve("src/new_dir/nested/new_file.rs"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("src/new_dir/nested/new_file.rs")); + } + + #[test] + fn resolves_existing_file_through_missing_directory_parent_traversal() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("src/new_dir/../../Cargo.toml"); + assert!(result.is_ok()); + assert!(result.unwrap().ends_with("Cargo.toml")); + } + + #[rstest] + #[case::parent_traversal("../../../etc/passwd")] + #[case::nested_traversal("src/../../../etc/passwd")] + #[case::simple_parent("../Cargo.toml")] + fn rejects_path_traversal_attempts(#[case] input: &str) { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + // Different traversal shapes should all be rejected the same way. + let result = resolver.resolve(input); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("not within allowed")); + } + + #[test] + #[cfg(unix)] + fn rejects_symlink_escape_attempt() { + use std::os::unix::fs::symlink; + + // We don't allow escapes via symlinks. + // The resolver should catch/reject the path. + let dir = setup_test_dir(); + let escape_target = TempDir::new().unwrap(); + fs::write(escape_target.path().join("secret.txt"), "secret").unwrap(); + + let symlink_path = dir.path().join("escape_link"); + symlink(escape_target.path(), &symlink_path).unwrap(); + + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let result = resolver.resolve("escape_link/secret.txt"); + assert!(result.is_err(), "symlink escape should be blocked"); + let err = result.unwrap_err(); + assert!(err.to_string().contains("not within allowed")); + } + + // When multiple base directories are present; we should test all of them + // in a specified order. + #[test] + fn tries_multiple_base_directories() { + let dir1 = setup_test_dir(); + let dir2 = setup_test_dir(); + fs::write(dir2.path().join("only_in_dir2.txt"), "content").unwrap(); + + let resolver = + AllowedGlobResolver::new(vec![dir1.path().to_path_buf(), dir2.path().to_path_buf()]) + .unwrap(); + + let result = resolver.resolve("only_in_dir2.txt"); + assert!(result.is_ok()); + } + + #[rstest] + #[case::src_lib_should_be_allowed("src/lib.rs", true)] + #[case::cargo_toml_should_be_denied("Cargo.toml", false)] + #[case::target_binary_should_be_denied("target/debug/app", false)] + fn resolver_with_src_policy_should_allow_only_matching_relative_paths( + #[case] input: &str, + #[case] expected_ok: bool, + ) { + let dir = setup_test_dir(); + let resolver = resolver_with_policy(&dir, "src/**"); + + // One policy, multiple paths: only matching relative paths should resolve. + let result = resolver.resolve(input); + assert!( + result.is_ok() == expected_ok, + "path '{input}' should {}match 'src/**'", + if expected_ok { "" } else { "not " } + ); + } + + #[rstest] + #[case::new_rs_file_should_be_allowed("new_file.rs", true)] + #[case::new_txt_file_should_be_denied("new_file.txt", false)] + fn resolver_should_check_policy_for_new_paths(#[case] input: &str, #[case] expected_ok: bool) { + let dir = setup_test_dir(); + let resolver = resolver_with_policy(&dir, "*.rs"); + + // New paths are checked against policy before the file exists. + let result = resolver.resolve(input); + assert!( + result.is_ok() == expected_ok, + "path '{input}' should {}match '*.rs'", + if expected_ok { "" } else { "not " } + ); + } + + #[test] + fn returns_resolved_path_without_dotdots() { + let dir = setup_test_dir(); + let resolver = AllowedGlobResolver::new(vec![dir.path().to_path_buf()]).unwrap(); + + let resolved = resolver.resolve("src/../Cargo.toml").unwrap(); + assert!( + !resolved.to_string_lossy().contains(".."), + "resolved path should not contain '..'" + ); + } + + #[rstest] + #[case::src_lib_should_be_allowed("src/lib.rs", true)] + #[case::src_main_should_be_allowed("src/main.rs", true)] + #[case::nested_mod_should_be_allowed("src/deep/nested/mod.rs", true)] + #[case::src_other_should_be_allowed("src/other.rs", true)] + #[case::cargo_toml_should_be_denied("Cargo.toml", false)] + #[case::target_binary_should_be_denied("target/debug/app", false)] + fn resolver_with_src_globstar_policy_should_match_expected_paths( + #[case] input: &str, + #[case] expected_ok: bool, + ) { + let dir = setup_src_globstar_dir(); + let resolver = resolver_with_policy(&dir, "src/**/*.rs"); + + // Verify globstar behavior across shallow, deep, and denied paths. + let result = resolver.resolve(input); + assert!( + result.is_ok() == expected_ok, + "path '{input}' should {}match 'src/**/*.rs'", + if expected_ok { "" } else { "not " } + ); + } +} diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/normalize.rs b/src/llm-coding-tools-core/src/path/allowed_glob/normalize.rs new file mode 100644 index 0000000..9323134 --- /dev/null +++ b/src/llm-coding-tools-core/src/path/allowed_glob/normalize.rs @@ -0,0 +1,143 @@ +//! Path normalization utilities for glob matching. + +use std::path::{Path, PathBuf}; + +use crate::error::{ToolError, ToolResult}; + +/// Normalizes a path to use forward slashes for consistent glob matching. +/// +/// On Windows, converts backslashes to forward slashes. +/// On Unix, this returns the path string unchanged. +pub(crate) fn normalize_path(path: &Path) -> String { + let path_str = path.to_string_lossy(); + #[cfg(windows)] + { + path_str.replace('\\', "/") + } + #[cfg(not(windows))] + { + path_str.into_owned() + } +} + +/// Expands shell-like patterns (`~/`, `$HOME/`, `$VAR`, `${VAR:-default}`) in a +/// path string. +/// +/// Returns the expanded path on success, or a `ToolError::InvalidPath` if +/// expansion fails (e.g., environment variable not set or contains non-Unicode +/// data). Uses `shellexpand` which internally uses `dirs::home_dir()` for +/// cross-platform home detection. +pub(crate) fn expand_shell(path: &str) -> ToolResult { + shellexpand::full(path) + .map(|cow| PathBuf::from(cow.into_owned())) + .map_err(|e| { + ToolError::InvalidPath(format!( + "failed to expand shell pattern in path '{}': {}", + path, e + )) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use temp_env; + use tempfile::TempDir; + + #[cfg(windows)] + fn strip_verbatim(path: PathBuf) -> PathBuf { + PathBuf::from( + path.to_string_lossy() + .strip_prefix(r"\\?\") + .unwrap_or(&path.to_string_lossy()), + ) + } + + #[cfg(not(windows))] + fn strip_verbatim(path: PathBuf) -> PathBuf { + path + } + + #[test] + fn normalize_path_converts_backslashes_on_windows() { + #[cfg(windows)] + { + assert_eq!(normalize_path(Path::new("src\\lib.rs")), "src/lib.rs"); + assert_eq!( + normalize_path(Path::new("src\\deep\\nested\\mod.rs")), + "src/deep/nested/mod.rs" + ); + assert_eq!( + normalize_path(Path::new("C:\\Users\\test\\project")), + "C:/Users/test/project" + ); + assert_eq!( + normalize_path(Path::new("src/lib\\mod.rs")), + "src/lib/mod.rs" + ); + } + + #[cfg(not(windows))] + { + assert_eq!(normalize_path(Path::new("src/lib.rs")), "src/lib.rs"); + assert_eq!( + normalize_path(Path::new("src/deep/nested/mod.rs")), + "src/deep/nested/mod.rs" + ); + } + } + + #[test] + fn expands_home_tilde() { + #[cfg(windows)] + { + let expected_home = dirs::home_dir().expect("home directory should exist"); + let expected_home = strip_verbatim(expected_home.canonicalize().unwrap()); + + let result = strip_verbatim(expand_shell("~/project").unwrap()); + assert!(result.starts_with(&expected_home)); + assert!(result.ends_with("project")); + } + + #[cfg(not(windows))] + { + let temp_dir = TempDir::new().unwrap(); + let temp_home_path = temp_dir.path().canonicalize().unwrap(); + temp_env::with_var("HOME", Some(&temp_home_path), || { + let result = expand_shell("~/project").unwrap(); + assert!(result.starts_with(&temp_home_path)); + assert!(result.ends_with("project")); + }); + } + } + + #[test] + fn expands_home_dollar() { + let temp_dir = TempDir::new().unwrap(); + let temp_home_path = temp_dir.path().canonicalize().unwrap(); + let temp_home_path = strip_verbatim(temp_home_path); + + temp_env::with_var("HOME", Some(&temp_home_path), || { + let result = strip_verbatim(expand_shell("$HOME/workspace").unwrap()); + assert!(result.starts_with(&temp_home_path)); + assert!(result.ends_with("workspace")); + }); + } + + #[test] + fn leaves_path_without_shell_patterns_unchanged() { + let result = expand_shell("/some/path").unwrap(); + assert_eq!(result, PathBuf::from("/some/path")); + } + + #[test] + fn returns_error_for_unset_environment_variable() { + temp_env::with_var("DEFINITELY_NOT_SET_12345", None::<&str>, || { + let result = expand_shell("$DEFINITELY_NOT_SET_12345/project"); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("failed to expand shell pattern")); + assert!(err.to_string().contains("DEFINITELY_NOT_SET_12345")); + }); + } +} diff --git a/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs new file mode 100644 index 0000000..0738dc9 --- /dev/null +++ b/src/llm-coding-tools-core/src/path/allowed_glob/policy.rs @@ -0,0 +1,297 @@ +//! Glob pattern policy for path resolution. +//! +//! Defines ordered allow/deny glob patterns. Patterns are evaluated with +//! **last-match-wins** precedence using reverse iteration for efficiency. +//! If no patterns match, access is denied. +//! +//! Patterns are always relative to the base directory (root-relative) and +//! support: +//! - `*` to match any number of characters within a path component +//! - `?` to match a single character +//! - `**` to match any number of path components (including `/`) + +use crate::error::{ToolError, ToolResult}; +use globset::{Glob, GlobMatcher, GlobSet, GlobSetBuilder}; + +/// Action to take when a glob pattern matches. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RuleAction { + /// Allow access to the matched path. + Allow, + /// Deny access to the matched path. + Deny, +} + +/// Glob pattern policy for path resolution. +/// +/// Patterns are evaluated with **last-match-wins** precedence using reverse +/// iteration for early exit. If no patterns match, access is denied. +/// +/// # Example +/// +/// ``` +/// use llm_coding_tools_core::path::{GlobPolicy, GlobPolicyBuilder, RuleAction}; +/// +/// # fn example() -> Result<(), Box> { +/// let policy = GlobPolicy::builder() +/// .add("*.rs", RuleAction::Allow)? +/// .add("target/**", RuleAction::Deny)? +/// .build()?; +/// # Ok(()) +/// # } +/// ``` +pub struct GlobPolicy { + /// Pre-compiled glob matchers with their associated actions + rules: Vec<(GlobMatcher, RuleAction)>, + /// Compiled set for fast path rejection + glob_set: GlobSet, +} + +impl std::fmt::Debug for GlobPolicy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GlobPolicy") + .field("rules_count", &self.rules.len()) + .field("glob_set", &self.glob_set) + .finish() + } +} + +impl GlobPolicy { + /// Creates a new policy builder. + pub fn builder() -> GlobPolicyBuilder { + GlobPolicyBuilder::new() + } + + /// Checks if a normalized path string is allowed by this policy. + /// + /// The path must already be normalized to forward slashes. Patterns are + /// evaluated with **last-match-wins** precedence using reverse iteration + /// for early exit. If no patterns match, the path is denied. + /// + /// # Arguments + /// + /// * `normalized_path` - The already-normalized path string to check + /// (typically relative to base directory with forward slashes) + /// + /// # Returns + /// + /// `true` if the path is allowed by the last matching rule, `false` otherwise. + pub(crate) fn is_allowed(&self, normalized_path: &str) -> bool { + if self.rules.is_empty() { + return false; + } + + // Speedup: Match against all globs at once. + if !self.glob_set.is_match(normalized_path) { + return false; + } + + for (matcher, action) in self.rules.iter().rev() { + if matcher.is_match(normalized_path) { + return matches!(action, RuleAction::Allow); + } + } + + false + } +} + +/// Builder for constructing [`GlobPolicy`] instances. +#[derive(Debug, Default)] +pub struct GlobPolicyBuilder { + rules: Vec<(Glob, RuleAction)>, +} + +impl GlobPolicyBuilder { + /// Creates a new empty policy builder. + pub fn new() -> Self { + Self::default() + } + + /// Adds a pattern with the specified action. + /// + /// Patterns are evaluated in the order they are added with last-match-wins + /// semantics. Patterns are always relative to the base directory and do + /// NOT support `~/` or `$HOME/` prefixes (use home expansion in base + /// directory paths instead). + /// + /// Pattern syntax: + /// - `*` matches any number of characters + /// - `?` matches exactly one character + /// - `**` matches any number of path components (including zero) + /// - `{a,b}` matches either `a` or `b` + /// + /// Patterns are matched against the entire relative path string, so `*` and + /// `?` can match path separators (`/`). For example: + /// - `*.rs` matches `src/lib.rs` because `*` can span across `/` + /// - `*.rs` matches `main.rs` at the root level + /// - `target/**` matches `target/debug/app` (and any depth under `target/`) + /// + /// # Arguments + /// + /// * `pattern` - The glob pattern string (relative to base directory, no + /// home expansion) + /// * `action` - The rule action (`Allow` or `Deny`) + /// + /// # Returns + /// + /// `Ok(Self)` on success, allowing method chaining. + /// + /// # Errors + /// + /// Returns `ToolError::InvalidPattern` if the pattern syntax is invalid. + pub fn add(mut self, pattern: &str, action: RuleAction) -> ToolResult { + let glob = Glob::new(pattern).map_err(|e| { + ToolError::InvalidPattern(format!("invalid glob pattern '{}': {}", pattern, e)) + })?; + self.rules.push((glob, action)); + Ok(self) + } + + /// Adds an allow pattern. + /// + /// This is a convenience wrapper around `add()` with `RuleAction::Allow`. + /// See `add()` for pattern syntax and behavior details. + /// + /// Patterns are relative to base directory and do NOT support `~/` or + /// `$HOME/` prefixes. + pub fn allow(self, pattern: &str) -> ToolResult { + self.add(pattern, RuleAction::Allow) + } + + /// Adds a deny pattern. + /// + /// This is a convenience wrapper around `add()` with `RuleAction::Deny`. + /// See `add()` for pattern syntax and behavior details. + /// + /// Patterns are relative to base directory and do NOT support `~/` or + /// `$HOME/` prefixes. + pub fn deny(self, pattern: &str) -> ToolResult { + self.add(pattern, RuleAction::Deny) + } + + /// Builds the policy, compiling all patterns. + /// + /// # Errors + /// + /// Returns `ToolError::InvalidPattern` if any pattern fails to compile. + pub fn build(self) -> ToolResult { + let mut builder = GlobSetBuilder::new(); + let mut rules = Vec::with_capacity(self.rules.len()); + + for (glob, action) in self.rules { + builder.add(glob.clone()); + rules.push((glob.compile_matcher(), action)); + } + + let glob_set = builder + .build() + .map_err(|e| ToolError::InvalidPattern(format!("failed to build glob set: {}", e)))?; + + Ok(GlobPolicy { rules, glob_set }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case::src_lib_rs_allowed("src/lib.rs", true)] + #[case::main_rs_allowed("main.rs", true)] + #[case::target_debug_app_denied("target/debug/app", false)] + #[case::cargo_toml_denied("Cargo.toml", false)] + fn builder_should_apply_allow_and_deny_rules( + #[case] path: &str, + #[case] expected_allowed: bool, + ) { + let policy = GlobPolicy::builder() + .allow("*.rs") + .unwrap() + .deny("target/**") + .unwrap() + .build() + .unwrap(); + + assert_eq!(policy.is_allowed(path), expected_allowed); + } + + #[test] + fn glob_policy_last_match_wins() { + let policy = GlobPolicy::builder() + .deny("target/**") + .unwrap() + .allow("target/debug/app") + .unwrap() + .build() + .unwrap(); + + assert!(policy.is_allowed("target/debug/app")); + assert!(!policy.is_allowed("target/release/app")); + assert!(!policy.is_allowed("target/anything.txt")); + + let policy2 = GlobPolicy::builder() + .allow("target/debug/app") + .unwrap() + .deny("target/**") + .unwrap() + .build() + .unwrap(); + + assert!(!policy2.is_allowed("target/debug/app")); + } + + #[test] + fn invalid_glob_pattern_fails() { + let result = GlobPolicy::builder().allow("[invalid"); + assert!(result.is_err()); + } + + #[rstest] + #[case::anything_txt_denied("anything.txt")] + #[case::src_lib_rs_denied("src/lib.rs")] + fn empty_policy_should_deny_any_path(#[case] path: &str) { + let policy = GlobPolicy::builder().build().unwrap(); + assert!(!policy.is_allowed(path)); + } + + // Keep policy construction out of the case table so each case reads as + // just input path + expected decision. + fn src_rs_policy() -> GlobPolicy { + GlobPolicy::builder() + .allow("src/**/*.rs") + .unwrap() + .build() + .unwrap() + } + + #[rstest] + #[case::root_level_lib_rs_allowed("src/lib.rs", true)] + #[case::root_level_main_rs_allowed("src/main.rs", true)] + #[case::nested_module_rs_allowed("src/deep/nested/module.rs", true)] + #[case::nested_mod_rs_allowed("src/deep/nested/mod.rs", true)] + #[case::deeply_nested_rs_allowed("src/a/b/c/d/e/file.rs", true)] + #[case::wrong_extension_denied("src/lib.txt", false)] + #[case::wrong_directory_denied("tests/test.rs", false)] + #[case::parent_directory_denied("../src/lib.rs", false)] + #[case::target_directory_denied("target/debug/lib.rs", false)] + #[case::empty_path_denied("", false)] + #[case::dot_path_denied(".", false)] + #[case::dotdot_path_denied("..", false)] + fn src_globstar_rs_policy_should_match_only_normalized_src_rs_paths( + #[case] path: &str, + #[case] expected_allowed: bool, + ) { + let policy = src_rs_policy(); + + let result = policy.is_allowed(path); + assert_eq!( + result, + expected_allowed, + "path '{}' should {}match 'src/**/*.rs'", + path, + if expected_allowed { "" } else { "not " } + ); + } +} diff --git a/src/llm-coding-tools-core/src/path/mod.rs b/src/llm-coding-tools-core/src/path/mod.rs index f9d4f87..00d53d5 100644 --- a/src/llm-coding-tools-core/src/path/mod.rs +++ b/src/llm-coding-tools-core/src/path/mod.rs @@ -3,12 +3,15 @@ //! This module provides [`PathResolver`] trait and implementations: //! - [`AbsolutePathResolver`] - Requires absolute paths only //! - [`AllowedPathResolver`] - Restricts to allowed directories +//! - [`AllowedGlobResolver`] - Restricts to allowed directories with glob pattern filtering mod absolute; mod allowed; +mod allowed_glob; pub use absolute::AbsolutePathResolver; pub use allowed::AllowedPathResolver; +pub use allowed_glob::{AllowedGlobResolver, GlobPolicy, GlobPolicyBuilder, RuleAction}; use crate::context::PathMode; use crate::error::ToolResult; diff --git a/src/llm-coding-tools-serdesai/src/lib.rs b/src/llm-coding-tools-serdesai/src/lib.rs index 5276c89..9ba4638 100644 --- a/src/llm-coding-tools-serdesai/src/lib.rs +++ b/src/llm-coding-tools-serdesai/src/lib.rs @@ -25,7 +25,9 @@ pub use llm_coding_tools_core::context; pub use llm_coding_tools_core::SystemPromptBuilder; /// Re-export path resolvers from core. -pub use llm_coding_tools_core::path::{AbsolutePathResolver, AllowedPathResolver, PathResolver}; +pub use llm_coding_tools_core::path::{ + AbsolutePathResolver, AllowedGlobResolver, AllowedPathResolver, PathResolver, +}; // Re-export tools from the tools module pub use tools::{ diff --git a/src/llm-coding-tools-serdesai/src/tools/mod.rs b/src/llm-coding-tools-serdesai/src/tools/mod.rs index bc7a4eb..7fa5ebb 100644 --- a/src/llm-coding-tools-serdesai/src/tools/mod.rs +++ b/src/llm-coding-tools-serdesai/src/tools/mod.rs @@ -8,9 +8,10 @@ //! resolver type at construction time, which selects the correct schema //! parameter names and descriptions. //! -//! Supported path resolvers: +//! These tools work with any [`PathResolver`] implementation: //! - [`AbsolutePathResolver`] - unrestricted absolute path access //! - [`AllowedPathResolver`] - sandboxed directory-restricted access +//! - [`AllowedGlobResolver`] for sandboxed access with glob pattern filtering //! //! # Public API //! @@ -44,6 +45,7 @@ //! [`AbsolutePathResolver`]: llm_coding_tools_core::path::AbsolutePathResolver //! [`AllowedPathResolver`]: llm_coding_tools_core::path::AllowedPathResolver //! [`Tool`]: serdes_ai::tools::Tool +//! [`AllowedGlobResolver`]: llm_coding_tools_core::path::AllowedGlobResolver mod bash; mod edit;