From 6559e50f1458d634b24812fb02ece2371279c1c3 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 8 May 2026 12:02:35 +0700 Subject: [PATCH 1/4] attributed_text: add chunk-readable text storage --- attributed_text/src/attributed_text.rs | 34 ++- attributed_text/src/error.rs | 4 +- attributed_text/src/lib.rs | 2 +- attributed_text/src/text_range.rs | 21 +- attributed_text/src/text_storage.rs | 290 ++++++++++++++++++++++++- 5 files changed, 337 insertions(+), 14 deletions(-) diff --git a/attributed_text/src/attributed_text.rs b/attributed_text/src/attributed_text.rs index f5c51b0fb..e14acac28 100644 --- a/attributed_text/src/attributed_text.rs +++ b/attributed_text/src/attributed_text.rs @@ -6,7 +6,7 @@ use core::fmt::Debug; use core::ops::Range; use crate::text_range::validate_range; -use crate::{Error, TextRange, TextStorage}; +use crate::{Error, TextChunk, TextRange, TextStorage}; /// A block of text with attributes applied to ranges within the text. #[derive(Debug)] @@ -50,11 +50,15 @@ impl AttributedText { } /// Borrow the underlying text as `&str` when the storage is contiguous. - pub fn as_str(&self) -> &str - where - T: AsRef, - { - self.text.as_ref() + pub fn as_str(&self) -> Option<&str> { + self.text.as_str() + } + + /// Iterates over borrowed text chunks covering `range`. + /// + /// The provided range must have been validated against this text. + pub fn chunks(&self, range: TextRange) -> impl Iterator> { + self.text.chunks(range) } /// Apply an `attribute` to a validated [`TextRange`] within the text. @@ -147,6 +151,7 @@ impl AttributedText { #[cfg(test)] mod tests { use crate::{AttributedText, Endpoint, ErrorKind, TextRange}; + use alloc::vec; use alloc::vec::Vec; #[derive(Debug, PartialEq)] @@ -260,6 +265,23 @@ mod tests { assert_eq!(at.attributes_len(), 1); } + #[test] + fn as_str_returns_contiguous_text() { + let at = AttributedText::<&str, ()>::new("Hello!"); + assert_eq!(at.as_str(), Some("Hello!")); + } + + #[test] + fn chunks_iterates_underlying_text() { + let at = AttributedText::<&str, ()>::new("aé日z"); + let range = TextRange::new(at.text(), 1..6).unwrap(); + let chunks: Vec<_> = at + .chunks(range) + .map(|chunk| (chunk.range().as_range(), chunk.text())) + .collect(); + assert_eq!(chunks, vec![(1..6, "é日")]); + } + #[test] fn set_text_clears_attributes() { let mut at = AttributedText::new("Hello!"); diff --git a/attributed_text/src/error.rs b/attributed_text/src/error.rs index 46bc58b66..c953e7ce2 100644 --- a/attributed_text/src/error.rs +++ b/attributed_text/src/error.rs @@ -76,7 +76,7 @@ impl Error { } } - pub(crate) fn not_on_char_boundary( + pub(crate) fn not_on_char_boundary( text: &T, start: usize, end: usize, @@ -182,7 +182,7 @@ pub struct BoundaryInfo { pub char_end: usize, } -fn enclosing_char_span(text: &T, index: usize) -> Option<(usize, usize)> { +fn enclosing_char_span(text: &T, index: usize) -> Option<(usize, usize)> { let len = text.len(); if index > len { return None; diff --git a/attributed_text/src/lib.rs b/attributed_text/src/lib.rs index 318ddc270..2864d7788 100644 --- a/attributed_text/src/lib.rs +++ b/attributed_text/src/lib.rs @@ -32,4 +32,4 @@ pub use crate::attribute_segments::{ pub use crate::attributed_text::AttributedText; pub use crate::error::{BoundaryInfo, Endpoint, Error, ErrorKind}; pub use crate::text_range::TextRange; -pub use crate::text_storage::TextStorage; +pub use crate::text_storage::{TextChunk, TextStorage}; diff --git a/attributed_text/src/text_range.rs b/attributed_text/src/text_range.rs index 5e371d657..92276a181 100644 --- a/attributed_text/src/text_range.rs +++ b/attributed_text/src/text_range.rs @@ -47,7 +47,7 @@ pub struct TextRange { impl TextRange { /// Returns a validated `TextRange` for the provided text. #[inline] - pub fn new(text: &T, range: Range) -> Result { + pub fn new(text: &T, range: Range) -> Result { validate_range(text, &range)?; Ok(Self { start: range.start, @@ -78,6 +78,20 @@ impl TextRange { self.end } + /// Returns the length of this range in bytes. + #[must_use] + #[inline] + pub const fn len(self) -> usize { + self.end - self.start + } + + /// Returns `true` if this range contains no bytes. + #[must_use] + #[inline] + pub const fn is_empty(self) -> bool { + self.start == self.end + } + /// Returns this range as a `Range`. #[must_use] #[inline] @@ -94,7 +108,10 @@ impl From for Range { } #[inline] -pub(crate) fn validate_range(text: &T, range: &Range) -> Result<(), Error> { +pub(crate) fn validate_range( + text: &T, + range: &Range, +) -> Result<(), Error> { let text_len = text.len(); if range.start > range.end { return Err(Error::invalid_range(range.start, range.end, text_len)); diff --git a/attributed_text/src/text_storage.rs b/attributed_text/src/text_storage.rs index 4e5dc8f49..86be94cec 100644 --- a/attributed_text/src/text_storage.rs +++ b/attributed_text/src/text_storage.rs @@ -3,6 +3,50 @@ use alloc::string::String; use alloc::sync::Arc; +use core::ops::Range; + +use crate::{Error, TextRange}; + +/// A borrowed contiguous chunk of text from a [`TextStorage`]. +/// +/// The [`TextRange`] is expressed in the storage's global byte coordinate space, while +/// [`Self::text`] returns the borrowed UTF-8 text for just this chunk. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct TextChunk<'a> { + range: TextRange, + text: &'a str, +} + +impl<'a> TextChunk<'a> { + /// Creates a text chunk for `range`. + /// + /// # Panics + /// + /// Panics if the byte length of `text` does not match `range`. + #[must_use] + pub fn new(range: TextRange, text: &'a str) -> Self { + assert_eq!( + range.len(), + text.len(), + "text chunk length must match its range" + ); + Self { range, text } + } + + /// Returns this chunk's range in the storage's global byte coordinate space. + #[must_use] + #[inline] + pub const fn range(self) -> TextRange { + self.range + } + + /// Returns this chunk's borrowed UTF-8 text. + #[must_use] + #[inline] + pub const fn text(self) -> &'a str { + self.text + } +} /// A block of text that will be wrapped by an [`AttributedText`]. /// @@ -22,6 +66,28 @@ pub trait TextStorage { /// underlying representation (e.g. [`str::is_char_boundary`] for contiguous /// strings, or by inspecting rope chunk boundaries). fn is_char_boundary(&self, index: usize) -> bool; + + /// Returns this storage as a contiguous string slice when available. + /// + /// Rope-like and sparse storage implementations should return `None`. + fn as_str(&self) -> Option<&str> { + None + } + + /// Returns a validated [`TextRange`] for this storage. + /// + /// This is equivalent to calling [`TextRange::new`] with `self`. + #[inline] + fn validate_range(&self, range: Range) -> Result { + TextRange::new(self, range) + } + + /// Iterates over borrowed chunks covering `range`. + /// + /// The provided range must have been validated against this storage. Implementations should + /// yield chunks in order, without gaps or overlaps, and should yield no chunks for an empty + /// range. + fn chunks(&self, range: TextRange) -> impl Iterator>; } impl TextStorage for String { @@ -32,15 +98,49 @@ impl TextStorage for String { fn is_char_boundary(&self, index: usize) -> bool { self.as_str().is_char_boundary(index) } + + fn as_str(&self) -> Option<&str> { + Some(self.as_ref()) + } + + fn chunks(&self, range: TextRange) -> impl Iterator> { + contiguous_chunks(self.as_ref(), range) + } +} + +impl TextStorage for str { + fn len(&self) -> usize { + Self::len(self) + } + + fn is_char_boundary(&self, index: usize) -> bool { + Self::is_char_boundary(self, index) + } + + fn as_str(&self) -> Option<&str> { + Some(self) + } + + fn chunks(&self, range: TextRange) -> impl Iterator> { + contiguous_chunks(self, range) + } } impl TextStorage for &str { fn len(&self) -> usize { - str::len(self) + (*self).len() } fn is_char_boundary(&self, index: usize) -> bool { - str::is_char_boundary(self, index) + (*self).is_char_boundary(index) + } + + fn as_str(&self) -> Option<&str> { + Some(*self) + } + + fn chunks(&self, range: TextRange) -> impl Iterator> { + contiguous_chunks(self, range) } } @@ -52,13 +152,113 @@ impl TextStorage for Arc { fn is_char_boundary(&self, index: usize) -> bool { str::is_char_boundary(self, index) } + + fn as_str(&self) -> Option<&str> { + Some(self.as_ref()) + } + + fn chunks(&self, range: TextRange) -> impl Iterator> { + contiguous_chunks(self.as_ref(), range) + } +} + +fn contiguous_chunks(text: &str, range: TextRange) -> impl Iterator> { + (!range.is_empty()) + .then(|| TextChunk::new(range, &text[range.as_range()])) + .into_iter() } #[cfg(test)] mod tests { - use super::TextStorage; + use super::{TextChunk, TextStorage}; use alloc::string::ToString; use alloc::sync::Arc; + use alloc::vec; + use alloc::vec::Vec; + use core::ops::Range; + + use crate::TextRange; + + #[derive(Debug)] + struct ChunkedText { + chunks: Vec<&'static str>, + len: usize, + } + + impl ChunkedText { + fn new(chunks: &[&'static str]) -> Self { + let chunks = chunks.to_vec(); + let len = chunks.iter().map(|chunk| chunk.len()).sum(); + Self { chunks, len } + } + } + + impl TextStorage for ChunkedText { + fn len(&self) -> usize { + self.len + } + + fn is_char_boundary(&self, index: usize) -> bool { + if index > self.len { + return false; + } + + let mut chunk_start = 0; + for chunk in &self.chunks { + let chunk_end = chunk_start + chunk.len(); + if index < chunk_end { + return chunk.is_char_boundary(index - chunk_start); + } + if index == chunk_end { + return true; + } + chunk_start = chunk_end; + } + + index == self.len + } + + fn chunks(&self, range: TextRange) -> impl Iterator> { + ChunkedTextChunks { + chunks: self.chunks.iter(), + chunk_start: 0, + range, + } + } + } + + #[derive(Clone, Debug)] + struct ChunkedTextChunks<'a> { + chunks: core::slice::Iter<'a, &'static str>, + chunk_start: usize, + range: TextRange, + } + + impl<'a> Iterator for ChunkedTextChunks<'a> { + type Item = TextChunk<'a>; + + fn next(&mut self) -> Option { + for chunk in self.chunks.by_ref() { + let chunk = *chunk; + let chunk_start = self.chunk_start; + let chunk_end = chunk_start + chunk.len(); + self.chunk_start = chunk_end; + + let start = self.range.start().max(chunk_start); + let end = self.range.end().min(chunk_end); + if start < end { + let local_start = start - chunk_start; + let local_end = end - chunk_start; + return Some(TextChunk::new( + TextRange::new_unchecked(start, end), + &chunk[local_start..local_end], + )); + } + } + + None + } + } fn assert_boundaries(t: &T, trues: &[usize], falses: &[usize]) { for &i in trues { @@ -69,6 +269,16 @@ mod tests { } } + fn collect_chunks( + text: &T, + range: Range, + ) -> Vec<(Range, &str)> { + let range = TextRange::new(text, range).expect("valid range"); + text.chunks(range) + .map(|chunk| (chunk.range().as_range(), chunk.text())) + .collect() + } + #[test] fn is_char_boundary_ascii() { let s = "abc"; @@ -95,4 +305,78 @@ mod tests { // Boundaries at 0, 4, 8 assert_boundaries(&s, &[0, 4, 8], &[1, 2, 3, 5, 6, 7]); } + + #[test] + fn validates_range_from_str_directly() { + let s = "éclair"; + let range = TextRange::new(s, 0..2).unwrap(); + assert_eq!(range.as_range(), 0..2); + + let range = s.validate_range(2..s.len()).unwrap(); + assert_eq!(range.as_range(), 2..s.len()); + } + + #[test] + fn contiguous_storage_has_fast_path() { + let borrowed = "hello"; + assert_eq!(TextStorage::as_str(&borrowed), Some("hello")); + + let owned = borrowed.to_string(); + assert_eq!(TextStorage::as_str(&owned), Some("hello")); + + let arc: Arc = Arc::from(borrowed); + assert_eq!(TextStorage::as_str(&arc), Some("hello")); + } + + #[test] + fn contiguous_chunks_cover_full_range() { + let s = "abc"; + assert_eq!(collect_chunks(s, 0..3), vec![(0..3, "abc")]); + } + + #[test] + fn contiguous_chunks_cover_multibyte_subrange() { + let s = "aé日z"; + assert_eq!(collect_chunks(s, 1..6), vec![(1..6, "é日")]); + } + + #[test] + fn empty_range_yields_no_chunks() { + let s = "abc"; + assert!(collect_chunks(s, 1..1).is_empty()); + } + + #[test] + fn chunked_storage_boundaries() { + let text = ChunkedText::new(&["ab", "é", "日z"]); + assert_boundaries(&text, &[0, 1, 2, 4, 7, 8], &[3, 5, 6, 9]); + } + + #[test] + fn chunked_storage_has_no_contiguous_fast_path() { + let text = ChunkedText::new(&["ab", "é", "日z"]); + assert_eq!(text.as_str(), None); + } + + #[test] + fn chunked_chunks_cover_single_storage_chunk() { + let text = ChunkedText::new(&["ab", "é", "日z"]); + assert_eq!(collect_chunks(&text, 4..7), vec![(4..7, "日")]); + } + + #[test] + fn chunked_chunks_cover_multiple_storage_chunks() { + let text = ChunkedText::new(&["ab", "é", "日z"]); + assert_eq!( + collect_chunks(&text, 1..7), + vec![(1..2, "b"), (2..4, "é"), (4..7, "日")] + ); + } + + #[test] + fn chunked_storage_rejects_endpoint_inside_multibyte_scalar() { + let text = ChunkedText::new(&["ab", "é", "日z"]); + assert!(TextRange::new(&text, 3..4).is_err()); + assert!(TextRange::new(&text, 4..6).is_err()); + } } From 32c851b39312625e7c5704f4d59a0d887612d1bb Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 8 May 2026 14:36:26 +0700 Subject: [PATCH 2/4] attributed_text: yield validated ranges from segments --- attributed_text/src/attribute_segments.rs | 102 +++++++++++++++------- 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/attributed_text/src/attribute_segments.rs b/attributed_text/src/attribute_segments.rs index 9f317e588..a2c5a3e90 100644 --- a/attributed_text/src/attribute_segments.rs +++ b/attributed_text/src/attribute_segments.rs @@ -11,6 +11,7 @@ use core::fmt::Debug; use core::ops::Range; use crate::AttributedText; +use crate::TextRange; use crate::TextStorage; fn build_segment_state( @@ -176,7 +177,7 @@ impl AttributeSegmentsWorkspace { /// Iterator over contiguous attribute segments produced from an [`AttributedText`]. /// -/// Each yielded item is a non-empty, non-overlapping byte range. The active spans for the +/// Each yielded item is a non-empty, non-overlapping [`TextRange`]. The active spans for the /// yielded range are exposed through [`AttributeSegments::active_spans`]. /// /// # Examples @@ -197,7 +198,7 @@ impl AttributeSegmentsWorkspace { /// let mut workspace = AttributeSegmentsWorkspace::new(); /// let mut segments = workspace.segments(&text); /// -/// assert_eq!(segments.next(), Some(0..1)); +/// assert_eq!(segments.next().map(TextRange::as_range), Some(0..1)); /// let colors: Vec<_> = segments /// .active_spans() /// .iter() @@ -205,7 +206,7 @@ impl AttributeSegmentsWorkspace { /// .collect(); /// assert_eq!(colors, vec![&Color::Red]); /// -/// assert_eq!(segments.next(), Some(1..2)); +/// assert_eq!(segments.next().map(TextRange::as_range), Some(1..2)); /// let colors: Vec<_> = segments /// .active_spans() /// .iter() @@ -268,7 +269,7 @@ impl<'w, 'a, T: Debug + TextStorage, Attr: Debug> AttributeSegments<'w, 'a, T, A } impl Iterator for AttributeSegments<'_, '_, T, Attr> { - type Item = Range; + type Item = TextRange; fn size_hint(&self) -> (usize, Option) { // Remaining segments are remaining adjacent boundary pairs: [i, i + 1). @@ -288,7 +289,7 @@ impl Iterator for AttributeSegments<'_, '_, self.index += 1; debug_assert!(start < end, "boundaries are sorted + deduped"); - return Some(start..end); + return Some(TextRange::new_unchecked(start, end)); } self.workspace.active.clear(); None @@ -328,6 +329,10 @@ pub struct ActiveSpansIter<'s, 'a, T: Debug + TextStorage, Attr: Debug> { impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> Iterator for ActiveSpansIter<'s, 'a, T, Attr> { type Item = (&'a Range, &'a Attr); + fn size_hint(&self) -> (usize, Option) { + self.ids.size_hint() + } + fn next(&mut self) -> Option { let &attr_index = self.ids.next()?; Some( @@ -389,7 +394,6 @@ impl<'active, 's, 'a, T: Debug + TextStorage, Attr: Debug> IntoIterator #[cfg(test)] mod tests { use super::*; - use crate::TextRange; use alloc::vec; use alloc::vec::Vec; @@ -400,6 +404,10 @@ mod tests { Green, } + fn r(range: Range) -> Option { + Some(TextRange::new_unchecked(range.start, range.end)) + } + #[test] fn empty_text_yields_nothing() { let at = AttributedText::<&str, Color>::new(""); @@ -413,7 +421,7 @@ mod tests { let at = AttributedText::<&str, Color>::new("hello"); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..5)); + assert_eq!(segments.next(), r(0..5)); assert!(segments.active_spans().is_empty()); assert_eq!(segments.next(), None); } @@ -426,11 +434,11 @@ mod tests { let mut segments = workspace.segments(&at); assert_eq!(segments.size_hint(), (3, Some(3))); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); assert_eq!(segments.size_hint(), (2, Some(2))); - assert_eq!(segments.next(), Some(1..3)); + assert_eq!(segments.next(), r(1..3)); assert_eq!(segments.size_hint(), (1, Some(1))); - assert_eq!(segments.next(), Some(3..5)); + assert_eq!(segments.next(), r(3..5)); assert_eq!(segments.size_hint(), (0, Some(0))); assert_eq!(segments.next(), None); } @@ -441,7 +449,7 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 0..5).unwrap(), Color::Red); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..5)); + assert_eq!(segments.next(), r(0..5)); let active: Vec<_> = segments.active_spans().iter().collect(); assert_eq!(active.len(), 1); assert_eq!(active[0].1, &Color::Red); @@ -454,11 +462,11 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 1..3).unwrap(), Color::Red); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); assert!(segments.active_spans().is_empty()); - assert_eq!(segments.next(), Some(1..3)); + assert_eq!(segments.next(), r(1..3)); assert_eq!(segments.active_spans().len(), 1); - assert_eq!(segments.next(), Some(3..5)); + assert_eq!(segments.next(), r(3..5)); assert!(segments.active_spans().is_empty()); assert_eq!(segments.next(), None); } @@ -470,22 +478,22 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 2..5).unwrap(), Color::Blue); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); assert!(segments.active_spans().is_empty()); - assert_eq!(segments.next(), Some(1..2)); + assert_eq!(segments.next(), r(1..2)); let a: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(a, vec![&Color::Red]); - assert_eq!(segments.next(), Some(2..4)); + assert_eq!(segments.next(), r(2..4)); let a: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(a, vec![&Color::Red, &Color::Blue]); - assert_eq!(segments.next(), Some(4..5)); + assert_eq!(segments.next(), r(4..5)); let a: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(a, vec![&Color::Blue]); - assert_eq!(segments.next(), Some(5..6)); + assert_eq!(segments.next(), r(5..6)); assert!(segments.active_spans().is_empty()); assert_eq!(segments.next(), None); } @@ -498,7 +506,7 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 0..6).unwrap(), Color::Green); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..6)); + assert_eq!(segments.next(), r(0..6)); let forward: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(forward, vec![&Color::Red, &Color::Blue, &Color::Green]); @@ -519,9 +527,9 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 2..2).unwrap(), Color::Red); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..2)); + assert_eq!(segments.next(), r(0..2)); assert!(segments.active_spans().is_empty()); - assert_eq!(segments.next(), Some(2..5)); + assert_eq!(segments.next(), r(2..5)); assert!(segments.active_spans().is_empty()); assert_eq!(segments.next(), None); } @@ -533,10 +541,10 @@ mod tests { at.apply_attribute(TextRange::new(at.text(), 3..6).unwrap(), Color::Blue); let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..3)); + assert_eq!(segments.next(), r(0..3)); let a: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(a, vec![&Color::Red]); - assert_eq!(segments.next(), Some(3..6)); + assert_eq!(segments.next(), r(3..6)); let a: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(a, vec![&Color::Blue]); assert_eq!(segments.next(), None); @@ -549,7 +557,7 @@ mod tests { let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..3)); + assert_eq!(segments.next(), r(0..3)); assert_eq!(segments.active_spans().len(), 1); assert_eq!(segments.next(), None); assert!(segments.active_spans().is_empty()); @@ -563,14 +571,14 @@ mod tests { let mut workspace = AttributeSegmentsWorkspace::new(); let mut segments = workspace.segments(&at); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); let first: Vec<_> = (&segments.active_spans()) .into_iter() .map(|(_, c)| c) .collect(); assert_eq!(first, vec![&Color::Red]); - assert_eq!(segments.next(), Some(1..3)); + assert_eq!(segments.next(), r(1..3)); let overlap: Vec<_> = (&segments.active_spans()) .into_iter() .map(|(_, c)| c) @@ -578,6 +586,38 @@ mod tests { assert_eq!(overlap, vec![&Color::Red, &Color::Blue]); } + #[test] + fn active_spans_iter_reports_exact_len() { + let mut at = AttributedText::new("abcd"); + at.apply_attribute(TextRange::new(at.text(), 0..4).unwrap(), Color::Red); + at.apply_attribute(TextRange::new(at.text(), 1..3).unwrap(), Color::Blue); + let mut workspace = AttributeSegmentsWorkspace::new(); + let mut segments = workspace.segments(&at); + + assert_eq!(segments.next(), r(0..1)); + { + let active = segments.active_spans(); + let mut iter = active.iter(); + assert_eq!(iter.size_hint(), (1, Some(1))); + assert_eq!(iter.len(), 1); + assert_eq!(iter.next().map(|(_, c)| c), Some(&Color::Red)); + assert_eq!(iter.size_hint(), (0, Some(0))); + assert_eq!(iter.len(), 0); + } + + assert_eq!(segments.next(), r(1..3)); + { + let active = segments.active_spans(); + let mut iter = active.iter(); + assert_eq!(iter.size_hint(), (2, Some(2))); + assert_eq!(iter.len(), 2); + assert_eq!(iter.next_back().map(|(_, c)| c), Some(&Color::Blue)); + assert_eq!(iter.len(), 1); + assert_eq!(iter.next().map(|(_, c)| c), Some(&Color::Red)); + assert_eq!(iter.len(), 0); + } + } + #[test] fn workspace_reuses_for_multiple_texts() { let mut workspace = AttributeSegmentsWorkspace::new(); @@ -586,10 +626,10 @@ mod tests { a.apply_attribute(TextRange::new(a.text(), 0..1).unwrap(), Color::Red); { let mut segments = workspace.segments(&a); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); let first: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(first, vec![&Color::Red]); - assert_eq!(segments.next(), Some(1..3)); + assert_eq!(segments.next(), r(1..3)); assert!(segments.active_spans().is_empty()); assert_eq!(segments.next(), None); } @@ -598,9 +638,9 @@ mod tests { b.apply_attribute(TextRange::new(b.text(), 1..4).unwrap(), Color::Blue); { let mut segments = workspace.segments(&b); - assert_eq!(segments.next(), Some(0..1)); + assert_eq!(segments.next(), r(0..1)); assert!(segments.active_spans().is_empty()); - assert_eq!(segments.next(), Some(1..4)); + assert_eq!(segments.next(), r(1..4)); let second: Vec<_> = segments.active_spans().iter().map(|(_, c)| c).collect(); assert_eq!(second, vec![&Color::Blue]); assert_eq!(segments.next(), None); From 75ec27927b4418def5eefd29ebb3e5c5690891f2 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 8 May 2026 16:14:40 +0700 Subject: [PATCH 3/4] attributed_text: store validated attribute ranges --- attributed_text/src/attribute_segments.rs | 60 ++++++++++++++------- attributed_text/src/attributed_text.rs | 63 +++++++++++++---------- attributed_text/src/text_range.rs | 14 +++++ 3 files changed, 92 insertions(+), 45 deletions(-) diff --git a/attributed_text/src/attribute_segments.rs b/attributed_text/src/attribute_segments.rs index a2c5a3e90..14732cc7d 100644 --- a/attributed_text/src/attribute_segments.rs +++ b/attributed_text/src/attribute_segments.rs @@ -8,7 +8,6 @@ use alloc::vec::Vec; use core::fmt::Debug; -use core::ops::Range; use crate::AttributedText; use crate::TextRange; @@ -37,8 +36,8 @@ fn build_segment_state( workspace.boundaries.push(0); workspace.boundaries.push(len_u32); for (range, _) in attributed.attributes_iter() { - let start_u32 = u32::try_from(range.start).expect("range start should fit in u32"); - let end_u32 = u32::try_from(range.end).expect("range end should fit in u32"); + let start_u32 = u32::try_from(range.start()).expect("range start should fit in u32"); + let end_u32 = u32::try_from(range.end()).expect("range end should fit in u32"); workspace.boundaries.push(start_u32); workspace.boundaries.push(end_u32); } @@ -55,11 +54,11 @@ fn build_segment_state( workspace.span_build.reserve(attr_count); for (attr_index, (range, _)) in attributed.attributes_iter().enumerate() { - if range.start == range.end { + if range.is_empty() { continue; } - let start_u32 = u32::try_from(range.start).expect("range start should fit in u32"); - let end_u32 = u32::try_from(range.end).expect("range end should fit in u32"); + let start_u32 = u32::try_from(range.start()).expect("range start should fit in u32"); + let end_u32 = u32::try_from(range.end()).expect("range end should fit in u32"); let start_boundary = workspace .boundaries .binary_search(&start_u32) @@ -199,20 +198,21 @@ impl AttributeSegmentsWorkspace { /// let mut segments = workspace.segments(&text); /// /// assert_eq!(segments.next().map(TextRange::as_range), Some(0..1)); -/// let colors: Vec<_> = segments -/// .active_spans() +/// let active_spans = segments.active_spans(); +/// let mut active = active_spans /// .iter() -/// .map(|(_, c)| c) -/// .collect(); -/// assert_eq!(colors, vec![&Color::Red]); +/// .map(|(range, color)| (range.as_range(), color)); +/// assert_eq!(active.next(), Some((0..2, &Color::Red))); +/// assert_eq!(active.next(), None); /// /// assert_eq!(segments.next().map(TextRange::as_range), Some(1..2)); -/// let colors: Vec<_> = segments -/// .active_spans() +/// let active_spans = segments.active_spans(); +/// let mut active = active_spans /// .iter() -/// .map(|(_, c)| c) -/// .collect(); -/// assert_eq!(colors, vec![&Color::Red, &Color::Blue]); +/// .map(|(range, color)| (range.as_range(), color)); +/// assert_eq!(active.next(), Some((0..2, &Color::Red))); +/// assert_eq!(active.next(), Some((1..5, &Color::Blue))); +/// assert_eq!(active.next(), None); /// /// let active = segments.active_spans(); /// let mut count = 0; @@ -222,6 +222,27 @@ impl AttributeSegmentsWorkspace { /// assert_eq!(count, 2); /// ``` /// +/// Read the text covered by each segment through [`AttributedText::chunks`]: +/// +/// ``` +/// use attributed_text::{AttributeSegmentsWorkspace, AttributedText, TextRange}; +/// +/// let mut text = AttributedText::new("aé日z"); +/// text.apply_attribute(TextRange::new(text.text(), 1..6).unwrap(), "emphasis"); +/// +/// let mut workspace = AttributeSegmentsWorkspace::new(); +/// let mut segments = workspace.segments(&text); +/// +/// while let Some(range) = segments.next() { +/// let active_attrs: Vec<_> = segments.active_spans().iter().collect(); +/// for chunk in text.chunks(range) { +/// // Use `chunk.text()` with `active_attrs` for this segment. +/// assert!(!chunk.text().is_empty()); +/// } +/// assert!(active_attrs.len() <= 1); +/// } +/// ``` +/// /// # Implementation notes /// /// Indices are stored as `u32` to reduce memory footprint on 64-bit platforms. This caps @@ -327,7 +348,7 @@ pub struct ActiveSpansIter<'s, 'a, T: Debug + TextStorage, Attr: Debug> { } impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> Iterator for ActiveSpansIter<'s, 'a, T, Attr> { - type Item = (&'a Range, &'a Attr); + type Item = (TextRange, &'a Attr); fn size_hint(&self) -> (usize, Option) { self.ids.size_hint() @@ -361,7 +382,7 @@ impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> DoubleEndedIterator impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> ActiveSpans<'s, 'a, T, Attr> { /// Iterate over the active spans in application order (ascending span id). /// - /// Each item is `(&Range, &Attr)`. + /// Each item is `(TextRange, &Attr)`. pub fn iter(&self) -> ActiveSpansIter<'_, 'a, T, Attr> { ActiveSpansIter { ids: self.active_ids.iter(), @@ -383,7 +404,7 @@ impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> ActiveSpans<'s, 'a, T, Attr> { impl<'active, 's, 'a, T: Debug + TextStorage, Attr: Debug> IntoIterator for &'active ActiveSpans<'s, 'a, T, Attr> { - type Item = (&'a Range, &'a Attr); + type Item = (TextRange, &'a Attr); type IntoIter = ActiveSpansIter<'active, 'a, T, Attr>; fn into_iter(self) -> Self::IntoIter { @@ -396,6 +417,7 @@ mod tests { use super::*; use alloc::vec; use alloc::vec::Vec; + use core::ops::Range; #[derive(Debug, Clone, PartialEq, Eq)] enum Color { diff --git a/attributed_text/src/attributed_text.rs b/attributed_text/src/attributed_text.rs index e14acac28..7f3e218ea 100644 --- a/attributed_text/src/attributed_text.rs +++ b/attributed_text/src/attributed_text.rs @@ -12,7 +12,7 @@ use crate::{Error, TextChunk, TextRange, TextStorage}; #[derive(Debug)] pub struct AttributedText { text: T, - attributes: Vec<(Range, Attr)>, + attributes: Vec<(TextRange, Attr)>, } impl AttributedText { @@ -64,7 +64,7 @@ impl AttributedText { /// Apply an `attribute` to a validated [`TextRange`] within the text. #[inline] pub fn apply_attribute(&mut self, range: TextRange, attribute: Attr) { - self.attributes.push((range.into(), attribute)); + self.attributes.push((range, attribute)); } /// Apply an `attribute` to a byte range within the text. @@ -77,7 +77,8 @@ impl AttributedText { attribute: Attr, ) -> Result<(), Error> { validate_range(&self.text, &range)?; - self.attributes.push((range, attribute)); + self.attributes + .push((TextRange::new_unchecked(range.start, range.end), attribute)); Ok(()) } @@ -85,8 +86,8 @@ impl AttributedText { /// /// Attributes are yielded in the order they were applied. #[inline] - pub fn attributes_iter(&self) -> impl ExactSizeIterator, &Attr)> { - self.attributes.iter().map(|(range, attr)| (range, attr)) + pub fn attributes_iter(&self) -> impl ExactSizeIterator { + self.attributes.iter().map(|(range, attr)| (*range, attr)) } /// Get an iterator over the attributes (and their ranges) that apply at the given `index`. @@ -95,10 +96,10 @@ impl AttributedText { /// attributes, it just reports everything. /// /// This performs a full scan of all attributes on each call (`O(n)` in applied span count). - pub fn attributes_at(&self, index: usize) -> impl Iterator, &Attr)> { + pub fn attributes_at(&self, index: usize) -> impl Iterator { self.attributes.iter().filter_map(move |(attr_span, attr)| { - if attr_span.contains(&index) { - Some((attr_span, attr)) + if attr_span.contains(index) { + Some((*attr_span, attr)) } else { None } @@ -118,11 +119,11 @@ impl AttributedText { /// the exact covered subranges). pub fn attributes_for_range( &self, - range: Range, - ) -> impl Iterator, &Attr)> { + range: TextRange, + ) -> impl Iterator { self.attributes.iter().filter_map(move |(attr_span, attr)| { - if (attr_span.start < range.end) && (attr_span.end > range.start) { - Some((attr_span, attr)) + if attr_span.overlaps(range) { + Some((*attr_span, attr)) } else { None } @@ -136,10 +137,10 @@ impl AttributedText { /// Returns the `(range, attribute)` pair at the given insertion-order span index. #[inline] - pub(crate) fn attribute_at_idx(&self, index: usize) -> Option<(&Range, &Attr)> { + pub(crate) fn attribute_at_idx(&self, index: usize) -> Option<(TextRange, &Attr)> { self.attributes .get(index) - .map(|(range, attr)| (range, attr)) + .map(|(range, attr)| (*range, attr)) } /// Remove all applied attribute spans. @@ -160,6 +161,10 @@ mod tests { Remove, } + fn r(range: core::ops::Range) -> TextRange { + TextRange::new_unchecked(range.start, range.end) + } + #[test] fn attributes_at() { let t = "Hello!"; @@ -180,13 +185,13 @@ mod tests { // Index 2 is in both spans; returns ranges and attrs in application order. let at_2: Vec<_> = at.attributes_at(2).collect(); assert_eq!(at_2.len(), 2); - assert_eq!(at_2[0], (&(1..3), &TestAttribute::Keep)); - assert_eq!(at_2[1], (&(2..5), &TestAttribute::Remove)); + assert_eq!(at_2[0], (r(1..3), &TestAttribute::Keep)); + assert_eq!(at_2[1], (r(2..5), &TestAttribute::Remove)); // Index 4 is only in the second span. let at_4: Vec<_> = at.attributes_at(4).collect(); assert_eq!(at_4.len(), 1); - assert_eq!(at_4[0], (&(2..5), &TestAttribute::Remove)); + assert_eq!(at_4[0], (r(2..5), &TestAttribute::Remove)); } #[test] @@ -204,19 +209,25 @@ mod tests { ); // Range overlapping only the first span. - let r: Vec<_> = at.attributes_for_range(0..2).collect(); - assert_eq!(r.len(), 1); - assert_eq!(r[0], (&(1..3), &TestAttribute::Keep)); + let attrs: Vec<_> = at + .attributes_for_range(TextRange::new(at.text(), 0..2).unwrap()) + .collect(); + assert_eq!(attrs.len(), 1); + assert_eq!(attrs[0], (r(1..3), &TestAttribute::Keep)); // Range overlapping both spans. - let r: Vec<_> = at.attributes_for_range(2..5).collect(); - assert_eq!(r.len(), 2); - assert_eq!(r[0], (&(1..3), &TestAttribute::Keep)); - assert_eq!(r[1], (&(4..6), &TestAttribute::Remove)); + let attrs: Vec<_> = at + .attributes_for_range(TextRange::new(at.text(), 2..5).unwrap()) + .collect(); + assert_eq!(attrs.len(), 2); + assert_eq!(attrs[0], (r(1..3), &TestAttribute::Keep)); + assert_eq!(attrs[1], (r(4..6), &TestAttribute::Remove)); // Range between the two spans, overlapping neither. - let r: Vec<_> = at.attributes_for_range(3..4).collect(); - assert!(r.is_empty()); + let attrs: Vec<_> = at + .attributes_for_range(TextRange::new(at.text(), 3..4).unwrap()) + .collect(); + assert!(attrs.is_empty()); } #[test] diff --git a/attributed_text/src/text_range.rs b/attributed_text/src/text_range.rs index 92276a181..19ff90002 100644 --- a/attributed_text/src/text_range.rs +++ b/attributed_text/src/text_range.rs @@ -92,6 +92,20 @@ impl TextRange { self.start == self.end } + /// Returns `true` if `index` is contained in this range. + #[must_use] + #[inline] + pub const fn contains(self, index: usize) -> bool { + self.start <= index && index < self.end + } + + /// Returns `true` if this range and `other` overlap. + #[must_use] + #[inline] + pub const fn overlaps(self, other: Self) -> bool { + self.start < other.end && self.end > other.start + } + /// Returns this range as a `Range`. #[must_use] #[inline] From e9af9365b9a57554083080e12c7be84666752c06 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 8 May 2026 16:15:54 +0700 Subject: [PATCH 4/4] attributed_text: add combined segment view --- attributed_text/src/attribute_segments.rs | 84 ++++++++++++++++++++++- attributed_text/src/lib.rs | 2 +- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/attributed_text/src/attribute_segments.rs b/attributed_text/src/attribute_segments.rs index 14732cc7d..271af7dbe 100644 --- a/attributed_text/src/attribute_segments.rs +++ b/attributed_text/src/attribute_segments.rs @@ -234,7 +234,7 @@ impl AttributeSegmentsWorkspace { /// let mut segments = workspace.segments(&text); /// /// while let Some(range) = segments.next() { -/// let active_attrs: Vec<_> = segments.active_spans().iter().collect(); +/// let active_attrs = segments.active_spans(); /// for chunk in text.chunks(range) { /// // Use `chunk.text()` with `active_attrs` for this segment. /// assert!(!chunk.text().is_empty()); @@ -243,6 +243,26 @@ impl AttributeSegmentsWorkspace { /// } /// ``` /// +/// Or use [`AttributeSegments::next_segment`] to receive the range and active spans together: +/// +/// ``` +/// use attributed_text::{AttributeSegmentsWorkspace, AttributedText, TextRange}; +/// +/// let mut text = AttributedText::new("aé日z"); +/// text.apply_attribute(TextRange::new(text.text(), 1..6).unwrap(), "emphasis"); +/// +/// let mut workspace = AttributeSegmentsWorkspace::new(); +/// let mut segments = workspace.segments(&text); +/// +/// while let Some(segment) = segments.next_segment() { +/// let active_attrs = segment.active_spans(); +/// for chunk in text.chunks(segment.range()) { +/// assert!(!chunk.text().is_empty()); +/// } +/// assert!(active_attrs.len() <= 1); +/// } +/// ``` +/// /// # Implementation notes /// /// Indices are stored as `u32` to reduce memory footprint on 64-bit platforms. This caps @@ -287,6 +307,45 @@ impl<'w, 'a, T: Debug + TextStorage, Attr: Debug> AttributeSegments<'w, 'a, T, A attributed: self.attributed, } } + + /// Returns the next segment as a combined range and active-span view. + /// + /// This is a convenience wrapper around [`Iterator::next`] and [`Self::active_spans`]. + /// The returned segment borrows this iterator, so it must be dropped before requesting + /// another segment. + pub fn next_segment(&mut self) -> Option> { + let range = self.next()?; + Some(AttributeSegment { + range, + active_ids: &self.workspace.active, + attributed: self.attributed, + }) + } +} + +/// A range yielded by [`AttributeSegments`] with its active attribute spans. +#[derive(Clone, Debug)] +pub struct AttributeSegment<'s, 'a, T: Debug + TextStorage, Attr: Debug> { + range: TextRange, + active_ids: &'s [u32], + attributed: &'a AttributedText, +} + +impl<'s, 'a, T: Debug + TextStorage, Attr: Debug> AttributeSegment<'s, 'a, T, Attr> { + /// Returns the segment range. + #[must_use] + pub const fn range(&self) -> TextRange { + self.range + } + + /// Returns the spans active over this segment. + #[must_use] + pub const fn active_spans(&self) -> ActiveSpans<'s, 'a, T, Attr> { + ActiveSpans { + active_ids: self.active_ids, + attributed: self.attributed, + } + } } impl Iterator for AttributeSegments<'_, '_, T, Attr> { @@ -640,6 +699,29 @@ mod tests { } } + #[test] + fn next_segment_returns_range_and_active_spans_together() { + let mut at = AttributedText::new("abcd"); + at.apply_attribute(TextRange::new(at.text(), 1..3).unwrap(), Color::Red); + let mut workspace = AttributeSegmentsWorkspace::new(); + let mut segments = workspace.segments(&at); + + let segment = segments.next_segment().unwrap(); + assert_eq!(segment.range(), TextRange::new_unchecked(0, 1)); + assert!(segment.active_spans().is_empty()); + + let segment = segments.next_segment().unwrap(); + assert_eq!(segment.range(), TextRange::new_unchecked(1, 3)); + let active: Vec<_> = segment.active_spans().iter().map(|(_, c)| c).collect(); + assert_eq!(active, vec![&Color::Red]); + + let segment = segments.next_segment().unwrap(); + assert_eq!(segment.range(), TextRange::new_unchecked(3, 4)); + assert!(segment.active_spans().is_empty()); + + assert!(segments.next_segment().is_none()); + } + #[test] fn workspace_reuses_for_multiple_texts() { let mut workspace = AttributeSegmentsWorkspace::new(); diff --git a/attributed_text/src/lib.rs b/attributed_text/src/lib.rs index 2864d7788..e72c79aee 100644 --- a/attributed_text/src/lib.rs +++ b/attributed_text/src/lib.rs @@ -27,7 +27,7 @@ mod text_range; mod text_storage; pub use crate::attribute_segments::{ - ActiveSpans, ActiveSpansIter, AttributeSegments, AttributeSegmentsWorkspace, + ActiveSpans, ActiveSpansIter, AttributeSegment, AttributeSegments, AttributeSegmentsWorkspace, }; pub use crate::attributed_text::AttributedText; pub use crate::error::{BoundaryInfo, Endpoint, Error, ErrorKind};