Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 139 additions & 40 deletions attributed_text/src/attribute_segments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@ use crate::AttributedText;
use crate::TextRange;
use crate::TextStorage;

fn build_segment_state<T: Debug + TextStorage, Attr: Debug>(
attributed: &AttributedText<T, Attr>,
fn build_segment_state_from_ranges<I>(
len: usize,
attr_count: usize,
ranges: I,
workspace: &mut AttributeSegmentsWorkspace,
) {
let len = attributed.len();
) where
I: IntoIterator<Item = (usize, TextRange)>,
{
debug_assert!(
len <= u32::MAX as usize,
"attributed_text currently supports texts up to u32::MAX bytes (got {len})"
);
let len_u32 = u32::try_from(len).expect("validated by debug_assert above");
let attr_count = attributed.attributes_len();
debug_assert!(
attr_count <= u32::MAX as usize,
"attributed_text currently supports up to u32::MAX span attributes (got {attr_count})"
Expand All @@ -35,11 +37,26 @@ fn build_segment_state<T: Debug + TextStorage, Attr: Debug>(
.reserve(2 + attr_count.saturating_mul(2));
workspace.boundaries.push(0);
workspace.boundaries.push(len_u32);
for (range, _) in attributed.attributes_iter() {
workspace.span_build.clear();
workspace.span_build.reserve(attr_count);

for (attr_index, range) in ranges {
debug_assert!(
range.end() <= len,
"attribute range end {} exceeds text length {len}",
range.end()
);
let start_u32 = u32::try_from(range.start()).expect("range start should fit in u32");
let end_u32 = u32::try_from(range.end()).expect("range end should fit in u32");
workspace.boundaries.push(start_u32);
workspace.boundaries.push(end_u32);
if !range.is_empty() {
workspace.span_build.push((
u32::try_from(attr_index).expect("attribute index overflow"),
start_u32,
end_u32,
));
}
}
workspace.boundaries.sort_unstable();
workspace.boundaries.dedup();
Expand All @@ -50,32 +67,23 @@ fn build_segment_state<T: Debug + TextStorage, Attr: Debug>(
workspace.start_counts.resize(boundary_count, 0);
workspace.end_counts.clear();
workspace.end_counts.resize(boundary_count, 0);
workspace.span_build.clear();
workspace.span_build.reserve(attr_count);

for (attr_index, (range, _)) in attributed.attributes_iter().enumerate() {
if range.is_empty() {
continue;
}
let start_u32 = u32::try_from(range.start()).expect("range start should fit in u32");
let end_u32 = u32::try_from(range.end()).expect("range end should fit in u32");
for (_attr_index, start_u32, end_u32) in &mut workspace.span_build {
let start_boundary = workspace
.boundaries
.binary_search(&start_u32)
.binary_search(start_u32)
.expect("attribute boundary start should be in boundary list");
let end_boundary = workspace
.boundaries
.binary_search(&end_u32)
.binary_search(end_u32)
.expect("attribute boundary end should be in boundary list");
if start_boundary == end_boundary {
continue;
}
debug_assert_ne!(
start_boundary, end_boundary,
"non-empty attributes should span at least one boundary interval"
);

workspace.span_build.push((
u32::try_from(attr_index).expect("attribute index overflow"),
u32::try_from(start_boundary).expect("start boundary index overflow"),
u32::try_from(end_boundary).expect("end boundary index overflow"),
));
*start_u32 = u32::try_from(start_boundary).expect("start boundary index overflow");
*end_u32 = u32::try_from(end_boundary).expect("end boundary index overflow");
workspace.start_counts[start_boundary] += 1;
workspace.end_counts[end_boundary] += 1;
}
Expand Down Expand Up @@ -137,6 +145,40 @@ fn build_segment_state<T: Debug + TextStorage, Attr: Debug>(
}
}

/// Rebuilds the segmentation state in `workspace` from the attributes of `attributed`.
///
/// Each attribute range is paired with its position in the attribute list and
/// handed to [`build_segment_state_from_ranges`] together with the text length
/// and the attribute count.
fn build_segment_state<T: Debug + TextStorage, Attr: Debug>(
    attributed: &AttributedText<T, Attr>,
    workspace: &mut AttributeSegmentsWorkspace,
) {
    let text_len = attributed.len();
    let attr_count = attributed.attributes_len();
    // Only the ranges matter for segmentation; the attribute values are dropped here.
    let indexed_ranges = attributed
        .attributes_iter()
        .enumerate()
        .map(|(attr_index, (range, _))| (attr_index, range));

    build_segment_state_from_ranges(text_len, attr_count, indexed_ranges, workspace);
}

/// Updates `workspace.active` for the boundary at `boundary_index`.
///
/// The ids listed in `end_events` for this boundary are removed from `active`
/// first, then the ids listed in `start_events` for this boundary are inserted.
/// Lookups and insertions go through `binary_search`, so `active` is expected
/// to be sorted on entry and stays sorted; ids that are already present are not
/// inserted twice, and ids that are absent are ignored on removal.
fn update_active_for_boundary(workspace: &mut AttributeSegmentsWorkspace, boundary_index: usize) {
    // Retire every id whose end event is recorded at this boundary.
    let ends_from = workspace.end_offsets[boundary_index] as usize;
    let ends_to = workspace.end_offsets[boundary_index + 1] as usize;
    for &ended in &workspace.end_events[ends_from..ends_to] {
        if let Ok(position) = workspace.active.binary_search(&ended) {
            workspace.active.remove(position);
        }
    }

    // Then activate every id whose start event is recorded at this boundary.
    let starts_from = workspace.start_offsets[boundary_index] as usize;
    let starts_to = workspace.start_offsets[boundary_index + 1] as usize;
    for &started in &workspace.start_events[starts_from..starts_to] {
        if let Err(position) = workspace.active.binary_search(&started) {
            workspace.active.insert(position, started);
        }
    }
}

/// Reusable allocation workspace for attribute segmentation.
///
/// Reusing a workspace amortizes setup allocations when processing many pieces of text.
Expand Down Expand Up @@ -172,6 +214,51 @@ impl AttributeSegmentsWorkspace {
index: 0,
}
}

/// Invokes `f` once for every segment derived from an unchecked slice of attribute spans.
///
/// `text_len` is the byte length of the text the `spans` belong to. Spans are
/// taken in slice order, and that order is the application order reported
/// through `active_span_indices`.
///
/// Span ranges are not validated in release builds. Callers must pass only
/// ranges that are valid for the text described by `text_len`: within
/// `text_len` and aligned to UTF-8 boundaries.
///
/// The second argument to `f` (`active_span_indices`) holds zero-based indices
/// into `spans`, sorted in application order. It is only valid until `f`
/// returns.
pub fn for_each_span_segment_unchecked<Attr, F>(
    &mut self,
    text_len: usize,
    spans: &[(TextRange, Attr)],
    mut f: F,
) where
    F: FnMut(TextRange, &[u32]),
{
    // Only the ranges take part in segmentation; attribute values are ignored.
    let indexed_ranges = spans
        .iter()
        .enumerate()
        .map(|(span_index, (range, _))| (span_index, *range));
    build_segment_state_from_ranges(text_len, spans.len(), indexed_ranges, self);

    // Each pair of neighbouring boundaries delimits one segment.
    let segment_count = self.boundaries.len().saturating_sub(1);
    for boundary_index in 0..segment_count {
        update_active_for_boundary(self, boundary_index);
        let start = self.boundaries[boundary_index] as usize;
        let end = self.boundaries[boundary_index + 1] as usize;
        debug_assert!(start < end, "boundaries are sorted + deduped");

        f(TextRange::new_unchecked(start, end), &self.active);
    }

    // Leave no active ids behind once the walk is finished.
    self.active.clear();
}
}

/// Iterator over contiguous attribute segments produced from an [`AttributedText`].
Expand Down Expand Up @@ -279,22 +366,7 @@ pub struct AttributeSegments<'w, 'a, T: Debug + TextStorage, Attr: Debug> {

impl<'w, 'a, T: Debug + TextStorage, Attr: Debug> AttributeSegments<'w, 'a, T, Attr> {
fn update_active_for_boundary(&mut self, boundary_index: usize) {
let end_range = self.workspace.end_offsets[boundary_index] as usize
..self.workspace.end_offsets[boundary_index + 1] as usize;
for &id in &self.workspace.end_events[end_range] {
if let Ok(ix) = self.workspace.active.binary_search(&id) {
self.workspace.active.remove(ix);
}
}

let start_range = self.workspace.start_offsets[boundary_index] as usize
..self.workspace.start_offsets[boundary_index + 1] as usize;
for &id in &self.workspace.start_events[start_range] {
match self.workspace.active.binary_search(&id) {
Ok(_) => {}
Err(ix) => self.workspace.active.insert(ix, id),
}
}
update_active_for_boundary(self.workspace, boundary_index);
}

/// Returns the spans active for the most recently yielded segment.
Expand Down Expand Up @@ -507,6 +579,33 @@ mod tests {
assert_eq!(segments.next(), None);
}

#[test]
fn unchecked_span_segments_match_attributed_text_segments() {
    // Attributed text carrying the same two overlapping spans as `spans` below.
    let mut at = AttributedText::new("hello");
    let red_range = TextRange::new(at.text(), 0..2).unwrap();
    at.apply_attribute(red_range, Color::Red);
    let blue_range = TextRange::new(at.text(), 1..5).unwrap();
    at.apply_attribute(blue_range, Color::Blue);

    let spans = [
        (TextRange::new(at.text(), 0..2).unwrap(), Color::Red),
        (TextRange::new(at.text(), 1..5).unwrap(), Color::Blue),
    ];

    // Collect every reported segment with a copy of its active span indices.
    let mut collected = Vec::new();
    let mut workspace = AttributeSegmentsWorkspace::new();
    workspace.for_each_span_segment_unchecked(at.len(), &spans, |range, active| {
        collected.push((range, active.to_vec()))
    });

    // The overlap 1..2 is covered by both spans; the rest by exactly one.
    let expected = vec![
        (TextRange::new_unchecked(0, 1), vec![0]),
        (TextRange::new_unchecked(1, 2), vec![0, 1]),
        (TextRange::new_unchecked(2, 5), vec![1]),
    ];
    assert_eq!(collected, expected);
}

#[test]
fn size_hint_tracks_remaining_segments() {
let mut at = AttributedText::new("hello");
Expand Down
31 changes: 31 additions & 0 deletions attributed_text/src/attributed_text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ impl<T: Debug + TextStorage, Attr: Debug> AttributedText<T, Attr> {
}
}

/// Create an `AttributedText` from text and unchecked attribute spans.
///
/// `text` and `attributes` are stored exactly as given, so attributes are
/// retained in the order provided. This constructor performs no validation
/// of its own: the spans are not checked against `text` here. Callers are
/// responsible for ensuring each [`TextRange`] is valid for `text`. Prefer
/// [`Self::apply_attribute_bytes`] or [`TextRange::new`] unless the spans
/// have already been checked against this exact text storage.
pub fn from_attributes_unchecked(text: T, attributes: Vec<(TextRange, Attr)>) -> Self {
Self { text, attributes }
}

/// Borrow the underlying text storage.
pub fn text(&self) -> &T {
&self.text
Expand Down Expand Up @@ -194,6 +205,26 @@ mod tests {
assert_eq!(at_4[0], (r(2..5), &TestAttribute::Remove));
}

#[test]
fn from_attributes_unchecked_retains_attribute_order() {
    // Two overlapping spans; byte offset 2 lies inside both of them.
    let keep_range = TextRange::new_unchecked(1, 3);
    let remove_range = TextRange::new_unchecked(2, 5);

    let at = AttributedText::from_attributes_unchecked(
        "Hello!",
        vec![
            (keep_range, TestAttribute::Keep),
            (remove_range, TestAttribute::Remove),
        ],
    );

    // Attributes must come back in the order they were supplied.
    let attrs: Vec<_> = at.attributes_at(2).collect();
    let expected = vec![
        (keep_range, &TestAttribute::Keep),
        (remove_range, &TestAttribute::Remove),
    ];
    assert_eq!(attrs, expected);
}

#[test]
fn attributes_for_range() {
let t = "Hello!";
Expand Down
Loading