From 40e84a4c0cf20deecb9e99d43063020695fe21de Mon Sep 17 00:00:00 2001 From: Mihnea-Teodor Stoica Date: Fri, 24 Apr 2026 18:46:15 +0300 Subject: [PATCH 1/3] Fix zero-width regex replace all (#775) --- crates/edit/src/buffer/mod.rs | 93 ++++++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index 4ec01f103a7..53138aae9fe 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -1175,7 +1175,7 @@ impl TextBuffer { replacement: &[u8], ) -> icu::Result<()> { // Editors traditionally replace the previous search hit, not the next possible one. - if let (Some(search), Some(..)) = (&self.search, &self.selection) { + if let Some(search) = &self.search { let search = unsafe { &mut *search.get() }; if search.selection_generation == self.selection_generation { let scratch = scratch_arena(None); @@ -1203,15 +1203,23 @@ impl TextBuffer { let parsed_replacements = Self::find_parse_replacement(&scratch, &mut search, replacement); loop { - self.find_select_next(&mut search, offset, false); - if !self.has_selection() { - break; - } + let Some(range) = self.find_select_next(&mut search, offset, false) else { break }; let replacement = self.find_fill_replacement(&mut search, replacement, &parsed_replacements); self.write(&replacement, self.cursor, true); - offset = self.cursor.offset; + + if range.is_empty() { + let next = self + .cursor_move_delta_internal(self.cursor, CursorMovement::Grapheme, 1) + .offset; + if next <= self.cursor.offset { + break; + } + offset = next; + } else { + offset = self.cursor.offset; + } } Ok(()) @@ -1277,7 +1285,12 @@ impl TextBuffer { }) } - fn find_select_next(&mut self, search: &mut ActiveSearch, offset: usize, wrap: bool) { + fn find_select_next( + &mut self, + search: &mut ActiveSearch, + offset: usize, + wrap: bool, + ) -> Option<Range<usize>> { if search.buffer_generation != self.buffer.generation() { unsafe {
search.regex.set_text(&mut search.text, offset) }; search.buffer_generation = self.buffer.generation(); @@ -1297,7 +1310,7 @@ impl TextBuffer { hit = search.regex.next(); } - search.selection_generation = if let Some(range) = hit { + search.selection_generation = if let Some(range) = hit.clone() { // Now the search offset is no more at the start of the buffer. search.next_search_offset = range.end; @@ -1316,6 +1329,8 @@ impl TextBuffer { search.no_matches = true; self.set_selection(None) }; + + hit } fn find_parse_replacement<'a>( @@ -3086,3 +3101,65 @@ fn detect_bom(bytes: &[u8]) -> Option<&'static str> { } None } + +#[cfg(test)] +mod tests { + use super::{SearchOptions, TextBuffer}; + + fn buffer_contents(buf: &TextBuffer) -> Vec<u8> { + let mut out = Vec::with_capacity(buf.text_length()); + let mut off = 0; + + while off < buf.text_length() { + let chunk = buf.read_forward(off); + out.extend_from_slice(chunk); + off += chunk.len(); + } + + out + } + + #[test] + fn replace_all_supports_zero_width_regex_matches() { + let mut buf = TextBuffer::new(true).unwrap(); + buf.write_raw(b"hello\nworld"); + + buf.find_and_replace_all( + "$", + SearchOptions { + use_regex: true, + ..Default::default() + }, + b"foo", + ) + .unwrap(); + + assert_eq!(buffer_contents(&buf), b"hellofoo\nworldfoo".to_vec()); + } + + #[test] + fn replace_supports_zero_width_regex_matches() { + let mut buf = TextBuffer::new(true).unwrap(); + buf.write_raw(b"hello"); + buf.find_and_select( + "$", + SearchOptions { + use_regex: true, + ..Default::default() + }, + ) + .unwrap(); + + buf.find_and_replace( + "$", + SearchOptions { + use_regex: true, + ..Default::default() + }, + b"foo", + ) + .unwrap(); + + assert_eq!(buffer_contents(&buf), b"hellofoo".to_vec()); + } +} From 422244a8a6d92a3c8597310a5f1e3073ced1a1ff Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 27 Apr 2026 20:20:34 +0200 Subject: [PATCH 2/3] Fix implementation --- crates/edit/src/buffer/mod.rs | 130
++++++++++++++++++---------------- crates/edit/src/icu.rs | 40 ++++------- 2 files changed, 81 insertions(+), 89 deletions(-) diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index 7744052a759..aaebb0a8701 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -1152,15 +1152,15 @@ impl TextBuffer { // If the user moved the cursor since the last search, but the needle remained the same, // we still need to move the start of the search to the new cursor position. - let next_search_offset = match self.selection { - Some(TextBufferSelection { beg, end }) => { - if self.selection_generation == search.selection_generation { + let next_search_offset = if self.selection_generation == search.selection_generation { search.next_search_offset } else { + match self.selection { + Some(TextBufferSelection { beg, end }) => { self.cursor_move_to_logical_internal(self.cursor, beg.min(end)).offset - } } _ => self.cursor.offset, + } }; self.find_select_next(search, next_search_offset, true); @@ -1179,11 +1179,19 @@ impl TextBuffer { let search = unsafe { &mut *search.get() }; if search.selection_generation == self.selection_generation { let scratch = scratch_arena(None); + let zero_width = self.selection.is_none(); let parsed_replacements = Self::find_parse_replacement(&scratch, &mut *search, replacement); let replacement = self.find_fill_replacement(&mut *search, replacement, &parsed_replacements); - self.write(&replacement, self.cursor, true); + self.write_raw(&replacement); + + // After replacing a zero-width match, advance past it so that find_and_select wraps to the + // next match rather than finding the same anchor (e.g. `$`) again at the same line end. 
+ if zero_width { + search.next_search_offset = + self.find_advance_past_zero_width(self.active_edit_off).unwrap_or(0); + } } } @@ -1197,34 +1205,48 @@ impl TextBuffer { options: SearchOptions, replacement: &[u8], ) -> icu::Result<()> { + self.edit_begin_grouping(); + let scratch = scratch_arena(None); let mut search = self.find_construct_search(pattern, options)?; let mut offset = 0; let parsed_replacements = Self::find_parse_replacement(&scratch, &mut search, replacement); - loop { - let Some(range) = self.find_select_next(&mut search, offset, false) else { break }; - + while let Some(range) = self.find_select_next(&mut search, offset, false) { let replacement = self.find_fill_replacement(&mut search, replacement, &parsed_replacements); - self.write(&replacement, self.cursor, true); + self.write_raw(&replacement); + // The `active_edit_off` points to the end of the last edit made by `write_raw()`. + // This differs from the self.cursor.offset, if `write_raw()` did an `insert_final_newline`. + offset = self.active_edit_off; + + // Avoid infinite loops when hitting zero-length matches + // by advancing past the zero-length match location. + // + // This is technically not entirely correct. For instance imagine replacing + // "^|f" with "x" in "foo". It should technically produce "xxoo", but I + // found that other editors also do it wrong, so it can't matter too much. if range.is_empty() { - let next = self - .cursor_move_delta_internal(self.cursor, CursorMovement::Grapheme, 1) - .offset; - if next <= self.cursor.offset { - break; - } - offset = next; - } else { - offset = self.cursor.offset; + offset = match self.find_advance_past_zero_width(offset) { + Some(next) => next, + None => break, + }; } } + self.edit_end_grouping(); Ok(()) } + /// After replacing a zero-width match, compute the offset to resume + /// searching from. Returns `None` if we're at the end of the buffer. 
+ fn find_advance_past_zero_width(&self, offset: usize) -> Option<usize> { + let cursor = self.cursor_move_to_offset_internal(self.cursor, offset); + let next = self.cursor_move_delta_internal(cursor, CursorMovement::Grapheme, 1); + (next.offset > offset).then_some(next.offset) + } + fn find_construct_search( &self, pattern: &str, @@ -1310,7 +1332,7 @@ impl TextBuffer { hit = search.regex.next(); } - search.selection_generation = if let Some(range) = hit.clone() { + search.selection_generation = if let Some(range) = &hit { // Now the search offset is no more at the start of the buffer. search.next_search_offset = range.end; @@ -3115,60 +3137,46 @@ fn detect_bom(bytes: &[u8]) -> Option<&'static str> { mod tests { use super::{SearchOptions, TextBuffer}; - fn buffer_contents(buf: &TextBuffer) -> Vec<u8> { - let mut out = Vec::with_capacity(buf.text_length()); - let mut off = 0; - - while off < buf.text_length() { - let chunk = buf.read_forward(off); - out.extend_from_slice(chunk); - off += chunk.len(); - } - - out + fn buffer_contents(buf: &mut TextBuffer) -> String { + let mut str = String::new(); + buf.save_as_string(&mut str); + str } #[test] - fn replace_all_supports_zero_width_regex_matches() { - let mut buf = TextBuffer::new(true).unwrap(); - buf.write_raw(b"hello\nworld"); + fn replace_one_zero_width() { + let mut buf = TextBuffer::new(false).unwrap(); + buf.set_crlf(false); + buf.set_insert_final_newline(true); + buf.write_raw(b"a\nb\n"); + buf.cursor_move_to_logical(Default::default()); - buf.find_and_replace_all( - "$", - SearchOptions { - use_regex: true, - ..Default::default() - }, - b"foo", - ) - .unwrap(); + for _ in 0..6 { + buf.find_and_replace( + "$", + SearchOptions { use_regex: true, ..Default::default() }, + b"x", + ) + .unwrap(); + } - assert_eq!(buffer_contents(&buf), b"hellofoo\nworldfoo".to_vec()); + assert_eq!(buffer_contents(&mut buf), "axx\nbxx\nx\n"); } #[test] - fn replace_supports_zero_width_regex_matches() { - let mut buf =
TextBuffer::new(true).unwrap(); - buf.write_raw(b"hello"); - buf.find_and_select( - "$", - SearchOptions { - use_regex: true, - ..Default::default() - }, - ) - .unwrap(); + fn replace_all_zero_width() { + let mut buf = TextBuffer::new(false).unwrap(); + buf.set_crlf(false); + buf.set_insert_final_newline(true); + buf.write_raw(b"a\nb\n"); - buf.find_and_replace( + buf.find_and_replace_all( "$", - SearchOptions { - use_regex: true, - ..Default::default() - }, - b"foo", + SearchOptions { use_regex: true, ..Default::default() }, + b"x", ) .unwrap(); - assert_eq!(buffer_contents(&buf), b"hellofoo".to_vec()); + assert_eq!(buffer_contents(&mut buf), "ax\nbx\nx\n"); } } diff --git a/crates/edit/src/icu.rs b/crates/edit/src/icu.rs index 3b070e5b859..8da282b2bca 100644 --- a/crates/edit/src/icu.rs +++ b/crates/edit/src/icu.rs @@ -8,6 +8,7 @@ use std::ffi::{CStr, c_char}; use std::mem::MaybeUninit; use std::ops::Range; use std::ptr::{null, null_mut}; +use std::sync::OnceLock; use std::{fmt, mem}; use stdext::arena::{Arena, scratch_arena}; @@ -993,28 +994,18 @@ const LIBICUI18N_PROC_NAMES: [*const c_char; 12] = [ proc_name!("uregex_end64"), ]; -enum LibraryFunctionsState { - Uninitialized, - Failed, - Loaded(LibraryFunctions), -} - -static mut LIBRARY_FUNCTIONS: LibraryFunctionsState = LibraryFunctionsState::Uninitialized; +static LIBRARY_FUNCTIONS: OnceLock<Option<LibraryFunctions>> = OnceLock::new(); pub fn init() -> Result<()> { init_if_needed()?; Ok(()) } -#[allow(static_mut_refs)] fn init_if_needed() -> Result<&'static LibraryFunctions> { - #[cold] - fn load() { + fn load() -> Option<LibraryFunctions> { unsafe { - LIBRARY_FUNCTIONS = LibraryFunctionsState::Failed; - let Ok(icu) = sys::load_icu() else { - return; + return None; }; type TransparentFunction = unsafe extern "C" fn() -> *const (); @@ -1058,7 +1049,7 @@ fn init_if_needed() -> Result<&'static LibraryFunctions> { "Failed to load ICU function: {:?}", CStr::from_ptr(name) ); - return; + return None; }; ptr.write(func); @@ -1066,27 +1057,20 @@ fn
init_if_needed() -> Result<&'static LibraryFunctions> { } } - LIBRARY_FUNCTIONS = LibraryFunctionsState::Loaded(funcs.assume_init()); - } - } - - unsafe { - if matches!(&LIBRARY_FUNCTIONS, LibraryFunctionsState::Uninitialized) { - load(); + Some(funcs.assume_init()) } } - match unsafe { &LIBRARY_FUNCTIONS } { - LibraryFunctionsState::Loaded(f) => Ok(f), - _ => Err(ICU_MISSING_ERROR), + match LIBRARY_FUNCTIONS.get_or_init(load) { + Some(f) => Ok(f), + None => Err(ICU_MISSING_ERROR), } } -#[allow(static_mut_refs)] fn assume_loaded() -> &'static LibraryFunctions { - match unsafe { &LIBRARY_FUNCTIONS } { - LibraryFunctionsState::Loaded(f) => f, - _ => unreachable!(), + match LIBRARY_FUNCTIONS.get() { + Some(Some(f)) => f, + _ => unsafe { std::hint::unreachable_unchecked() }, } } From 88bf5a6cc271c4022578b13466398535086f4a7a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 27 Apr 2026 23:14:47 +0200 Subject: [PATCH 3/3] Format --- crates/edit/src/buffer/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/edit/src/buffer/mod.rs b/crates/edit/src/buffer/mod.rs index aaebb0a8701..9f57eabe415 100644 --- a/crates/edit/src/buffer/mod.rs +++ b/crates/edit/src/buffer/mod.rs @@ -1153,13 +1153,13 @@ impl TextBuffer { // If the user moved the cursor since the last search, but the needle remained the same, // we still need to move the start of the search to the new cursor position. let next_search_offset = if self.selection_generation == search.selection_generation { - search.next_search_offset - } else { + search.next_search_offset + } else { match self.selection { Some(TextBufferSelection { beg, end }) => { self.cursor_move_to_logical_internal(self.cursor, beg.min(end)).offset - } - _ => self.cursor.offset, + } + _ => self.cursor.offset, } };