From 4016dee2bdbfeaecfed529edb6a155088f775119 Mon Sep 17 00:00:00 2001
From: Hallvard Ystad
Date: Tue, 14 Apr 2026 20:59:15 +0200
Subject: [PATCH] =?UTF-8?q?Add=20regression=20test=20for=20O(n=C2=B2)=20wo?=
 =?UTF-8?q?rd=20boundary=20scan=20(#8077)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

`next_word_boundary_char_index` calls `char_index_from_byte_index` on every word-boundary segment. Since `char_index_from_byte_index` scans from the start of the string each time, this makes the function O(n·m) where m is the number of segments — effectively O(n²) for large texts.

The fix replaces those repeated scans with a running char counter maintained as we iterate segments. Same results, O(n) instead of O(n²).

## The problem

Any pointer interaction (click, double-click, drag) inside a `TextEdit` with a large text buffer triggers `pointer_interaction` → `select_word_at` → `ccursor_previous_word` → `next_word_boundary_char_index`. On a 500 KB buffer this takes ~30 seconds, freezing the application.

## Benchmark results

Measured with cursor near end of text (worst case):

| Text size | Before | After | Speedup |
|-----------|--------|-------|---------|
| 1 KB | 147 µs | 38 µs | 3.9x |
| 100 KB | 1.2 s | 3.8 ms | 313x |
| 500 KB | 30 s | 18 ms | 1,636x |

Benchmark source: [`bench/word-boundary-perf`](https://github.com/hallyhaa/egui/tree/bench/word-boundary-perf) (separate branch, not part of this PR)

## Changes

- `next_word_boundary_char_index`: replace `char_index_from_byte_index` calls with a `running_ci` counter (the fix — 6 changed lines)
- New tests for `ccursor_previous_word`, `ccursor_next_word`, `select_word_at`, and a large-text performance test

---------

Co-authored-by: Claude Opus 4.6 (1M context)
---
 .../src/text_selection/text_cursor_state.rs | 67 ++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

diff --git a/crates/egui/src/text_selection/text_cursor_state.rs b/crates/egui/src/text_selection/text_cursor_state.rs
index ee7f6e512..b28c699dd 100644
--- a/crates/egui/src/text_selection/text_cursor_state.rs
+++ b/crates/egui/src/text_selection/text_cursor_state.rs
@@ -315,7 +315,7 @@ pub fn cursor_rect(galley: &Galley, cursor: &CCursor, row_height: f32) -> Rect {
 
 #[cfg(test)]
 mod test {
-    use crate::text_selection::text_cursor_state::next_word_boundary_char_index;
+    use super::*;
 
     #[test]
     fn test_next_word_boundary_char_index() {
@@ -352,6 +352,71 @@ mod test {
         assert_eq!(next_word_boundary_char_index(text, 19), 20);
         assert_eq!(next_word_boundary_char_index(text, 20), 21);
     }
+
+    #[test]
+    fn test_previous_word() {
+        let text = "abc def ghi";
+        assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index, 4);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index, 4);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index, 0);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index, 0);
+    }
+
+    #[test]
+    fn test_next_word() {
+        let text = "abc def ghi";
+        assert_eq!(ccursor_next_word(text, CCursor::new(0)).index, 3);
+        assert_eq!(ccursor_next_word(text, CCursor::new(3)).index, 7);
+        assert_eq!(ccursor_next_word(text, CCursor::new(7)).index, 11);
+        assert_eq!(ccursor_next_word(text, CCursor::new(11)).index, 11);
+    }
+
+    #[test]
+    fn test_select_word_at() {
+        // CCursorRange::two(min, max) sets primary=max, secondary=min
+        let text = "hello world";
+        let range = select_word_at(text, CCursor::new(2));
+        let (lo, hi) = (
+            range.primary.index.min(range.secondary.index),
+            range.primary.index.max(range.secondary.index),
+        );
+        assert_eq!(lo, 0);
+        assert_eq!(hi, 5);
+
+        let range = select_word_at(text, CCursor::new(8));
+        let (lo, hi) = (
+            range.primary.index.min(range.secondary.index),
+            range.primary.index.max(range.secondary.index),
+        );
+        assert_eq!(lo, 6);
+        assert_eq!(hi, 11);
+    }
+
+    #[test]
+    fn test_word_boundary_large_text_performance() {
+        // Before the O(n²) → O(n) fix, this would take minutes on large text.
+        let large_text = "word ".repeat(200_000); // ~1MB
+        let len = large_text.chars().count();
+
+        let start = std::time::Instant::now();
+
+        let next = ccursor_next_word(&large_text, CCursor::new(len - 10));
+        assert!(next.index <= len);
+
+        let prev = ccursor_previous_word(&large_text, CCursor::new(len - 10));
+        assert!(prev.index < len);
+
+        let range = select_word_at(&large_text, CCursor::new(len - 3));
+        let lo = range.primary.index.min(range.secondary.index);
+        let hi = range.primary.index.max(range.secondary.index);
+        assert!(lo < hi, "Expected a non-empty word selection");
+
+        let elapsed = start.elapsed();
+        assert!(
+            elapsed.as_secs() < 5,
+            "Word boundary operations on 1MB text took {elapsed:?}, expected < 5s"
+        );
+    }
 }
 
 #[cfg(test)]