1
0
mirror of https://github.com/emilk/egui.git synced 2026-06-26 22:53:14 -04:00

Add regression test for O(n²) word boundary scan (#8077)

## Summary

`next_word_boundary_char_index` calls `char_index_from_byte_index` on
every word-boundary segment. Since `char_index_from_byte_index` scans
from the start of the string each time, this makes the function O(n·m)
where m is the number of segments — effectively O(n²) for large texts.

The fix replaces those repeated scans with a running char counter
maintained as we iterate segments. Same results, O(n) instead of O(n²).

## The problem

Any pointer interaction (click, double-click, drag) inside a `TextEdit`
with a large text buffer triggers `pointer_interaction` →
`select_word_at` → `ccursor_previous_word` →
`next_word_boundary_char_index`. On a 500 KB buffer this takes ~30
seconds, freezing the application.

## Benchmark results

Measured with the cursor near the end of the text (worst case):

| Text size | Before | After | Speedup |
|-----------|--------|-------|---------|
| 1 KB | 147 µs | 38 µs | 3.9x |
| 100 KB | 1.2 s | 3.8 ms | 313x |
| 500 KB | 30 s | 18 ms | 1,636x |

Benchmark source:
[`bench/word-boundary-perf`](https://github.com/hallyhaa/egui/tree/bench/word-boundary-perf)
(separate branch, not part of this PR)

## Changes

- `next_word_boundary_char_index`: replace `char_index_from_byte_index`
calls with a `running_ci` counter (the fix — 6 changed lines)
- New tests for `ccursor_previous_word`, `ccursor_next_word`,
`select_word_at`, and a large-text performance test

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hallvard Ystad
2026-04-14 20:59:15 +02:00
committed by lucasmerlin
parent 6778c0e1cc
commit 27aa63a520

View File

@@ -315,7 +315,7 @@ pub fn cursor_rect(galley: &Galley, cursor: &CCursor, row_height: f32) -> Rect {
#[cfg(test)]
mod test {
use crate::text_selection::text_cursor_state::next_word_boundary_char_index;
use super::*;
#[test]
fn test_next_word_boundary_char_index() {
@@ -352,6 +352,71 @@ mod test {
assert_eq!(next_word_boundary_char_index(text, 19), 20);
assert_eq!(next_word_boundary_char_index(text, 20), 21);
}
#[test]
fn test_previous_word() {
    let text = "abc def ghi";

    // Each entry is (starting char index, expected char index after the jump).
    let cases = [(7, 4), (5, 4), (4, 0), (0, 0)];

    for (start, expected) in cases {
        let landed = ccursor_previous_word(text, CCursor::new(start)).index;
        assert_eq!(landed, expected, "previous word from index {start}");
    }
}
#[test]
fn test_next_word() {
    let text = "abc def ghi";

    // Each entry is (starting char index, expected char index after the jump).
    // The last case starts at the end of the text and is expected to stay there.
    let cases = [(0, 3), (3, 7), (7, 11), (11, 11)];

    for (start, expected) in cases {
        let landed = ccursor_next_word(text, CCursor::new(start)).index;
        assert_eq!(landed, expected, "next word from index {start}");
    }
}
#[test]
fn test_select_word_at() {
    let text = "hello world";

    // The test only cares about the extent of the selection, not which end is
    // `primary` and which is `secondary`, so reduce every result to an ordered
    // `(lo, hi)` pair of char indices.
    let word_span = |at: usize| {
        let selection = select_word_at(text, CCursor::new(at));
        let (a, b) = (selection.primary.index, selection.secondary.index);
        (a.min(b), a.max(b))
    };

    // Cursor inside "hello".
    assert_eq!(word_span(2), (0, 5));

    // Cursor inside "world".
    assert_eq!(word_span(8), (6, 11));
}
#[test]
fn test_word_boundary_large_text_performance() {
    // Regression guard against the word-boundary scan going quadratic again:
    // with an O(n²) scan, input of this size reportedly took minutes.
    // 200_000 repetitions of a 5-byte ASCII chunk is 1_000_000 bytes (~1MB).
    let large_text = "word ".repeat(200_000);
    let len = large_text.chars().count();

    let timer = std::time::Instant::now();

    // All cursors sit close to the end of the text.
    let next = ccursor_next_word(&large_text, CCursor::new(len - 10));
    assert!(next.index <= len);

    let prev = ccursor_previous_word(&large_text, CCursor::new(len - 10));
    assert!(prev.index < len);

    let range = select_word_at(&large_text, CCursor::new(len - 3));
    let (a, b) = (range.primary.index, range.secondary.index);
    let (lo, hi) = (a.min(b), a.max(b));
    assert!(lo < hi, "Expected a non-empty word selection");

    // Generous wall-clock budget: only meant to catch a complexity regression,
    // not to benchmark.
    let elapsed = timer.elapsed();
    let budget = std::time::Duration::from_secs(5);
    assert!(
        elapsed < budget,
        "Word boundary operations on 1MB text took {elapsed:?}, expected < 5s"
    );
}
}
#[cfg(test)]