1
0
mirror of https://github.com/emilk/egui.git synced 2026-06-26 14:49:06 -04:00

Optimize text selection performance for large documents (#7917)

**Perf: Optimize text selection and navigation performance for large
documents**

#### **Summary**
This PR significantly improves the performance of text selection
(double-clicking) and cursor navigation within `TextEdit` and `Label`
widgets, particularly when handling large documents (e.g., 1 MB+ files or
long logs). It eliminates several $O(N^2)$ bottlenecks and unnecessary memory
allocations in `text_cursor_state.rs`.

#### **Problems Identified**
1. **$O(N^2)$ Word Boundary Scanning:** In
`next_word_boundary_char_index`, `char_index_from_byte_index` was called
repeatedly inside a loop. This caused the entire document to be scanned
from the beginning for every word found, leading to quadratic time
complexity.
2. **Heavy String Allocations:** `ccursor_previous_word` used
`collect::<String>()` and `rev()` to search backwards, causing a full
copy and memory allocation of the text (or line) every time the user
moved the cursor or double-clicked.
3. **Inefficient Line Start Finding:** `find_line_start` performed
global character counts (`text.chars().count()`) and global skips, which
is very slow for large files.
4. **Global Search Scope:** `select_word_at` was performing word
boundary searches across the entire document even for simple
double-click actions.

#### **Key Changes & Optimizations**
1. **Line-Scoped Selection:** Updated `select_word_at` to first identify
the current line and then perform word boundary searches within that
local scope. This reduces the search space from millions of characters
to hundreds.
2. **Linear Time ($O(N)$) Boundary Search:** Refactored
`next_word_boundary_char_index` to use a running cumulative character
counter. This ensures the text is scanned only once.
3. **Zero-Allocation Backwards Search:** Optimized
`ccursor_previous_word` to use `next_back()` on the
`DoubleEndedIterator` provided by `unicode-segmentation`. This removes
all temporary `String` allocations.
4. **Byte-Based Line Search:** Optimized `find_line_start` to use
byte-based reverse scanning (`rfind('\n')`), which is significantly
faster than counting characters from the start of the document.

#### **Performance Impact**
In my tests with large text files (over 10,000 lines / 1MB+):
- **Before:** Double-clicking a word caused a UI freeze for 2–5 seconds.
- **After:** Word selection and navigation are near-instantaneous
(0–1ms), providing a smooth "native-like" experience even in WASM
environments.
This commit is contained in:
rustbasic
2026-04-14 20:49:54 +09:00
committed by lucasmerlin
parent 5278a73bca
commit 6778c0e1cc

View File

@@ -106,38 +106,26 @@ impl TextCursorState {
}
fn select_word_at(text: &str, ccursor: CCursor) -> CCursorRange {
if ccursor.index == 0 {
CCursorRange::two(ccursor, ccursor_next_word(text, ccursor))
} else {
let it = text.chars();
let mut it = it.skip(ccursor.index - 1);
if let Some(char_before_cursor) = it.next() {
if let Some(char_after_cursor) = it.next() {
if is_word_char(char_before_cursor) && is_word_char(char_after_cursor) {
let min = ccursor_previous_word(text, ccursor + 1);
let max = ccursor_next_word(text, min);
CCursorRange::two(min, max)
} else if is_word_char(char_before_cursor) {
let min = ccursor_previous_word(text, ccursor);
let max = ccursor_next_word(text, min);
CCursorRange::two(min, max)
} else if is_word_char(char_after_cursor) {
let max = ccursor_next_word(text, ccursor);
CCursorRange::two(ccursor, max)
} else {
let min = ccursor_previous_word(text, ccursor);
let max = ccursor_next_word(text, ccursor);
CCursorRange::two(min, max)
}
} else {
let min = ccursor_previous_word(text, ccursor);
CCursorRange::two(min, ccursor)
}
} else {
let max = ccursor_next_word(text, ccursor);
CCursorRange::two(ccursor, max)
}
if text.is_empty() {
return CCursorRange::one(ccursor);
}
let line_start = find_line_start(text, ccursor);
let line_end = ccursor_next_line(text, line_start);
let line_range = line_start.index..line_end.index;
let current_line_text = slice_char_range(text, line_range.clone());
let relative_idx = ccursor.index - line_start.index;
let relative_ccursor = CCursor::new(relative_idx);
let min = ccursor_previous_word(current_line_text, relative_ccursor);
let max = ccursor_next_word(current_line_text, relative_ccursor);
CCursorRange::two(
CCursor::new(line_start.index + min.index),
CCursor::new(line_start.index + max.index),
)
}
fn select_line_at(text: &str, ccursor: CCursor) -> CCursorRange {
@@ -209,16 +197,20 @@ fn ccursor_previous_line(text: &str, ccursor: CCursor) -> CCursor {
}
fn next_word_boundary_char_index(text: &str, cursor_ci: usize) -> usize {
for (word_byte_index, word) in text.split_word_bound_indices() {
let word_ci = char_index_from_byte_index(text, word_byte_index);
let mut current_char_idx = 0;
for (_word_byte_index, word) in text.split_word_bound_indices() {
let word_ci = current_char_idx;
// We consider `.` a word boundary.
// At least that's how Mac works when navigating something like `www.example.com`.
for (dot_ci_offset, chr) in word.chars().enumerate() {
let dot_ci = word_ci + dot_ci_offset;
let mut word_char_count = 0;
for chr in word.chars() {
let dot_ci = word_ci + word_char_count;
if chr == '.' && cursor_ci < dot_ci {
return dot_ci;
}
word_char_count += 1;
}
// Splitting considers contiguous whitespace as one word, such words must be skipped,
@@ -228,9 +220,11 @@ fn next_word_boundary_char_index(text: &str, cursor_ci: usize) -> usize {
if cursor_ci < word_ci && !all_word_chars(word) {
return word_ci;
}
current_char_idx += word_char_count;
}
char_index_from_byte_index(text, text.len())
current_char_idx
}
fn all_word_chars(text: &str) -> bool {
@@ -265,22 +259,14 @@ fn is_linebreak(c: char) -> bool {
/// Accepts and returns character offset (NOT byte offset!).
pub fn find_line_start(text: &str, current_index: CCursor) -> CCursor {
// We know that new lines, '\n', are a single byte char, but we have to
// work with char offsets because before the new line there may be any
// number of multi byte chars.
// We need to know the char index to be able to correctly set the cursor
// later.
let chars_count = text.chars().count();
let byte_idx = byte_index_from_char_index(text, current_index.index);
let text_before = &text[..byte_idx];
let position = text
.chars()
.rev()
.skip(chars_count - current_index.index)
.position(|x| x == '\n');
match position {
Some(pos) => CCursor::new(current_index.index - pos),
None => CCursor::new(0),
if let Some(last_newline_byte) = text_before.rfind('\n') {
let char_idx = char_index_from_byte_index(text, last_newline_byte + 1);
CCursor::new(char_idx)
} else {
CCursor::new(0)
}
}
@@ -367,3 +353,51 @@ mod test {
assert_eq!(next_word_boundary_char_index(text, 20), 21);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `ccursor_previous_word`.
    ///
    /// Each case is `(text, cursor, expected)`, where `cursor` is the char
    /// index (NOT byte index) passed in via `CCursor::new`, and `expected` is
    /// the char index the returned cursor must have.
    #[test]
    fn test_previous_word_graphemes() {
        let cases = [
            // Empty text and cursor already at the start.
            ("", 0, 0),
            ("hello", 0, 0),
            // Cursor at the end of a single word jumps to its start.
            ("hello", "hello".chars().count(), 0),
            // Cursor at the start of, inside, and at the end of the second word.
            ("hello world", 6, 0),
            ("hello world", 8, 6),
            ("hello world", "hello world".chars().count(), 6),
            // Trailing whitespace is skipped before looking for the word start.
            ("hello world ", "hello world ".chars().count(), 6),
            // Three spaces between the words: "world" starts at char index 8.
            // (With a single space this case would duplicate the one above
            // while expecting a different result.)
            ("hello   world", "hello   world".chars().count(), 8),
            // Whitespace-only text.
            (" ", " ".chars().count(), 0),
            // Punctuation and `.` act as boundaries.
            ("hello, world", "hello, world".chars().count(), 7),
            ("www.example.com", "www.example.com".chars().count(), 12),
            // Multi-byte scripts and emoji: indices are still counted in chars.
            ("안녕! 😊 세상", 8, 6),
            ("❤️👍 skvělá knihovna 👍❤️", 18, 11),
            // Combining mark (U+0301) after the base letter.
            (
                "a e\u{301} b",
                "a e\u{301} b".chars().count(),
                "a e\u{301} ".chars().count(),
            ),
            // Single-scalar emoji between words.
            (
                "hi 🙂 world",
                "hi 🙂 world".chars().count(),
                "hi 🙂 ".chars().count(),
            ),
            // Multi-scalar emoji sequence between words.
            (
                "hi 👨‍👩‍👧‍👦 world",
                "hi 👨‍👩‍👧‍👦 world".chars().count(),
                "hi 👨‍👩‍👧‍👦 ".chars().count(),
            ),
        ];

        for (text, cursor, expected) in cases {
            let result = ccursor_previous_word(text, CCursor::new(cursor));
            assert_eq!(
                result.index, expected,
                "text={text:?}, cursor={cursor}, got={}, expected={expected}",
                result.index
            );
        }
    }
}