diff --git a/crates/egui/src/data/output.rs b/crates/egui/src/data/output.rs index 4793fa8a0..5808ccabc 100644 --- a/crates/egui/src/data/output.rs +++ b/crates/egui/src/data/output.rs @@ -1,5 +1,9 @@ //! All the data egui returns to the backend at the end of each frame. +use std::ops::Range; + +use epaint::text::CharIndex; + use crate::{OrderedViewportIdMap, RepaintCause, ViewportOutput, WidgetType}; /// What egui emits each frame from [`crate::Context::run_ui`]. @@ -554,7 +558,9 @@ pub struct WidgetInfo { pub value: Option, /// Selected range of characters in [`Self::current_text_value`]. - pub text_selection: Option>, + /// + /// The range is `start..end` in *character* offsets (not bytes), with `end` exclusive. + pub text_selection: Option>, /// The hint text for text edit fields. pub hint_text: Option, @@ -689,7 +695,7 @@ impl WidgetInfo { #[expect(clippy::needless_pass_by_value)] pub fn text_selection_changed( enabled: bool, - text_selection: std::ops::RangeInclusive, + text_selection: Range, current_text_value: impl ToString, ) -> Self { Self { diff --git a/crates/egui/src/lib.rs b/crates/egui/src/lib.rs index 0cc58c152..b90eff7bc 100644 --- a/crates/egui/src/lib.rs +++ b/crates/egui/src/lib.rs @@ -454,8 +454,8 @@ pub use epaint::{ pub mod text { pub use crate::text_selection::CCursorRange; pub use epaint::text::{ - FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob, LayoutSection, TextFormat, - TextWrapping, cursor::CCursor, + ByteIndex, CharIndex, FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob, + LayoutSection, TextFormat, TextWrapping, cursor::CCursor, }; } diff --git a/crates/egui/src/text_selection/accesskit_text.rs b/crates/egui/src/text_selection/accesskit_text.rs index 650e7e5c0..3dc1c95a1 100644 --- a/crates/egui/src/text_selection/accesskit_text.rs +++ b/crates/egui/src/text_selection/accesskit_text.rs @@ -1,4 +1,5 @@ use emath::TSTransform; +use epaint::text::CharIndex; use crate::{Context, Galley, Id}; @@ -9,7 +10,8 @@ pub(crate) const MAX_CHARS_PER_TEXT_RUN: usize = 255; /// Convert a (row, column) layout cursor position to a text run node ID and character index, /// accounting for rows that are split into multiple text runs. -fn text_run_position(parent_id: Id, row: usize, column: usize) -> accesskit::TextPosition { +fn text_run_position(parent_id: Id, row: usize, column: CharIndex) -> accesskit::TextPosition { + let column = column.0; // When column lands exactly on a chunk boundary (e.g., 255), it refers to // the end of the previous chunk, not the start of a new one. let chunk_index = if column > 0 && column.is_multiple_of(MAX_CHARS_PER_TEXT_RUN) { diff --git a/crates/egui/src/text_selection/cursor_range.rs b/crates/egui/src/text_selection/cursor_range.rs index 26678362e..4229756db 100644 --- a/crates/egui/src/text_selection/cursor_range.rs +++ b/crates/egui/src/text_selection/cursor_range.rs @@ -1,4 +1,4 @@ -use epaint::{Galley, text::cursor::CCursor}; +use epaint::{Galley, text::CharIndex, text::cursor::CCursor}; use crate::{Event, Id, Key, Modifiers, os::OperatingSystem}; @@ -49,7 +49,7 @@ impl CCursorRange { } /// The range of selected character indices. - pub fn as_sorted_char_range(&self) -> std::ops::Range { + pub fn as_sorted_char_range(&self) -> std::ops::Range { let [start, end] = self.sorted_cursors(); std::ops::Range { start: start.index, @@ -237,7 +237,7 @@ fn ccursor_from_accesskit_text_position( if run_id.accesskit_id() == position.node { let column = chunk_idx * MAX_CHARS_PER_TEXT_RUN + position.character_index; return Some(CCursor { - index: total_length + column, + index: CharIndex(total_length + column), prefer_next_row: !(column == row.glyphs.len() && !row.ends_with_newline && (i + 1) < galley.rows.len()), diff --git a/crates/egui/src/text_selection/text_cursor_state.rs b/crates/egui/src/text_selection/text_cursor_state.rs index b28c699dd..f88368f22 100644 --- a/crates/egui/src/text_selection/text_cursor_state.rs +++ b/crates/egui/src/text_selection/text_cursor_state.rs @@ -1,6 +1,6 @@ //! Text cursor changes/interaction, without modifying the text. -use epaint::text::{Galley, cursor::CCursor}; +use epaint::text::{ByteIndex, ByteRangeExt as _, CharIndex, Galley, cursor::CCursor}; use unicode_segmentation::UnicodeSegmentation as _; use crate::{NumExt as _, Rect, Response, Ui, epaint}; @@ -129,11 +129,11 @@ fn select_word_at(text: &str, ccursor: CCursor) -> CCursorRange { } fn select_line_at(text: &str, ccursor: CCursor) -> CCursorRange { - if ccursor.index == 0 { + if ccursor.index == CharIndex::ZERO { CCursorRange::two(ccursor, ccursor_next_line(text, ccursor)) } else { let it = text.chars(); - let mut it = it.skip(ccursor.index - 1); + let mut it = it.skip(ccursor.index.0 - 1); if let Some(char_before_cursor) = it.next() { if let Some(char_after_cursor) = it.next() { if (!is_linebreak(char_before_cursor)) && (!is_linebreak(char_after_cursor)) { @@ -178,26 +178,26 @@ fn ccursor_next_line(text: &str, ccursor: CCursor) -> CCursor { } pub fn ccursor_previous_word(text: &str, ccursor: CCursor) -> CCursor { - let num_chars = text.chars().count(); + let num_chars = CharIndex(text.chars().count()); let reversed: String = text.graphemes(true).rev().collect(); + let boundary = next_word_boundary_char_index(&reversed, num_chars - ccursor.index); CCursor { - index: num_chars - - next_word_boundary_char_index(&reversed, num_chars - ccursor.index).min(num_chars), + index: num_chars - boundary.min(num_chars), prefer_next_row: true, } } fn ccursor_previous_line(text: &str, ccursor: CCursor) -> CCursor { - let num_chars = text.chars().count(); + let num_chars = CharIndex(text.chars().count()); + let boundary = next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index); CCursor { - index: num_chars - - next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index), + index: num_chars - boundary, prefer_next_row: true, } } -fn next_word_boundary_char_index(text: &str, cursor_ci: usize) -> usize { - let mut current_char_idx = 0; +fn next_word_boundary_char_index(text: &str, cursor_ci: CharIndex) -> CharIndex { + let mut current_char_idx = CharIndex::ZERO; for (_word_byte_index, word) in text.split_word_bound_indices() { let word_ci = current_char_idx; @@ -231,8 +231,11 @@ fn all_word_chars(text: &str) -> bool { text.chars().all(is_word_char) } -fn next_line_boundary_char_index(it: impl Iterator, mut index: usize) -> usize { - let mut it = it.skip(index); +fn next_line_boundary_char_index( + it: impl Iterator, + mut index: CharIndex, +) -> CharIndex { + let mut it = it.skip(index.0); if let Some(_first) = it.next() { index += 1; @@ -260,36 +263,38 @@ fn is_linebreak(c: char) -> bool { /// Accepts and returns character offset (NOT byte offset!). pub fn find_line_start(text: &str, current_index: CCursor) -> CCursor { let byte_idx = byte_index_from_char_index(text, current_index.index); - let text_before = &text[..byte_idx]; + let text_before = (ByteIndex::ZERO..byte_idx).slice(text); if let Some(last_newline_byte) = text_before.rfind('\n') { - let char_idx = char_index_from_byte_index(text, last_newline_byte + 1); + let char_idx = char_index_from_byte_index(text, ByteIndex(last_newline_byte + 1)); CCursor::new(char_idx) } else { CCursor::new(0) } } -pub fn byte_index_from_char_index(s: &str, char_index: usize) -> usize { +pub fn byte_index_from_char_index(s: &str, char_index: CharIndex) -> ByteIndex { for (ci, (bi, _)) in s.char_indices().enumerate() { - if ci == char_index { - return bi; + if ci == char_index.0 { + return ByteIndex(bi); } } - s.len() + ByteIndex(s.len()) } -pub fn char_index_from_byte_index(input: &str, byte_index: usize) -> usize { +pub fn char_index_from_byte_index(input: &str, byte_index: ByteIndex) -> CharIndex { for (ci, (bi, _)) in input.char_indices().enumerate() { - if bi == byte_index { - return ci; + if bi == byte_index.0 { + return CharIndex(ci); } } - input.char_indices().last().map_or(0, |(i, _)| i + 1) + // `byte_index` is at or past the end of the string (or not on a char boundary): + // return the total number of characters. + CharIndex(input.chars().count()) } -pub fn slice_char_range(s: &str, char_range: std::ops::Range) -> &str { +pub fn slice_char_range(s: &str, char_range: std::ops::Range) -> &str { assert!( char_range.start <= char_range.end, "Invalid range, start must be less than end, but start = {}, end = {}", @@ -298,7 +303,7 @@ pub fn slice_char_range(s: &str, char_range: std::ops::Range) -> &str { ); let start_byte = byte_index_from_char_index(s, char_range.start); let end_byte = byte_index_from_char_index(s, char_range.end); - &s[start_byte..end_byte] + (start_byte..end_byte).slice(s) } /// The thin rectangle of one end of the selection, e.g. the primary cursor, in local galley coordinates. @@ -321,21 +326,21 @@ mod test { fn test_next_word_boundary_char_index() { // ASCII only let text = "abc d3f g_h i-j"; - assert_eq!(next_word_boundary_char_index(text, 1), 3); - assert_eq!(next_word_boundary_char_index(text, 3), 7); - assert_eq!(next_word_boundary_char_index(text, 9), 11); - assert_eq!(next_word_boundary_char_index(text, 12), 13); - assert_eq!(next_word_boundary_char_index(text, 13), 15); - assert_eq!(next_word_boundary_char_index(text, 15), 15); + assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3); + assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7); + assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 11); + assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 13); + assert_eq!(next_word_boundary_char_index(text, CharIndex(13)).0, 15); + assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 15); - assert_eq!(next_word_boundary_char_index("", 0), 0); - assert_eq!(next_word_boundary_char_index("", 1), 0); + assert_eq!(next_word_boundary_char_index("", CharIndex(0)).0, 0); + assert_eq!(next_word_boundary_char_index("", CharIndex(1)).0, 0); // ASCII only let text = "abc.def.ghi"; - assert_eq!(next_word_boundary_char_index(text, 1), 3); - assert_eq!(next_word_boundary_char_index(text, 3), 7); - assert_eq!(next_word_boundary_char_index(text, 7), 11); + assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3); + assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7); + assert_eq!(next_word_boundary_char_index(text, CharIndex(7)).0, 11); // Unicode graphemes, some of which consist of multiple Unicode characters, // !!! Unicode character is not always what is tranditionally considered a character, @@ -343,32 +348,66 @@ mod test { // handling of and around emojis is kind of weird and is not consistent across // text editors and browsers let text = "❤️👍 skvělá knihovna 👍❤️"; - assert_eq!(next_word_boundary_char_index(text, 0), 2); - assert_eq!(next_word_boundary_char_index(text, 2), 3); // this does not skip the space between thumbs-up and 'skvělá' - assert_eq!(next_word_boundary_char_index(text, 6), 10); - assert_eq!(next_word_boundary_char_index(text, 9), 10); - assert_eq!(next_word_boundary_char_index(text, 12), 19); - assert_eq!(next_word_boundary_char_index(text, 15), 19); - assert_eq!(next_word_boundary_char_index(text, 19), 20); - assert_eq!(next_word_boundary_char_index(text, 20), 21); + assert_eq!(next_word_boundary_char_index(text, CharIndex(0)).0, 2); + assert_eq!(next_word_boundary_char_index(text, CharIndex(2)).0, 3); // this does not skip the space between thumbs-up and 'skvělá' + assert_eq!(next_word_boundary_char_index(text, CharIndex(6)).0, 10); + assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 10); + assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 19); + assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 19); + assert_eq!(next_word_boundary_char_index(text, CharIndex(19)).0, 20); + assert_eq!(next_word_boundary_char_index(text, CharIndex(20)).0, 21); } #[test] fn test_previous_word() { let text = "abc def ghi"; - assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index, 4); - assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index, 4); - assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index, 0); - assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index, 0); + assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index.0, 4); + assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index.0, 4); + assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index.0, 0); + assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index.0, 0); } #[test] fn test_next_word() { let text = "abc def ghi"; - assert_eq!(ccursor_next_word(text, CCursor::new(0)).index, 3); - assert_eq!(ccursor_next_word(text, CCursor::new(3)).index, 7); - assert_eq!(ccursor_next_word(text, CCursor::new(7)).index, 11); - assert_eq!(ccursor_next_word(text, CCursor::new(11)).index, 11); + assert_eq!(ccursor_next_word(text, CCursor::new(0)).index.0, 3); + assert_eq!(ccursor_next_word(text, CCursor::new(3)).index.0, 7); + assert_eq!(ccursor_next_word(text, CCursor::new(7)).index.0, 11); + assert_eq!(ccursor_next_word(text, CCursor::new(11)).index.0, 11); + } + + #[test] + fn test_index_conversion_roundtrip() { + // "é" is 2 bytes, "👍" is 4 bytes. + let text = "aé👍b"; + let char_count = text.chars().count(); // 4 + assert_eq!(char_count, 4); + + // char -> byte, including the end index + assert_eq!(byte_index_from_char_index(text, CharIndex(0)).0, 0); + assert_eq!(byte_index_from_char_index(text, CharIndex(1)).0, 1); + assert_eq!(byte_index_from_char_index(text, CharIndex(2)).0, 3); + assert_eq!(byte_index_from_char_index(text, CharIndex(3)).0, 7); + assert_eq!(byte_index_from_char_index(text, CharIndex(4)).0, 8); + // Past the end clamps to the byte length: + assert_eq!( + byte_index_from_char_index(text, CharIndex(99)).0, + text.len() + ); + + // byte -> char, including the end index + assert_eq!(char_index_from_byte_index(text, ByteIndex(0)).0, 0); + assert_eq!(char_index_from_byte_index(text, ByteIndex(1)).0, 1); + assert_eq!(char_index_from_byte_index(text, ByteIndex(3)).0, 2); + assert_eq!(char_index_from_byte_index(text, ByteIndex(7)).0, 3); + // The end byte index must map to the character count, not to some byte offset: + assert_eq!(char_index_from_byte_index(text, ByteIndex(text.len())).0, 4); + // Past the end clamps to the character count: + assert_eq!(char_index_from_byte_index(text, ByteIndex(99)).0, 4); + + // Empty string: + assert_eq!(byte_index_from_char_index("", CharIndex(0)).0, 0); + assert_eq!(char_index_from_byte_index("", ByteIndex(0)).0, 0); } #[test] @@ -380,16 +419,16 @@ mod test { range.primary.index.min(range.secondary.index), range.primary.index.max(range.secondary.index), ); - assert_eq!(lo, 0); - assert_eq!(hi, 5); + assert_eq!(lo.0, 0); + assert_eq!(hi.0, 5); let range = select_word_at(text, CCursor::new(8)); let (lo, hi) = ( range.primary.index.min(range.secondary.index), range.primary.index.max(range.secondary.index), ); - assert_eq!(lo, 6); - assert_eq!(hi, 11); + assert_eq!(lo.0, 6); + assert_eq!(hi.0, 11); } #[test] @@ -401,10 +440,10 @@ mod test { let start = std::time::Instant::now(); let next = ccursor_next_word(&large_text, CCursor::new(len - 10)); - assert!(next.index <= len); + assert!(next.index.0 <= len); let prev = ccursor_previous_word(&large_text, CCursor::new(len - 10)); - assert!(prev.index < len); + assert!(prev.index.0 < len); let range = select_word_at(&large_text, CCursor::new(len - 3)); let lo = range.primary.index.min(range.secondary.index); @@ -459,9 +498,9 @@ mod tests { for (text, cursor, expected) in cases { let result = ccursor_previous_word(text, CCursor::new(cursor)); assert_eq!( - result.index, expected, + result.index.0, expected, "text={text:?}, cursor={cursor}, got={}, expected={expected}", - result.index + result.index.0 ); } } diff --git a/crates/egui/src/text_selection/visuals.rs b/crates/egui/src/text_selection/visuals.rs index e114ddb55..b18cbc820 100644 --- a/crates/egui/src/text_selection/visuals.rs +++ b/crates/egui/src/text_selection/visuals.rs @@ -57,9 +57,9 @@ pub fn paint_text_selection( if !row.glyphs.is_empty() { // Change color of the selected text: - let first_glyph_index = if ri == min.row { min.column } else { 0 }; + let first_glyph_index = if ri == min.row { min.column.0 } else { 0 }; let last_glyph_index = if ri == max.row { - max.column + max.column.0 } else { row.glyphs.len() }; diff --git a/crates/egui/src/widgets/text_edit/builder.rs b/crates/egui/src/widgets/text_edit/builder.rs index 1489fc67c..8075c57e8 100644 --- a/crates/egui/src/widgets/text_edit/builder.rs +++ b/crates/egui/src/widgets/text_edit/builder.rs @@ -899,7 +899,7 @@ impl TextEdit<'_> { ) }); } else if selection_changed && let Some(cursor_range) = cursor_range { - let char_range = cursor_range.primary.index..=cursor_range.secondary.index; + let char_range = cursor_range.as_sorted_char_range(); let info = WidgetInfo::text_selection_changed( ui.is_enabled(), char_range, diff --git a/crates/egui/src/widgets/text_edit/text_buffer.rs b/crates/egui/src/widgets/text_edit/text_buffer.rs index dbc2db26e..848b993d0 100644 --- a/crates/egui/src/widgets/text_edit/text_buffer.rs +++ b/crates/egui/src/widgets/text_edit/text_buffer.rs @@ -1,6 +1,11 @@ use std::{borrow::Cow, ops::Range}; -use epaint::{Galley, text::cursor::CCursor}; +use epaint::{ + Galley, + text::{ + ByteIndex, ByteRangeExt as _, CharIndex, CharRange, CharRangeExt as _, cursor::CCursor, + }, +}; /// One `\t` character is this many spaces wide (for indentation purposes). const TAB_SIZE: usize = 4; @@ -31,36 +36,36 @@ pub trait TextBuffer { /// /// # Return /// Returns how many *characters* were successfully inserted - fn insert_text(&mut self, text: &str, char_index: usize) -> usize; + fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize; /// Deletes a range of text `char_range` from this buffer. /// /// # Notes /// `char_range` is a *character range*, not a byte range. - fn delete_char_range(&mut self, char_range: Range); + fn delete_char_range(&mut self, char_range: Range); /// Reads the given character range. - fn char_range(&self, char_range: Range) -> &str { + fn char_range(&self, char_range: Range) -> &str { slice_char_range(self.as_str(), char_range) } - fn byte_index_from_char_index(&self, char_index: usize) -> usize { + fn byte_index_from_char_index(&self, char_index: CharIndex) -> ByteIndex { byte_index_from_char_index(self.as_str(), char_index) } - fn char_index_from_byte_index(&self, char_index: usize) -> usize { - char_index_from_byte_index(self.as_str(), char_index) + fn char_index_from_byte_index(&self, byte_index: ByteIndex) -> CharIndex { + char_index_from_byte_index(self.as_str(), byte_index) } /// Clears all characters in this buffer fn clear(&mut self) { - self.delete_char_range(0..self.as_str().len()); + self.delete_char_range(CharRange::full(self.as_str())); } /// Replaces all contents of this string with `text` fn replace_with(&mut self, text: &str) { self.clear(); - self.insert_text(text, 0); + self.insert_text(text, CharIndex(0)); } /// Clears all characters in this buffer and returns a string of the contents. @@ -90,12 +95,12 @@ pub trait TextBuffer { fn decrease_indentation(&mut self, ccursor: &mut CCursor) { let line_start = find_line_start(self.as_str(), *ccursor); - let remove_len = if self.as_str().chars().nth(line_start.index) == Some('\t') { + let remove_len = if self.as_str().chars().nth(line_start.index.0) == Some('\t') { Some(1) } else if self .as_str() .chars() - .skip(line_start.index) + .skip(line_start.index.0) .take(TAB_SIZE) .all(|c| c == ' ') { @@ -126,7 +131,7 @@ pub trait TextBuffer { } fn delete_previous_char(&mut self, ccursor: CCursor) -> CCursor { - if ccursor.index > 0 { + if CharIndex::ZERO < ccursor.index { let max_ccursor = ccursor; let min_ccursor = max_ccursor - 1; self.delete_selected_ccursor_range([min_ccursor, max_ccursor]) @@ -190,8 +195,8 @@ pub trait TextBuffer { /// impl TextBuffer for ExampleBuffer { /// fn is_mutable(&self) -> bool { unimplemented!() } /// fn as_str(&self) -> &str { unimplemented!() } - /// fn insert_text(&mut self, text: &str, char_index: usize) -> usize { unimplemented!() } - /// fn delete_char_range(&mut self, char_range: std::ops::Range) { unimplemented!() } + /// fn insert_text(&mut self, text: &str, char_index: egui::text::CharIndex) -> usize { unimplemented!() } + /// fn delete_char_range(&mut self, char_range: std::ops::Range) { unimplemented!() } /// /// // Implement it like the following: /// fn type_id(&self) -> TypeId { @@ -220,17 +225,17 @@ impl TextBuffer for String { self.as_ref() } - fn insert_text(&mut self, text: &str, char_index: usize) -> usize { + fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize { // Get the byte index from the character index let byte_idx = byte_index_from_char_index(self.as_str(), char_index); // Then insert the string - self.insert_str(byte_idx, text); + self.insert_str(byte_idx.into(), text); text.chars().count() } - fn delete_char_range(&mut self, char_range: Range) { + fn delete_char_range(&mut self, char_range: Range) { assert!( char_range.start <= char_range.end, "start must be <= end, but got {char_range:?}" @@ -241,7 +246,7 @@ impl TextBuffer for String { let byte_end = byte_index_from_char_index(self.as_str(), char_range.end); // Then drain all characters within this range - self.drain(byte_start..byte_end); + self.drain((byte_start..byte_end).as_usize()); } fn clear(&mut self) { @@ -270,11 +275,11 @@ impl TextBuffer for Cow<'_, str> { self.as_ref() } - fn insert_text(&mut self, text: &str, char_index: usize) -> usize { + fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize { ::insert_text(self.to_mut(), text, char_index) } - fn delete_char_range(&mut self, char_range: Range) { + fn delete_char_range(&mut self, char_range: Range) { ::delete_char_range(self.to_mut(), char_range); } @@ -305,11 +310,11 @@ impl TextBuffer for &str { self } - fn insert_text(&mut self, _text: &str, _ch_idx: usize) -> usize { + fn insert_text(&mut self, _text: &str, _ch_idx: CharIndex) -> usize { 0 } - fn delete_char_range(&mut self, _ch_range: Range) {} + fn delete_char_range(&mut self, _ch_range: Range) {} fn type_id(&self) -> std::any::TypeId { std::any::TypeId::of::<&str>() diff --git a/crates/egui_extras/src/syntax_highlighting.rs b/crates/egui_extras/src/syntax_highlighting.rs index adf2c9221..899e08fda 100644 --- a/crates/egui_extras/src/syntax_highlighting.rs +++ b/crates/egui_extras/src/syntax_highlighting.rs @@ -604,7 +604,8 @@ impl Highlighter { } #[cfg(feature = "syntect")] -fn as_byte_range(whole: &str, range: &str) -> std::ops::Range { +fn as_byte_range(whole: &str, range: &str) -> std::ops::Range { + use egui::text::ByteIndex; let whole_start = whole.as_ptr() as usize; let range_start = range.as_ptr() as usize; assert!( @@ -617,7 +618,7 @@ fn as_byte_range(whole: &str, range: &str) -> std::ops::Range { range_start + range.len() ); let offset = range_start - whole_start; - offset..(offset + range.len()) + ByteIndex(offset)..ByteIndex(offset + range.len()) } // ---------------------------------------------------------------------------- diff --git a/crates/epaint/src/text/cursor.rs b/crates/epaint/src/text/cursor.rs index a436ca1b1..5660c8322 100644 --- a/crates/epaint/src/text/cursor.rs +++ b/crates/epaint/src/text/cursor.rs @@ -1,5 +1,7 @@ //! Different types of text cursors, i.e. ways to point into a [`super::Galley`]. +use super::index::CharIndex; + /// Character cursor. /// /// The default cursor is zero. @@ -7,7 +9,7 @@ #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct CCursor { /// Character offset (NOT byte offset!). - pub index: usize, + pub index: CharIndex, /// If this cursors sits right at the border of a wrapped row break (NOT paragraph break) /// do we prefer the next row? @@ -18,9 +20,9 @@ pub struct CCursor { impl CCursor { #[inline] - pub fn new(index: usize) -> Self { + pub fn new(index: impl Into) -> Self { Self { - index, + index: index.into(), prefer_next_row: false, } } @@ -83,5 +85,5 @@ pub struct LayoutCursor { /// Character based (NOT bytes). /// It is fine if this points to something beyond the end of the current row. /// When moving up/down it may again be within the next row. - pub column: usize, + pub column: CharIndex, } diff --git a/crates/epaint/src/text/fonts.rs b/crates/epaint/src/text/fonts.rs index 23fe62387..6b6090c4a 100644 --- a/crates/epaint/src/text/fonts.rs +++ b/crates/epaint/src/text/fonts.rs @@ -10,7 +10,7 @@ use std::{ use crate::{ TextureAtlas, text::{ - Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords, + ByteIndex, Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords, font::{Font, FontFace}, }, }; @@ -1070,10 +1070,10 @@ impl GalleyCache { // `start` and `end` are the byte range of the current paragraph. // How does the current section overlap with the paragraph range? - if section_range.end <= start { + if section_range.end <= ByteIndex(start) { // The section is behind us current_section += 1; - } else if end < section_range.start { + } else if ByteIndex(end) < section_range.start { break; // Haven't reached this one yet. } else { // Section range overlaps with paragraph range @@ -1082,13 +1082,13 @@ impl GalleyCache { "Bad byte_range: {section_range:?}" ); let new_range = section_range.start.saturating_sub(start) - ..(section_range.end.at_most(end)).saturating_sub(start); + ..(section_range.end.min(ByteIndex(end))).saturating_sub(start); debug_assert!( new_range.start <= new_range.end, "Bad new section range: {new_range:?}" ); paragraph_job.sections.push(LayoutSection { - leading_space: if start <= section_range.start { + leading_space: if ByteIndex(start) <= section_range.start { *leading_space } else { 0.0 diff --git a/crates/epaint/src/text/index.rs b/crates/epaint/src/text/index.rs new file mode 100644 index 000000000..3fcd9a4ce --- /dev/null +++ b/crates/epaint/src/text/index.rs @@ -0,0 +1,202 @@ +//! Strongly-typed offsets into text. +//! +//! UTF-8 text can be indexed either by _byte_ offset or by _character_ +//! (Unicode scalar) offset. Mixing the two is a common source of bugs, +//! so we use distinct types to keep them apart. + +use std::ops::Range; + +/// A byte offset into a UTF-8 string. +/// +/// This is what you use to slice a [`str`] (e.g. `&text[range.start.0..range.end.0]`). +/// Not to be confused with [`CharIndex`], which counts characters instead of bytes. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr( + feature = "serde", + derive(serde::Deserialize, serde::Serialize), + serde(transparent) +)] +pub struct ByteIndex(pub usize); + +/// A character (Unicode scalar) offset into a string. +/// +/// Counts characters, not bytes, so it is independent of the UTF-8 encoding. +/// Not to be confused with [`ByteIndex`]. See also [`super::cursor::CCursor`]. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr( + feature = "serde", + derive(serde::Deserialize, serde::Serialize), + serde(transparent) +)] +pub struct CharIndex(pub usize); + +macro_rules! impl_text_index { + ($Type:ident) => { + impl $Type { + /// The zero offset, i.e. the very start of the text. + pub const ZERO: Self = Self(0); + + /// Saturating integer addition. + #[inline] + pub fn saturating_add(self, rhs: usize) -> Self { + Self(self.0.saturating_add(rhs)) + } + + /// Saturating integer subtraction. + #[inline] + pub fn saturating_sub(self, rhs: usize) -> Self { + Self(self.0.saturating_sub(rhs)) + } + } + + impl From for $Type { + #[inline] + fn from(index: usize) -> Self { + Self(index) + } + } + + impl From<$Type> for usize { + #[inline] + fn from(index: $Type) -> Self { + index.0 + } + } + + impl std::ops::Add for $Type { + type Output = Self; + + #[inline] + fn add(self, rhs: usize) -> Self { + Self(self.0 + rhs) + } + } + + /// Compose offsets, e.g. a base position plus a relative one. + impl std::ops::Add<$Type> for $Type { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self { + Self(self.0 + rhs.0) + } + } + + impl std::ops::Sub for $Type { + type Output = Self; + + #[inline] + fn sub(self, rhs: usize) -> Self { + Self(self.0 - rhs) + } + } + + impl std::ops::Sub<$Type> for $Type { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self { + Self(self.0 - rhs.0) + } + } + + impl std::ops::AddAssign for $Type { + #[inline] + fn add_assign(&mut self, rhs: usize) { + self.0 += rhs; + } + } + + impl std::ops::AddAssign<$Type> for $Type { + #[inline] + fn add_assign(&mut self, rhs: Self) { + self.0 += rhs.0; + } + } + + impl std::ops::SubAssign for $Type { + #[inline] + fn sub_assign(&mut self, rhs: usize) { + self.0 -= rhs; + } + } + + impl std::fmt::Display for $Type { + #[inline] + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } + } + }; +} + +impl_text_index!(ByteIndex); +impl_text_index!(CharIndex); + +/// A range of [`ByteIndex`], i.e. a byte range into a [`str`]. +pub type ByteRange = Range; + +/// A range of [`CharIndex`], i.e. a character range into a [`str`]. +pub type CharRange = Range; + +/// Extension methods for a [`ByteRange`]. +pub trait ByteRangeExt { + /// The full byte range covering `text`, i.e. `0..text.len()`. + fn full(text: &str) -> Self; + + /// The `start..end` byte range as plain `usize`, for slicing a [`str`]. + fn as_usize(&self) -> Range; + + /// Slice the given string by this byte range. + fn slice<'s>(&self, text: &'s str) -> &'s str; +} + +impl ByteRangeExt for ByteRange { + #[inline] + fn full(text: &str) -> Self { + ByteIndex::ZERO..ByteIndex(text.len()) + } + + #[inline] + fn as_usize(&self) -> Range { + self.start.0..self.end.0 + } + + #[inline] + fn slice<'s>(&self, text: &'s str) -> &'s str { + &text[self.as_usize()] + } +} + +/// Extension methods for a [`CharRange`]. +pub trait CharRangeExt { + /// The full character range covering `text`, i.e. `0..text.chars().count()`. + fn full(text: &str) -> Self; +} + +impl CharRangeExt for CharRange { + #[inline] + fn full(text: &str) -> Self { + CharIndex::ZERO..CharIndex(text.chars().count()) + } +} + +#[cfg(test)] +mod tests { + use super::CharIndex; + + #[test] + fn arithmetic() { + // Add a relative offset to a base position: + assert_eq!(CharIndex(2) + CharIndex(3), CharIndex(5)); + assert_eq!(CharIndex(2) + 3, CharIndex(5)); + + let mut idx = CharIndex(2); + idx += CharIndex(3); + assert_eq!(idx, CharIndex(5)); + + // Subtract a relative offset from a position: + assert_eq!(CharIndex(5) - CharIndex(2), CharIndex(3)); + assert_eq!(CharIndex(5) - 2, CharIndex(3)); + } +} diff --git a/crates/epaint/src/text/mod.rs b/crates/epaint/src/text/mod.rs index d62092d12..1b82a9e05 100644 --- a/crates/epaint/src/text/mod.rs +++ b/crates/epaint/src/text/mod.rs @@ -3,6 +3,7 @@ pub mod cursor; mod font; mod fonts; +mod index; mod text_layout; mod text_layout_types; @@ -11,6 +12,7 @@ pub use { FontData, FontDefinitions, FontFamily, FontId, FontInsert, FontPriority, FontTweak, Fonts, FontsImpl, FontsView, InsertFontFamily, }, + index::{ByteIndex, ByteRange, ByteRangeExt, CharIndex, CharRange, CharRangeExt}, text_layout::*, text_layout_types::*, }; diff --git a/crates/epaint/src/text/text_layout.rs b/crates/epaint/src/text/text_layout.rs index 1c8aba1e3..73878ed93 100644 --- a/crates/epaint/src/text/text_layout.rs +++ b/crates/epaint/src/text/text_layout.rs @@ -15,8 +15,8 @@ use crate::{ }; use super::{ - FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row, RowVisuals, - VariationCoords, + ByteRangeExt as _, FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row, + RowVisuals, VariationCoords, font::{Font, FontFace, ShapedGlyph}, }; @@ -454,7 +454,7 @@ fn layout_section( } paragraph.cursor_x_px += leading_space * pixels_per_point; - let section_text = &job.text[byte_range.clone()]; + let section_text = &job.text[byte_range.as_usize()]; let mut ctx = ShapingContext { pixels_per_point, font_size, @@ -1574,7 +1574,7 @@ mod tests { pixels_per_point, Arc::new(LayoutJob::single_section( iter::chain( - (0..elided_galley.rows[0].char_count_excluding_newline()).map(|_| ch), + (0..elided_galley.rows[0].char_count_excluding_newline().0).map(|_| ch), iter::once('…'), ) .collect::(), @@ -1866,7 +1866,7 @@ mod tests { // Verify cursor round-trip: end cursor index == char count. assert_eq!( - galley.end().index, + galley.end().index.0, expected_chars, "Galley::end().index mismatch for {text:?}", ); @@ -1892,9 +1892,9 @@ mod tests { let galley = layout(&mut fonts, pixels_per_point, job.into()); // Walking through every cursor index should produce valid positions. - for i in 0..=galley.end().index { + for i in 0..=galley.end().index.0 { let cursor = CCursor { - index: i, + index: CharIndex(i), prefer_next_row: false, }; let rect = galley.pos_from_cursor(cursor); diff --git a/crates/epaint/src/text/text_layout_types.rs b/crates/epaint/src/text/text_layout_types.rs index d471a433a..45b7f201d 100644 --- a/crates/epaint/src/text/text_layout_types.rs +++ b/crates/epaint/src/text/text_layout_types.rs @@ -4,6 +4,7 @@ use std::{ops::Range, str::FromStr as _}; use super::{ cursor::{CCursor, LayoutCursor}, font::UvRect, + index::{ByteIndex, ByteRange, ByteRangeExt as _, CharIndex}, }; use crate::{Color32, FontId, Mesh, Stroke, text::FontsView}; use emath::{Align, GuiRounding as _, NumExt as _, OrderedFloat, Pos2, Rect, Vec2, pos2, vec2}; @@ -119,7 +120,7 @@ impl LayoutJob { Self { sections: vec![LayoutSection { leading_space: 0.0, - byte_range: 0..text.len(), + byte_range: ByteRange::full(&text), format: TextFormat::simple(font_id, color), }], text, @@ -138,7 +139,7 @@ impl LayoutJob { Self { sections: vec![LayoutSection { leading_space: 0.0, - byte_range: 0..text.len(), + byte_range: ByteRange::full(&text), format, }], text, @@ -153,7 +154,7 @@ impl LayoutJob { Self { sections: vec![LayoutSection { leading_space: 0.0, - byte_range: 0..text.len(), + byte_range: ByteRange::full(&text), format: TextFormat::simple(font_id, color), }], text, @@ -168,7 +169,7 @@ impl LayoutJob { Self { sections: vec![LayoutSection { leading_space: 0.0, - byte_range: 0..text.len(), + byte_range: ByteRange::full(&text), format, }], text, @@ -192,7 +193,7 @@ impl LayoutJob { pub fn append(&mut self, text: &str, leading_space: f32, format: TextFormat) { let start = self.text.len(); self.text += text; - let byte_range = start..self.text.len(); + let byte_range = ByteIndex(start)..ByteIndex(self.text.len()); // Optimization: merge into the previous section if it has the same format // and this one adds no leading space. @@ -217,7 +218,7 @@ impl LayoutJob { /// /// Panics if the job has no sections. /// Assumes [`LayoutJob::sections`] are ordered by increasing `byte_range` (as produced by [`Self::append`]). - pub fn format_at_byte(&self, byte_idx: usize) -> &TextFormat { + pub fn format_at_byte(&self, byte_idx: ByteIndex) -> &TextFormat { self.debug_sanity_check(); let last = self.sections.last().expect("LayoutJob has no sections"); let idx = self @@ -250,12 +251,12 @@ impl LayoutJob { .expect("checked above") .byte_range .start, - 0, + ByteIndex::ZERO, "First LayoutSection must start at byte 0" ); assert_eq!( self.sections.last().expect("checked above").byte_range.end, - self.text.len(), + ByteIndex(self.text.len()), "Last LayoutSection must end at the end of the text" ); @@ -341,7 +342,7 @@ pub struct LayoutSection { pub leading_space: f32, /// Range into [`LayoutJob::text`]. - pub byte_range: Range, + pub byte_range: Range, /// How to format the text in this section (font, color, etc). pub format: TextFormat, @@ -946,23 +947,23 @@ impl Row { /// Excludes the implicit `\n` after the [`Row`], if any. #[inline] - pub fn char_count_excluding_newline(&self) -> usize { - self.glyphs.len() + pub fn char_count_excluding_newline(&self) -> CharIndex { + CharIndex(self.glyphs.len()) } /// Closest char at the desired x coordinate in row-relative coordinates. /// Returns something in the range `[0, char_count_excluding_newline()]`. - pub fn char_at(&self, desired_x: f32) -> usize { + pub fn char_at(&self, desired_x: f32) -> CharIndex { for (i, glyph) in self.glyphs.iter().enumerate() { if desired_x < glyph.logical_rect().center().x { - return i; + return CharIndex(i); } } self.char_count_excluding_newline() } - pub fn x_offset(&self, column: usize) -> f32 { - if let Some(glyph) = self.glyphs.get(column) { + pub fn x_offset(&self, column: CharIndex) -> f32 { + if let Some(glyph) = self.glyphs.get(column.0) { glyph.pos.x } else { self.size.x @@ -988,8 +989,8 @@ impl PlacedRow { /// Includes the implicit `\n` after the [`PlacedRow`], if any. #[inline] - pub fn char_count_including_newline(&self) -> usize { - self.row.glyphs.len() + (self.ends_with_newline as usize) + pub fn char_count_including_newline(&self) -> CharIndex { + CharIndex(self.row.glyphs.len() + (self.ends_with_newline as usize)) } } @@ -1188,7 +1189,7 @@ impl Galley { let mut best_y_dist = f32::INFINITY; let mut cursor = CCursor::default(); - let mut ccursor_index = 0; + let mut ccursor_index = CharIndex::ZERO; for row in &self.rows { let min_y = row.min_y(); @@ -1234,7 +1235,7 @@ impl Galley { return Default::default(); } let mut ccursor = CCursor { - index: 0, + index: CharIndex::ZERO, prefer_next_row: true, }; for row in &self.rows { @@ -1251,7 +1252,7 @@ impl Galley { pub fn layout_from_cursor(&self, cursor: CCursor) -> LayoutCursor { let prefer_next_row = cursor.prefer_next_row; let mut ccursor_it = CCursor { - index: 0, + index: CharIndex::ZERO, prefer_next_row, }; @@ -1294,15 +1295,13 @@ impl Galley { let prefer_next_row = layout_cursor.column < self.rows[layout_cursor.row].char_count_excluding_newline(); let mut cursor_it = CCursor { - index: 0, + index: CharIndex::ZERO, prefer_next_row, }; for (row_nr, row) in self.rows.iter().enumerate() { if row_nr == layout_cursor.row { - cursor_it.index += layout_cursor - .column - .at_most(row.char_count_excluding_newline()); + cursor_it.index += layout_cursor.column.min(row.char_count_excluding_newline()); return cursor_it; } @@ -1316,7 +1315,7 @@ impl Galley { impl Galley { #[expect(clippy::unused_self)] pub fn cursor_left_one_character(&self, cursor: &CCursor) -> CCursor { - if cursor.index == 0 { + if cursor.index == CharIndex::ZERO { Default::default() } else { CCursor { @@ -1392,7 +1391,7 @@ impl Galley { let layout_cursor = self.layout_from_cursor(*cursor); self.cursor_from_layout(LayoutCursor { row: layout_cursor.row, - column: 0, + column: CharIndex::ZERO, }) } @@ -1406,7 +1405,7 @@ impl Galley { pub fn cursor_begin_of_paragraph(&self, cursor: &CCursor) -> CCursor { let mut layout_cursor = self.layout_from_cursor(*cursor); - layout_cursor.column = 0; + layout_cursor.column = CharIndex::ZERO; loop { let prev_row = layout_cursor