1
0
mirror of https://github.com/emilk/egui.git synced 2026-06-26 14:49:06 -04:00

Use strongly typed CharIndex and ByteIndex + bug fixes (#8245)

Less risk of confusing the two.

Found and fixed a couple of real bugs in the process!

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Emil Ernerfeldt
2026-06-21 02:24:00 +02:00
committed by GitHub
parent eac51da9ca
commit 13d6b5afcf
15 changed files with 396 additions and 138 deletions

View File

@@ -1,5 +1,9 @@
//! All the data egui returns to the backend at the end of each frame.
use std::ops::Range;
use epaint::text::CharIndex;
use crate::{OrderedViewportIdMap, RepaintCause, ViewportOutput, WidgetType};
/// What egui emits each frame from [`crate::Context::run_ui`].
@@ -554,7 +558,9 @@ pub struct WidgetInfo {
pub value: Option<f64>,
/// Selected range of characters in [`Self::current_text_value`].
pub text_selection: Option<std::ops::RangeInclusive<usize>>,
///
/// The range is `start..end` in *character* offsets (not bytes), with `end` exclusive.
pub text_selection: Option<Range<CharIndex>>,
/// The hint text for text edit fields.
pub hint_text: Option<String>,
@@ -689,7 +695,7 @@ impl WidgetInfo {
#[expect(clippy::needless_pass_by_value)]
pub fn text_selection_changed(
enabled: bool,
text_selection: std::ops::RangeInclusive<usize>,
text_selection: Range<CharIndex>,
current_text_value: impl ToString,
) -> Self {
Self {

View File

@@ -454,8 +454,8 @@ pub use epaint::{
pub mod text {
pub use crate::text_selection::CCursorRange;
pub use epaint::text::{
FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob, LayoutSection, TextFormat,
TextWrapping, cursor::CCursor,
ByteIndex, CharIndex, FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob,
LayoutSection, TextFormat, TextWrapping, cursor::CCursor,
};
}

View File

@@ -1,4 +1,5 @@
use emath::TSTransform;
use epaint::text::CharIndex;
use crate::{Context, Galley, Id};
@@ -9,7 +10,8 @@ pub(crate) const MAX_CHARS_PER_TEXT_RUN: usize = 255;
/// Convert a (row, column) layout cursor position to a text run node ID and character index,
/// accounting for rows that are split into multiple text runs.
fn text_run_position(parent_id: Id, row: usize, column: usize) -> accesskit::TextPosition {
fn text_run_position(parent_id: Id, row: usize, column: CharIndex) -> accesskit::TextPosition {
let column = column.0;
// When column lands exactly on a chunk boundary (e.g., 255), it refers to
// the end of the previous chunk, not the start of a new one.
let chunk_index = if column > 0 && column.is_multiple_of(MAX_CHARS_PER_TEXT_RUN) {

View File

@@ -1,4 +1,4 @@
use epaint::{Galley, text::cursor::CCursor};
use epaint::{Galley, text::CharIndex, text::cursor::CCursor};
use crate::{Event, Id, Key, Modifiers, os::OperatingSystem};
@@ -49,7 +49,7 @@ impl CCursorRange {
}
/// The range of selected character indices.
pub fn as_sorted_char_range(&self) -> std::ops::Range<usize> {
pub fn as_sorted_char_range(&self) -> std::ops::Range<CharIndex> {
let [start, end] = self.sorted_cursors();
std::ops::Range {
start: start.index,
@@ -237,7 +237,7 @@ fn ccursor_from_accesskit_text_position(
if run_id.accesskit_id() == position.node {
let column = chunk_idx * MAX_CHARS_PER_TEXT_RUN + position.character_index;
return Some(CCursor {
index: total_length + column,
index: CharIndex(total_length + column),
prefer_next_row: !(column == row.glyphs.len()
&& !row.ends_with_newline
&& (i + 1) < galley.rows.len()),

View File

@@ -1,6 +1,6 @@
//! Text cursor changes/interaction, without modifying the text.
use epaint::text::{Galley, cursor::CCursor};
use epaint::text::{ByteIndex, ByteRangeExt as _, CharIndex, Galley, cursor::CCursor};
use unicode_segmentation::UnicodeSegmentation as _;
use crate::{NumExt as _, Rect, Response, Ui, epaint};
@@ -129,11 +129,11 @@ fn select_word_at(text: &str, ccursor: CCursor) -> CCursorRange {
}
fn select_line_at(text: &str, ccursor: CCursor) -> CCursorRange {
if ccursor.index == 0 {
if ccursor.index == CharIndex::ZERO {
CCursorRange::two(ccursor, ccursor_next_line(text, ccursor))
} else {
let it = text.chars();
let mut it = it.skip(ccursor.index - 1);
let mut it = it.skip(ccursor.index.0 - 1);
if let Some(char_before_cursor) = it.next() {
if let Some(char_after_cursor) = it.next() {
if (!is_linebreak(char_before_cursor)) && (!is_linebreak(char_after_cursor)) {
@@ -178,26 +178,26 @@ fn ccursor_next_line(text: &str, ccursor: CCursor) -> CCursor {
}
pub fn ccursor_previous_word(text: &str, ccursor: CCursor) -> CCursor {
let num_chars = text.chars().count();
let num_chars = CharIndex(text.chars().count());
let reversed: String = text.graphemes(true).rev().collect();
let boundary = next_word_boundary_char_index(&reversed, num_chars - ccursor.index);
CCursor {
index: num_chars
- next_word_boundary_char_index(&reversed, num_chars - ccursor.index).min(num_chars),
index: num_chars - boundary.min(num_chars),
prefer_next_row: true,
}
}
fn ccursor_previous_line(text: &str, ccursor: CCursor) -> CCursor {
let num_chars = text.chars().count();
let num_chars = CharIndex(text.chars().count());
let boundary = next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index);
CCursor {
index: num_chars
- next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index),
index: num_chars - boundary,
prefer_next_row: true,
}
}
fn next_word_boundary_char_index(text: &str, cursor_ci: usize) -> usize {
let mut current_char_idx = 0;
fn next_word_boundary_char_index(text: &str, cursor_ci: CharIndex) -> CharIndex {
let mut current_char_idx = CharIndex::ZERO;
for (_word_byte_index, word) in text.split_word_bound_indices() {
let word_ci = current_char_idx;
@@ -231,8 +231,11 @@ fn all_word_chars(text: &str) -> bool {
text.chars().all(is_word_char)
}
fn next_line_boundary_char_index(it: impl Iterator<Item = char>, mut index: usize) -> usize {
let mut it = it.skip(index);
fn next_line_boundary_char_index(
it: impl Iterator<Item = char>,
mut index: CharIndex,
) -> CharIndex {
let mut it = it.skip(index.0);
if let Some(_first) = it.next() {
index += 1;
@@ -260,36 +263,38 @@ fn is_linebreak(c: char) -> bool {
/// Accepts and returns character offset (NOT byte offset!).
pub fn find_line_start(text: &str, current_index: CCursor) -> CCursor {
let byte_idx = byte_index_from_char_index(text, current_index.index);
let text_before = &text[..byte_idx];
let text_before = (ByteIndex::ZERO..byte_idx).slice(text);
if let Some(last_newline_byte) = text_before.rfind('\n') {
let char_idx = char_index_from_byte_index(text, last_newline_byte + 1);
let char_idx = char_index_from_byte_index(text, ByteIndex(last_newline_byte + 1));
CCursor::new(char_idx)
} else {
CCursor::new(0)
}
}
pub fn byte_index_from_char_index(s: &str, char_index: usize) -> usize {
pub fn byte_index_from_char_index(s: &str, char_index: CharIndex) -> ByteIndex {
for (ci, (bi, _)) in s.char_indices().enumerate() {
if ci == char_index {
return bi;
if ci == char_index.0 {
return ByteIndex(bi);
}
}
s.len()
ByteIndex(s.len())
}
pub fn char_index_from_byte_index(input: &str, byte_index: usize) -> usize {
pub fn char_index_from_byte_index(input: &str, byte_index: ByteIndex) -> CharIndex {
for (ci, (bi, _)) in input.char_indices().enumerate() {
if bi == byte_index {
return ci;
if bi == byte_index.0 {
return CharIndex(ci);
}
}
input.char_indices().last().map_or(0, |(i, _)| i + 1)
// `byte_index` is at or past the end of the string (or not on a char boundary):
// return the total number of characters.
CharIndex(input.chars().count())
}
pub fn slice_char_range(s: &str, char_range: std::ops::Range<usize>) -> &str {
pub fn slice_char_range(s: &str, char_range: std::ops::Range<CharIndex>) -> &str {
assert!(
char_range.start <= char_range.end,
"Invalid range, start must be less than end, but start = {}, end = {}",
@@ -298,7 +303,7 @@ pub fn slice_char_range(s: &str, char_range: std::ops::Range<usize>) -> &str {
);
let start_byte = byte_index_from_char_index(s, char_range.start);
let end_byte = byte_index_from_char_index(s, char_range.end);
&s[start_byte..end_byte]
(start_byte..end_byte).slice(s)
}
/// The thin rectangle of one end of the selection, e.g. the primary cursor, in local galley coordinates.
@@ -321,21 +326,21 @@ mod test {
fn test_next_word_boundary_char_index() {
// ASCII only
let text = "abc d3f g_h i-j";
assert_eq!(next_word_boundary_char_index(text, 1), 3);
assert_eq!(next_word_boundary_char_index(text, 3), 7);
assert_eq!(next_word_boundary_char_index(text, 9), 11);
assert_eq!(next_word_boundary_char_index(text, 12), 13);
assert_eq!(next_word_boundary_char_index(text, 13), 15);
assert_eq!(next_word_boundary_char_index(text, 15), 15);
assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3);
assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7);
assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 11);
assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 13);
assert_eq!(next_word_boundary_char_index(text, CharIndex(13)).0, 15);
assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 15);
assert_eq!(next_word_boundary_char_index("", 0), 0);
assert_eq!(next_word_boundary_char_index("", 1), 0);
assert_eq!(next_word_boundary_char_index("", CharIndex(0)).0, 0);
assert_eq!(next_word_boundary_char_index("", CharIndex(1)).0, 0);
// ASCII only
let text = "abc.def.ghi";
assert_eq!(next_word_boundary_char_index(text, 1), 3);
assert_eq!(next_word_boundary_char_index(text, 3), 7);
assert_eq!(next_word_boundary_char_index(text, 7), 11);
assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3);
assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7);
assert_eq!(next_word_boundary_char_index(text, CharIndex(7)).0, 11);
// Unicode graphemes, some of which consist of multiple Unicode characters,
// !!! Unicode character is not always what is traditionally considered a character,
@@ -343,32 +348,66 @@ mod test {
// handling of and around emojis is kind of weird and is not consistent across
// text editors and browsers
let text = "❤️👍 skvělá knihovna 👍❤️";
assert_eq!(next_word_boundary_char_index(text, 0), 2);
assert_eq!(next_word_boundary_char_index(text, 2), 3); // this does not skip the space between thumbs-up and 'skvělá'
assert_eq!(next_word_boundary_char_index(text, 6), 10);
assert_eq!(next_word_boundary_char_index(text, 9), 10);
assert_eq!(next_word_boundary_char_index(text, 12), 19);
assert_eq!(next_word_boundary_char_index(text, 15), 19);
assert_eq!(next_word_boundary_char_index(text, 19), 20);
assert_eq!(next_word_boundary_char_index(text, 20), 21);
assert_eq!(next_word_boundary_char_index(text, CharIndex(0)).0, 2);
assert_eq!(next_word_boundary_char_index(text, CharIndex(2)).0, 3); // this does not skip the space between thumbs-up and 'skvělá'
assert_eq!(next_word_boundary_char_index(text, CharIndex(6)).0, 10);
assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 10);
assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 19);
assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 19);
assert_eq!(next_word_boundary_char_index(text, CharIndex(19)).0, 20);
assert_eq!(next_word_boundary_char_index(text, CharIndex(20)).0, 21);
}
#[test]
fn test_previous_word() {
let text = "abc def ghi";
assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index, 4);
assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index, 4);
assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index, 0);
assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index, 0);
assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index.0, 4);
assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index.0, 4);
assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index.0, 0);
assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index.0, 0);
}
#[test]
fn test_next_word() {
let text = "abc def ghi";
assert_eq!(ccursor_next_word(text, CCursor::new(0)).index, 3);
assert_eq!(ccursor_next_word(text, CCursor::new(3)).index, 7);
assert_eq!(ccursor_next_word(text, CCursor::new(7)).index, 11);
assert_eq!(ccursor_next_word(text, CCursor::new(11)).index, 11);
assert_eq!(ccursor_next_word(text, CCursor::new(0)).index.0, 3);
assert_eq!(ccursor_next_word(text, CCursor::new(3)).index.0, 7);
assert_eq!(ccursor_next_word(text, CCursor::new(7)).index.0, 11);
assert_eq!(ccursor_next_word(text, CCursor::new(11)).index.0, 11);
}
#[test]
fn test_index_conversion_roundtrip() {
// "é" is 2 bytes, "👍" is 4 bytes.
let text = "aé👍b";
let char_count = text.chars().count(); // 4
assert_eq!(char_count, 4);
// char -> byte, including the end index
assert_eq!(byte_index_from_char_index(text, CharIndex(0)).0, 0);
assert_eq!(byte_index_from_char_index(text, CharIndex(1)).0, 1);
assert_eq!(byte_index_from_char_index(text, CharIndex(2)).0, 3);
assert_eq!(byte_index_from_char_index(text, CharIndex(3)).0, 7);
assert_eq!(byte_index_from_char_index(text, CharIndex(4)).0, 8);
// Past the end clamps to the byte length:
assert_eq!(
byte_index_from_char_index(text, CharIndex(99)).0,
text.len()
);
// byte -> char, including the end index
assert_eq!(char_index_from_byte_index(text, ByteIndex(0)).0, 0);
assert_eq!(char_index_from_byte_index(text, ByteIndex(1)).0, 1);
assert_eq!(char_index_from_byte_index(text, ByteIndex(3)).0, 2);
assert_eq!(char_index_from_byte_index(text, ByteIndex(7)).0, 3);
// The end byte index must map to the character count, not to some byte offset:
assert_eq!(char_index_from_byte_index(text, ByteIndex(text.len())).0, 4);
// Past the end clamps to the character count:
assert_eq!(char_index_from_byte_index(text, ByteIndex(99)).0, 4);
// Empty string:
assert_eq!(byte_index_from_char_index("", CharIndex(0)).0, 0);
assert_eq!(char_index_from_byte_index("", ByteIndex(0)).0, 0);
}
#[test]
@@ -380,16 +419,16 @@ mod test {
range.primary.index.min(range.secondary.index),
range.primary.index.max(range.secondary.index),
);
assert_eq!(lo, 0);
assert_eq!(hi, 5);
assert_eq!(lo.0, 0);
assert_eq!(hi.0, 5);
let range = select_word_at(text, CCursor::new(8));
let (lo, hi) = (
range.primary.index.min(range.secondary.index),
range.primary.index.max(range.secondary.index),
);
assert_eq!(lo, 6);
assert_eq!(hi, 11);
assert_eq!(lo.0, 6);
assert_eq!(hi.0, 11);
}
#[test]
@@ -401,10 +440,10 @@ mod test {
let start = std::time::Instant::now();
let next = ccursor_next_word(&large_text, CCursor::new(len - 10));
assert!(next.index <= len);
assert!(next.index.0 <= len);
let prev = ccursor_previous_word(&large_text, CCursor::new(len - 10));
assert!(prev.index < len);
assert!(prev.index.0 < len);
let range = select_word_at(&large_text, CCursor::new(len - 3));
let lo = range.primary.index.min(range.secondary.index);
@@ -459,9 +498,9 @@ mod tests {
for (text, cursor, expected) in cases {
let result = ccursor_previous_word(text, CCursor::new(cursor));
assert_eq!(
result.index, expected,
result.index.0, expected,
"text={text:?}, cursor={cursor}, got={}, expected={expected}",
result.index
result.index.0
);
}
}

View File

@@ -57,9 +57,9 @@ pub fn paint_text_selection(
if !row.glyphs.is_empty() {
// Change color of the selected text:
let first_glyph_index = if ri == min.row { min.column } else { 0 };
let first_glyph_index = if ri == min.row { min.column.0 } else { 0 };
let last_glyph_index = if ri == max.row {
max.column
max.column.0
} else {
row.glyphs.len()
};

View File

@@ -899,7 +899,7 @@ impl TextEdit<'_> {
)
});
} else if selection_changed && let Some(cursor_range) = cursor_range {
let char_range = cursor_range.primary.index..=cursor_range.secondary.index;
let char_range = cursor_range.as_sorted_char_range();
let info = WidgetInfo::text_selection_changed(
ui.is_enabled(),
char_range,

View File

@@ -1,6 +1,11 @@
use std::{borrow::Cow, ops::Range};
use epaint::{Galley, text::cursor::CCursor};
use epaint::{
Galley,
text::{
ByteIndex, ByteRangeExt as _, CharIndex, CharRange, CharRangeExt as _, cursor::CCursor,
},
};
/// One `\t` character is this many spaces wide (for indentation purposes).
const TAB_SIZE: usize = 4;
@@ -31,36 +36,36 @@ pub trait TextBuffer {
///
/// # Return
/// Returns how many *characters* were successfully inserted
fn insert_text(&mut self, text: &str, char_index: usize) -> usize;
fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize;
/// Deletes a range of text `char_range` from this buffer.
///
/// # Notes
/// `char_range` is a *character range*, not a byte range.
fn delete_char_range(&mut self, char_range: Range<usize>);
fn delete_char_range(&mut self, char_range: Range<CharIndex>);
/// Reads the given character range.
fn char_range(&self, char_range: Range<usize>) -> &str {
fn char_range(&self, char_range: Range<CharIndex>) -> &str {
slice_char_range(self.as_str(), char_range)
}
fn byte_index_from_char_index(&self, char_index: usize) -> usize {
fn byte_index_from_char_index(&self, char_index: CharIndex) -> ByteIndex {
byte_index_from_char_index(self.as_str(), char_index)
}
fn char_index_from_byte_index(&self, char_index: usize) -> usize {
char_index_from_byte_index(self.as_str(), char_index)
fn char_index_from_byte_index(&self, byte_index: ByteIndex) -> CharIndex {
char_index_from_byte_index(self.as_str(), byte_index)
}
/// Clears all characters in this buffer
fn clear(&mut self) {
self.delete_char_range(0..self.as_str().len());
self.delete_char_range(CharRange::full(self.as_str()));
}
/// Replaces all contents of this string with `text`
fn replace_with(&mut self, text: &str) {
self.clear();
self.insert_text(text, 0);
self.insert_text(text, CharIndex(0));
}
/// Clears all characters in this buffer and returns a string of the contents.
@@ -90,12 +95,12 @@ pub trait TextBuffer {
fn decrease_indentation(&mut self, ccursor: &mut CCursor) {
let line_start = find_line_start(self.as_str(), *ccursor);
let remove_len = if self.as_str().chars().nth(line_start.index) == Some('\t') {
let remove_len = if self.as_str().chars().nth(line_start.index.0) == Some('\t') {
Some(1)
} else if self
.as_str()
.chars()
.skip(line_start.index)
.skip(line_start.index.0)
.take(TAB_SIZE)
.all(|c| c == ' ')
{
@@ -126,7 +131,7 @@ pub trait TextBuffer {
}
fn delete_previous_char(&mut self, ccursor: CCursor) -> CCursor {
if ccursor.index > 0 {
if CharIndex::ZERO < ccursor.index {
let max_ccursor = ccursor;
let min_ccursor = max_ccursor - 1;
self.delete_selected_ccursor_range([min_ccursor, max_ccursor])
@@ -190,8 +195,8 @@ pub trait TextBuffer {
/// impl TextBuffer for ExampleBuffer {
/// fn is_mutable(&self) -> bool { unimplemented!() }
/// fn as_str(&self) -> &str { unimplemented!() }
/// fn insert_text(&mut self, text: &str, char_index: usize) -> usize { unimplemented!() }
/// fn delete_char_range(&mut self, char_range: std::ops::Range<usize>) { unimplemented!() }
/// fn insert_text(&mut self, text: &str, char_index: egui::text::CharIndex) -> usize { unimplemented!() }
/// fn delete_char_range(&mut self, char_range: std::ops::Range<egui::text::CharIndex>) { unimplemented!() }
///
/// // Implement it like the following:
/// fn type_id(&self) -> TypeId {
@@ -220,17 +225,17 @@ impl TextBuffer for String {
self.as_ref()
}
fn insert_text(&mut self, text: &str, char_index: usize) -> usize {
fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize {
// Get the byte index from the character index
let byte_idx = byte_index_from_char_index(self.as_str(), char_index);
// Then insert the string
self.insert_str(byte_idx, text);
self.insert_str(byte_idx.into(), text);
text.chars().count()
}
fn delete_char_range(&mut self, char_range: Range<usize>) {
fn delete_char_range(&mut self, char_range: Range<CharIndex>) {
assert!(
char_range.start <= char_range.end,
"start must be <= end, but got {char_range:?}"
@@ -241,7 +246,7 @@ impl TextBuffer for String {
let byte_end = byte_index_from_char_index(self.as_str(), char_range.end);
// Then drain all characters within this range
self.drain(byte_start..byte_end);
self.drain((byte_start..byte_end).as_usize());
}
fn clear(&mut self) {
@@ -270,11 +275,11 @@ impl TextBuffer for Cow<'_, str> {
self.as_ref()
}
fn insert_text(&mut self, text: &str, char_index: usize) -> usize {
fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize {
<String as TextBuffer>::insert_text(self.to_mut(), text, char_index)
}
fn delete_char_range(&mut self, char_range: Range<usize>) {
fn delete_char_range(&mut self, char_range: Range<CharIndex>) {
<String as TextBuffer>::delete_char_range(self.to_mut(), char_range);
}
@@ -305,11 +310,11 @@ impl TextBuffer for &str {
self
}
fn insert_text(&mut self, _text: &str, _ch_idx: usize) -> usize {
fn insert_text(&mut self, _text: &str, _ch_idx: CharIndex) -> usize {
0
}
fn delete_char_range(&mut self, _ch_range: Range<usize>) {}
fn delete_char_range(&mut self, _ch_range: Range<CharIndex>) {}
fn type_id(&self) -> std::any::TypeId {
std::any::TypeId::of::<&str>()

View File

@@ -604,7 +604,8 @@ impl Highlighter {
}
#[cfg(feature = "syntect")]
fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<usize> {
fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<egui::text::ByteIndex> {
use egui::text::ByteIndex;
let whole_start = whole.as_ptr() as usize;
let range_start = range.as_ptr() as usize;
assert!(
@@ -617,7 +618,7 @@ fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<usize> {
range_start + range.len()
);
let offset = range_start - whole_start;
offset..(offset + range.len())
ByteIndex(offset)..ByteIndex(offset + range.len())
}
// ----------------------------------------------------------------------------

View File

@@ -1,5 +1,7 @@
//! Different types of text cursors, i.e. ways to point into a [`super::Galley`].
use super::index::CharIndex;
/// Character cursor.
///
/// The default cursor is zero.
@@ -7,7 +9,7 @@
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct CCursor {
/// Character offset (NOT byte offset!).
pub index: usize,
pub index: CharIndex,
/// If this cursors sits right at the border of a wrapped row break (NOT paragraph break)
/// do we prefer the next row?
@@ -18,9 +20,9 @@ pub struct CCursor {
impl CCursor {
#[inline]
pub fn new(index: usize) -> Self {
pub fn new(index: impl Into<CharIndex>) -> Self {
Self {
index,
index: index.into(),
prefer_next_row: false,
}
}
@@ -83,5 +85,5 @@ pub struct LayoutCursor {
/// Character based (NOT bytes).
/// It is fine if this points to something beyond the end of the current row.
/// When moving up/down it may again be within the next row.
pub column: usize,
pub column: CharIndex,
}

View File

@@ -10,7 +10,7 @@ use std::{
use crate::{
TextureAtlas,
text::{
Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords,
ByteIndex, Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords,
font::{Font, FontFace},
},
};
@@ -1070,10 +1070,10 @@ impl GalleyCache {
// `start` and `end` are the byte range of the current paragraph.
// How does the current section overlap with the paragraph range?
if section_range.end <= start {
if section_range.end <= ByteIndex(start) {
// The section is behind us
current_section += 1;
} else if end < section_range.start {
} else if ByteIndex(end) < section_range.start {
break; // Haven't reached this one yet.
} else {
// Section range overlaps with paragraph range
@@ -1082,13 +1082,13 @@ impl GalleyCache {
"Bad byte_range: {section_range:?}"
);
let new_range = section_range.start.saturating_sub(start)
..(section_range.end.at_most(end)).saturating_sub(start);
..(section_range.end.min(ByteIndex(end))).saturating_sub(start);
debug_assert!(
new_range.start <= new_range.end,
"Bad new section range: {new_range:?}"
);
paragraph_job.sections.push(LayoutSection {
leading_space: if start <= section_range.start {
leading_space: if ByteIndex(start) <= section_range.start {
*leading_space
} else {
0.0

View File

@@ -0,0 +1,202 @@
//! Strongly-typed offsets into text.
//!
//! UTF-8 text can be indexed either by _byte_ offset or by _character_
//! (Unicode scalar) offset. Mixing the two is a common source of bugs,
//! so we use distinct types to keep them apart.
use std::ops::Range;
/// A byte offset into a UTF-8 string.
///
/// This is what you use to slice a [`str`] (e.g. `&text[range.start.0..range.end.0]`).
/// Not to be confused with [`CharIndex`], which counts characters instead of bytes.
///
/// The wrapped `usize` is public, so the raw offset is always available as `.0`.
/// The type itself performs no validation: it does not check that the offset
/// lies on a UTF-8 character boundary or within any particular string.
// With the `serde` feature enabled, `serde(transparent)` makes this (de)serialize
// exactly like the inner `usize`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize, serde::Serialize),
serde(transparent)
)]
pub struct ByteIndex(pub usize);
/// A character (Unicode scalar) offset into a string.
///
/// Counts characters, not bytes, so it is independent of the UTF-8 encoding.
/// Not to be confused with [`ByteIndex`]. See also [`super::cursor::CCursor`].
///
/// The wrapped `usize` is public, so the raw count is always available as `.0`.
/// The type itself performs no validation against any particular string.
// With the `serde` feature enabled, `serde(transparent)` makes this (de)serialize
// exactly like the inner `usize`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize, serde::Serialize),
serde(transparent)
)]
pub struct CharIndex(pub usize);
// Generates the shared API of a text-index newtype (a tuple struct wrapping a `usize`):
//
// * the `ZERO` constant and saturating add/sub helpers,
// * lossless conversions to and from `usize`,
// * `+`, `-`, `+=` and `-=` with either a raw `usize` or another index of the same type,
// * `Display`, which prints just the inner number.
//
// The non-saturating operators use plain `usize` arithmetic, so they follow the usual
// integer overflow rules (e.g. subtracting past zero panics when overflow checks are on).
// Use `saturating_add` / `saturating_sub` when the operands are not known to be in range.
macro_rules! impl_text_index {
    ($Type:ident) => {
        impl $Type {
            /// The zero offset, i.e. the very start of the text.
            pub const ZERO: Self = Self(0);

            /// Saturating integer addition.
            #[inline]
            pub fn saturating_add(self, rhs: usize) -> Self {
                Self(self.0.saturating_add(rhs))
            }

            /// Saturating integer subtraction.
            #[inline]
            pub fn saturating_sub(self, rhs: usize) -> Self {
                Self(self.0.saturating_sub(rhs))
            }
        }

        impl From<usize> for $Type {
            #[inline]
            fn from(index: usize) -> Self {
                Self(index)
            }
        }

        impl From<$Type> for usize {
            #[inline]
            fn from(index: $Type) -> Self {
                index.0
            }
        }

        impl std::ops::Add<usize> for $Type {
            type Output = Self;

            #[inline]
            fn add(self, rhs: usize) -> Self {
                Self(self.0 + rhs)
            }
        }

        /// Compose offsets, e.g. a base position plus a relative one.
        impl std::ops::Add<$Type> for $Type {
            type Output = Self;

            #[inline]
            fn add(self, rhs: Self) -> Self {
                Self(self.0 + rhs.0)
            }
        }

        impl std::ops::Sub<usize> for $Type {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: usize) -> Self {
                Self(self.0 - rhs)
            }
        }

        /// The distance between two offsets, or a position moved back by a relative offset.
        impl std::ops::Sub<$Type> for $Type {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: Self) -> Self {
                Self(self.0 - rhs.0)
            }
        }

        impl std::ops::AddAssign<usize> for $Type {
            #[inline]
            fn add_assign(&mut self, rhs: usize) {
                self.0 += rhs;
            }
        }

        impl std::ops::AddAssign<$Type> for $Type {
            #[inline]
            fn add_assign(&mut self, rhs: Self) {
                self.0 += rhs.0;
            }
        }

        impl std::ops::SubAssign<usize> for $Type {
            #[inline]
            fn sub_assign(&mut self, rhs: usize) {
                self.0 -= rhs;
            }
        }

        // Mirrors `AddAssign<$Type>` so that `-=` accepts the same right-hand sides as `+=`.
        impl std::ops::SubAssign<$Type> for $Type {
            #[inline]
            fn sub_assign(&mut self, rhs: Self) {
                self.0 -= rhs.0;
            }
        }

        impl std::fmt::Display for $Type {
            #[inline]
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                self.0.fmt(f)
            }
        }
    };
}
// Give both index types the same generated API: the `ZERO` constant, saturating
// helpers, `usize` conversions, arithmetic operators and `Display`.
impl_text_index!(ByteIndex);
impl_text_index!(CharIndex);
/// A range of [`ByteIndex`], i.e. a byte range into a [`str`].
///
/// As with any [`Range`], `start` is inclusive and `end` is exclusive.
pub type ByteRange = Range<ByteIndex>;
/// A range of [`CharIndex`], i.e. a character range into a [`str`].
///
/// As with any [`Range`], `start` is inclusive and `end` is exclusive.
pub type CharRange = Range<CharIndex>;
/// Extension methods for a [`ByteRange`].
///
/// [`ByteRange`] is an alias for a standard-library type, so these helpers are
/// provided through a trait; bring the trait into scope to call them.
pub trait ByteRangeExt {
/// The full byte range covering `text`, i.e. `0..text.len()`.
fn full(text: &str) -> Self;
/// The `start..end` byte range as plain `usize`, for slicing a [`str`].
fn as_usize(&self) -> Range<usize>;
/// Slice the given string by this byte range.
///
/// The implementation for [`ByteRange`] indexes `text` directly, so it panics
/// if the range is out of bounds or does not fall on UTF-8 character boundaries.
fn slice<'s>(&self, text: &'s str) -> &'s str;
}
impl ByteRangeExt for ByteRange {
    /// Spans all of `text`: from byte zero up to (but excluding) `text.len()`.
    #[inline]
    fn full(text: &str) -> Self {
        let end = ByteIndex(text.len());
        Range {
            start: ByteIndex::ZERO,
            end,
        }
    }

    /// Unwraps both endpoints into raw `usize` byte offsets.
    #[inline]
    fn as_usize(&self) -> Range<usize> {
        Range {
            start: self.start.0,
            end: self.end.0,
        }
    }

    /// Borrows the part of `text` covered by this range.
    ///
    /// Uses ordinary string indexing, so an out-of-bounds range or one that
    /// splits a UTF-8 character panics.
    #[inline]
    fn slice<'s>(&self, text: &'s str) -> &'s str {
        let bytes = self.as_usize();
        &text[bytes]
    }
}
/// Extension methods for a [`CharRange`].
///
/// [`CharRange`] is an alias for a standard-library type, so this helper is
/// provided through a trait; bring the trait into scope to call it.
pub trait CharRangeExt {
/// The full character range covering `text`, i.e. `0..text.chars().count()`.
///
/// Note that the end is a *character* count, which differs from `text.len()`
/// (a byte count) whenever `text` contains multi-byte characters.
fn full(text: &str) -> Self;
}
impl CharRangeExt for CharRange {
#[inline]
fn full(text: &str) -> Self {
CharIndex::ZERO..CharIndex(text.chars().count())
}
}
#[cfg(test)]
mod tests {
    use super::CharIndex;

    /// The typed (`CharIndex`) and raw (`usize`) right-hand sides must give the same result.
    #[test]
    fn arithmetic() {
        let (two, three, five) = (CharIndex(2), CharIndex(3), CharIndex(5));

        // Moving a base position forward by a relative offset:
        assert_eq!(two + three, five);
        assert_eq!(two + 3, five);

        let mut position = two;
        position += three;
        assert_eq!(position, five);

        // Moving a position back by a relative offset:
        assert_eq!(five - two, three);
        assert_eq!(five - 2, three);
    }
}

View File

@@ -3,6 +3,7 @@
pub mod cursor;
mod font;
mod fonts;
mod index;
mod text_layout;
mod text_layout_types;
@@ -11,6 +12,7 @@ pub use {
FontData, FontDefinitions, FontFamily, FontId, FontInsert, FontPriority, FontTweak, Fonts,
FontsImpl, FontsView, InsertFontFamily,
},
index::{ByteIndex, ByteRange, ByteRangeExt, CharIndex, CharRange, CharRangeExt},
text_layout::*,
text_layout_types::*,
};

View File

@@ -15,8 +15,8 @@ use crate::{
};
use super::{
FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row, RowVisuals,
VariationCoords,
ByteRangeExt as _, FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row,
RowVisuals, VariationCoords,
font::{Font, FontFace, ShapedGlyph},
};
@@ -454,7 +454,7 @@ fn layout_section(
}
paragraph.cursor_x_px += leading_space * pixels_per_point;
let section_text = &job.text[byte_range.clone()];
let section_text = &job.text[byte_range.as_usize()];
let mut ctx = ShapingContext {
pixels_per_point,
font_size,
@@ -1574,7 +1574,7 @@ mod tests {
pixels_per_point,
Arc::new(LayoutJob::single_section(
iter::chain(
(0..elided_galley.rows[0].char_count_excluding_newline()).map(|_| ch),
(0..elided_galley.rows[0].char_count_excluding_newline().0).map(|_| ch),
iter::once('…'),
)
.collect::<String>(),
@@ -1866,7 +1866,7 @@ mod tests {
// Verify cursor round-trip: end cursor index == char count.
assert_eq!(
galley.end().index,
galley.end().index.0,
expected_chars,
"Galley::end().index mismatch for {text:?}",
);
@@ -1892,9 +1892,9 @@ mod tests {
let galley = layout(&mut fonts, pixels_per_point, job.into());
// Walking through every cursor index should produce valid positions.
for i in 0..=galley.end().index {
for i in 0..=galley.end().index.0 {
let cursor = CCursor {
index: i,
index: CharIndex(i),
prefer_next_row: false,
};
let rect = galley.pos_from_cursor(cursor);

View File

@@ -4,6 +4,7 @@ use std::{ops::Range, str::FromStr as _};
use super::{
cursor::{CCursor, LayoutCursor},
font::UvRect,
index::{ByteIndex, ByteRange, ByteRangeExt as _, CharIndex},
};
use crate::{Color32, FontId, Mesh, Stroke, text::FontsView};
use emath::{Align, GuiRounding as _, NumExt as _, OrderedFloat, Pos2, Rect, Vec2, pos2, vec2};
@@ -119,7 +120,7 @@ impl LayoutJob {
Self {
sections: vec![LayoutSection {
leading_space: 0.0,
byte_range: 0..text.len(),
byte_range: ByteRange::full(&text),
format: TextFormat::simple(font_id, color),
}],
text,
@@ -138,7 +139,7 @@ impl LayoutJob {
Self {
sections: vec![LayoutSection {
leading_space: 0.0,
byte_range: 0..text.len(),
byte_range: ByteRange::full(&text),
format,
}],
text,
@@ -153,7 +154,7 @@ impl LayoutJob {
Self {
sections: vec![LayoutSection {
leading_space: 0.0,
byte_range: 0..text.len(),
byte_range: ByteRange::full(&text),
format: TextFormat::simple(font_id, color),
}],
text,
@@ -168,7 +169,7 @@ impl LayoutJob {
Self {
sections: vec![LayoutSection {
leading_space: 0.0,
byte_range: 0..text.len(),
byte_range: ByteRange::full(&text),
format,
}],
text,
@@ -192,7 +193,7 @@ impl LayoutJob {
pub fn append(&mut self, text: &str, leading_space: f32, format: TextFormat) {
let start = self.text.len();
self.text += text;
let byte_range = start..self.text.len();
let byte_range = ByteIndex(start)..ByteIndex(self.text.len());
// Optimization: merge into the previous section if it has the same format
// and this one adds no leading space.
@@ -217,7 +218,7 @@ impl LayoutJob {
///
/// Panics if the job has no sections.
/// Assumes [`LayoutJob::sections`] are ordered by increasing `byte_range` (as produced by [`Self::append`]).
pub fn format_at_byte(&self, byte_idx: usize) -> &TextFormat {
pub fn format_at_byte(&self, byte_idx: ByteIndex) -> &TextFormat {
self.debug_sanity_check();
let last = self.sections.last().expect("LayoutJob has no sections");
let idx = self
@@ -250,12 +251,12 @@ impl LayoutJob {
.expect("checked above")
.byte_range
.start,
0,
ByteIndex::ZERO,
"First LayoutSection must start at byte 0"
);
assert_eq!(
self.sections.last().expect("checked above").byte_range.end,
self.text.len(),
ByteIndex(self.text.len()),
"Last LayoutSection must end at the end of the text"
);
@@ -341,7 +342,7 @@ pub struct LayoutSection {
pub leading_space: f32,
/// Range into [`LayoutJob::text`].
pub byte_range: Range<usize>,
pub byte_range: Range<ByteIndex>,
/// How to format the text in this section (font, color, etc).
pub format: TextFormat,
@@ -946,23 +947,23 @@ impl Row {
/// Excludes the implicit `\n` after the [`Row`], if any.
#[inline]
pub fn char_count_excluding_newline(&self) -> usize {
self.glyphs.len()
pub fn char_count_excluding_newline(&self) -> CharIndex {
CharIndex(self.glyphs.len())
}
/// Closest char at the desired x coordinate in row-relative coordinates.
/// Returns something in the range `[0, char_count_excluding_newline()]`.
pub fn char_at(&self, desired_x: f32) -> usize {
pub fn char_at(&self, desired_x: f32) -> CharIndex {
for (i, glyph) in self.glyphs.iter().enumerate() {
if desired_x < glyph.logical_rect().center().x {
return i;
return CharIndex(i);
}
}
self.char_count_excluding_newline()
}
pub fn x_offset(&self, column: usize) -> f32 {
if let Some(glyph) = self.glyphs.get(column) {
pub fn x_offset(&self, column: CharIndex) -> f32 {
if let Some(glyph) = self.glyphs.get(column.0) {
glyph.pos.x
} else {
self.size.x
@@ -988,8 +989,8 @@ impl PlacedRow {
/// Includes the implicit `\n` after the [`PlacedRow`], if any.
#[inline]
pub fn char_count_including_newline(&self) -> usize {
self.row.glyphs.len() + (self.ends_with_newline as usize)
pub fn char_count_including_newline(&self) -> CharIndex {
CharIndex(self.row.glyphs.len() + (self.ends_with_newline as usize))
}
}
@@ -1188,7 +1189,7 @@ impl Galley {
let mut best_y_dist = f32::INFINITY;
let mut cursor = CCursor::default();
let mut ccursor_index = 0;
let mut ccursor_index = CharIndex::ZERO;
for row in &self.rows {
let min_y = row.min_y();
@@ -1234,7 +1235,7 @@ impl Galley {
return Default::default();
}
let mut ccursor = CCursor {
index: 0,
index: CharIndex::ZERO,
prefer_next_row: true,
};
for row in &self.rows {
@@ -1251,7 +1252,7 @@ impl Galley {
pub fn layout_from_cursor(&self, cursor: CCursor) -> LayoutCursor {
let prefer_next_row = cursor.prefer_next_row;
let mut ccursor_it = CCursor {
index: 0,
index: CharIndex::ZERO,
prefer_next_row,
};
@@ -1294,15 +1295,13 @@ impl Galley {
let prefer_next_row =
layout_cursor.column < self.rows[layout_cursor.row].char_count_excluding_newline();
let mut cursor_it = CCursor {
index: 0,
index: CharIndex::ZERO,
prefer_next_row,
};
for (row_nr, row) in self.rows.iter().enumerate() {
if row_nr == layout_cursor.row {
cursor_it.index += layout_cursor
.column
.at_most(row.char_count_excluding_newline());
cursor_it.index += layout_cursor.column.min(row.char_count_excluding_newline());
return cursor_it;
}
@@ -1316,7 +1315,7 @@ impl Galley {
impl Galley {
#[expect(clippy::unused_self)]
pub fn cursor_left_one_character(&self, cursor: &CCursor) -> CCursor {
if cursor.index == 0 {
if cursor.index == CharIndex::ZERO {
Default::default()
} else {
CCursor {
@@ -1392,7 +1391,7 @@ impl Galley {
let layout_cursor = self.layout_from_cursor(*cursor);
self.cursor_from_layout(LayoutCursor {
row: layout_cursor.row,
column: 0,
column: CharIndex::ZERO,
})
}
@@ -1406,7 +1405,7 @@ impl Galley {
pub fn cursor_begin_of_paragraph(&self, cursor: &CCursor) -> CCursor {
let mut layout_cursor = self.layout_from_cursor(*cursor);
layout_cursor.column = 0;
layout_cursor.column = CharIndex::ZERO;
loop {
let prev_row = layout_cursor