Use strongly typed CharIndex and ByteIndex + bug fixes (#8245)

Less risk of confusing the two. Found and fixed a couple of real bugs in the process! --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-26 14:49:06 -04:00 · 2026-06-21 02:24:00 +02:00
parent eac51da9ca
commit 13d6b5afcf
15 changed files with 396 additions and 138 deletions
--- a/crates/egui/src/data/output.rs
+++ b/crates/egui/src/data/output.rs
@@ -1,5 +1,9 @@
 //! All the data egui returns to the backend at the end of each frame.

+use std::ops::Range;
+
+use epaint::text::CharIndex;
+
 use crate::{OrderedViewportIdMap, RepaintCause, ViewportOutput, WidgetType};

 /// What egui emits each frame from [`crate::Context::run_ui`].
@@ -554,7 +558,9 @@ pub struct WidgetInfo {
    pub value: Option<f64>,

    /// Selected range of characters in [`Self::current_text_value`].
-    pub text_selection: Option<std::ops::RangeInclusive<usize>>,
+    ///
+    /// The range is `start..end` in *character* offsets (not bytes), with `end` exclusive.
+    pub text_selection: Option<Range<CharIndex>>,

    /// The hint text for text edit fields.
    pub hint_text: Option<String>,
@@ -689,7 +695,7 @@ impl WidgetInfo {
    #[expect(clippy::needless_pass_by_value)]
    pub fn text_selection_changed(
        enabled: bool,
-        text_selection: std::ops::RangeInclusive<usize>,
+        text_selection: Range<CharIndex>,
        current_text_value: impl ToString,
    ) -> Self {
        Self {
--- a/crates/egui/src/lib.rs
+++ b/crates/egui/src/lib.rs
@@ -454,8 +454,8 @@ pub use epaint::{
 pub mod text {
    pub use crate::text_selection::CCursorRange;
    pub use epaint::text::{
-        FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob, LayoutSection, TextFormat,
-        TextWrapping, cursor::CCursor,
+        ByteIndex, CharIndex, FontData, FontDefinitions, FontFamily, Fonts, Galley, LayoutJob,
+        LayoutSection, TextFormat, TextWrapping, cursor::CCursor,
    };
 }

--- a/crates/egui/src/text_selection/accesskit_text.rs
+++ b/crates/egui/src/text_selection/accesskit_text.rs
@@ -1,4 +1,5 @@
 use emath::TSTransform;
+use epaint::text::CharIndex;

 use crate::{Context, Galley, Id};

@@ -9,7 +10,8 @@ pub(crate) const MAX_CHARS_PER_TEXT_RUN: usize = 255;

 /// Convert a (row, column) layout cursor position to a text run node ID and character index,
 /// accounting for rows that are split into multiple text runs.
-fn text_run_position(parent_id: Id, row: usize, column: usize) -> accesskit::TextPosition {
+fn text_run_position(parent_id: Id, row: usize, column: CharIndex) -> accesskit::TextPosition {
+    let column = column.0;
    // When column lands exactly on a chunk boundary (e.g., 255), it refers to
    // the end of the previous chunk, not the start of a new one.
    let chunk_index = if column > 0 && column.is_multiple_of(MAX_CHARS_PER_TEXT_RUN) {
--- a/crates/egui/src/text_selection/cursor_range.rs
+++ b/crates/egui/src/text_selection/cursor_range.rs
@@ -1,4 +1,4 @@
-use epaint::{Galley, text::cursor::CCursor};
+use epaint::{Galley, text::CharIndex, text::cursor::CCursor};

 use crate::{Event, Id, Key, Modifiers, os::OperatingSystem};

@@ -49,7 +49,7 @@ impl CCursorRange {
    }

    /// The range of selected character indices.
-    pub fn as_sorted_char_range(&self) -> std::ops::Range<usize> {
+    pub fn as_sorted_char_range(&self) -> std::ops::Range<CharIndex> {
        let [start, end] = self.sorted_cursors();
        std::ops::Range {
            start: start.index,
@@ -237,7 +237,7 @@ fn ccursor_from_accesskit_text_position(
            if run_id.accesskit_id() == position.node {
                let column = chunk_idx * MAX_CHARS_PER_TEXT_RUN + position.character_index;
                return Some(CCursor {
-                    index: total_length + column,
+                    index: CharIndex(total_length + column),
                    prefer_next_row: !(column == row.glyphs.len()
                        && !row.ends_with_newline
                        && (i + 1) < galley.rows.len()),
--- a/crates/egui/src/text_selection/text_cursor_state.rs
+++ b/crates/egui/src/text_selection/text_cursor_state.rs
@@ -1,6 +1,6 @@
 //! Text cursor changes/interaction, without modifying the text.

-use epaint::text::{Galley, cursor::CCursor};
+use epaint::text::{ByteIndex, ByteRangeExt as _, CharIndex, Galley, cursor::CCursor};
 use unicode_segmentation::UnicodeSegmentation as _;

 use crate::{NumExt as _, Rect, Response, Ui, epaint};
@@ -129,11 +129,11 @@ fn select_word_at(text: &str, ccursor: CCursor) -> CCursorRange {
 }

 fn select_line_at(text: &str, ccursor: CCursor) -> CCursorRange {
-    if ccursor.index == 0 {
+    if ccursor.index == CharIndex::ZERO {
        CCursorRange::two(ccursor, ccursor_next_line(text, ccursor))
    } else {
        let it = text.chars();
-        let mut it = it.skip(ccursor.index - 1);
+        let mut it = it.skip(ccursor.index.0 - 1);
        if let Some(char_before_cursor) = it.next() {
            if let Some(char_after_cursor) = it.next() {
                if (!is_linebreak(char_before_cursor)) && (!is_linebreak(char_after_cursor)) {
@@ -178,26 +178,26 @@ fn ccursor_next_line(text: &str, ccursor: CCursor) -> CCursor {
 }

 pub fn ccursor_previous_word(text: &str, ccursor: CCursor) -> CCursor {
-    let num_chars = text.chars().count();
+    let num_chars = CharIndex(text.chars().count());
    let reversed: String = text.graphemes(true).rev().collect();
+    let boundary = next_word_boundary_char_index(&reversed, num_chars - ccursor.index);
    CCursor {
-        index: num_chars
-            - next_word_boundary_char_index(&reversed, num_chars - ccursor.index).min(num_chars),
+        index: num_chars - boundary.min(num_chars),
        prefer_next_row: true,
    }
 }

 fn ccursor_previous_line(text: &str, ccursor: CCursor) -> CCursor {
-    let num_chars = text.chars().count();
+    let num_chars = CharIndex(text.chars().count());
+    let boundary = next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index);
    CCursor {
-        index: num_chars
-            - next_line_boundary_char_index(text.chars().rev(), num_chars - ccursor.index),
+        index: num_chars - boundary,
        prefer_next_row: true,
    }
 }

-fn next_word_boundary_char_index(text: &str, cursor_ci: usize) -> usize {
-    let mut current_char_idx = 0;
+fn next_word_boundary_char_index(text: &str, cursor_ci: CharIndex) -> CharIndex {
+    let mut current_char_idx = CharIndex::ZERO;

    for (_word_byte_index, word) in text.split_word_bound_indices() {
        let word_ci = current_char_idx;
@@ -231,8 +231,11 @@ fn all_word_chars(text: &str) -> bool {
    text.chars().all(is_word_char)
 }

-fn next_line_boundary_char_index(it: impl Iterator<Item = char>, mut index: usize) -> usize {
-    let mut it = it.skip(index);
+fn next_line_boundary_char_index(
+    it: impl Iterator<Item = char>,
+    mut index: CharIndex,
+) -> CharIndex {
+    let mut it = it.skip(index.0);
    if let Some(_first) = it.next() {
        index += 1;

@@ -260,36 +263,38 @@ fn is_linebreak(c: char) -> bool {
 /// Accepts and returns character offset (NOT byte offset!).
 pub fn find_line_start(text: &str, current_index: CCursor) -> CCursor {
    let byte_idx = byte_index_from_char_index(text, current_index.index);
-    let text_before = &text[..byte_idx];
+    let text_before = (ByteIndex::ZERO..byte_idx).slice(text);

    if let Some(last_newline_byte) = text_before.rfind('\n') {
-        let char_idx = char_index_from_byte_index(text, last_newline_byte + 1);
+        let char_idx = char_index_from_byte_index(text, ByteIndex(last_newline_byte + 1));
        CCursor::new(char_idx)
    } else {
        CCursor::new(0)
    }
 }

-pub fn byte_index_from_char_index(s: &str, char_index: usize) -> usize {
+pub fn byte_index_from_char_index(s: &str, char_index: CharIndex) -> ByteIndex {
    for (ci, (bi, _)) in s.char_indices().enumerate() {
-        if ci == char_index {
-            return bi;
+        if ci == char_index.0 {
+            return ByteIndex(bi);
        }
    }
-    s.len()
+    ByteIndex(s.len())
 }

-pub fn char_index_from_byte_index(input: &str, byte_index: usize) -> usize {
+pub fn char_index_from_byte_index(input: &str, byte_index: ByteIndex) -> CharIndex {
    for (ci, (bi, _)) in input.char_indices().enumerate() {
-        if bi == byte_index {
-            return ci;
+        if bi == byte_index.0 {
+            return CharIndex(ci);
        }
    }

-    input.char_indices().last().map_or(0, |(i, _)| i + 1)
+    // `byte_index` is at or past the end of the string (or not on a char boundary):
+    // return the total number of characters.
+    CharIndex(input.chars().count())
 }

-pub fn slice_char_range(s: &str, char_range: std::ops::Range<usize>) -> &str {
+pub fn slice_char_range(s: &str, char_range: std::ops::Range<CharIndex>) -> &str {
    assert!(
        char_range.start <= char_range.end,
        "Invalid range, start must be less than end, but start = {}, end = {}",
@@ -298,7 +303,7 @@ pub fn slice_char_range(s: &str, char_range: std::ops::Range<usize>) -> &str {
    );
    let start_byte = byte_index_from_char_index(s, char_range.start);
    let end_byte = byte_index_from_char_index(s, char_range.end);
-    &s[start_byte..end_byte]
+    (start_byte..end_byte).slice(s)
 }

 /// The thin rectangle of one end of the selection, e.g. the primary cursor, in local galley coordinates.
@@ -321,21 +326,21 @@ mod test {
    fn test_next_word_boundary_char_index() {
        // ASCII only
        let text = "abc d3f g_h i-j";
-        assert_eq!(next_word_boundary_char_index(text, 1), 3);
-        assert_eq!(next_word_boundary_char_index(text, 3), 7);
-        assert_eq!(next_word_boundary_char_index(text, 9), 11);
-        assert_eq!(next_word_boundary_char_index(text, 12), 13);
-        assert_eq!(next_word_boundary_char_index(text, 13), 15);
-        assert_eq!(next_word_boundary_char_index(text, 15), 15);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 11);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 13);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(13)).0, 15);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 15);

-        assert_eq!(next_word_boundary_char_index("", 0), 0);
-        assert_eq!(next_word_boundary_char_index("", 1), 0);
+        assert_eq!(next_word_boundary_char_index("", CharIndex(0)).0, 0);
+        assert_eq!(next_word_boundary_char_index("", CharIndex(1)).0, 0);

        // ASCII only
        let text = "abc.def.ghi";
-        assert_eq!(next_word_boundary_char_index(text, 1), 3);
-        assert_eq!(next_word_boundary_char_index(text, 3), 7);
-        assert_eq!(next_word_boundary_char_index(text, 7), 11);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(1)).0, 3);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(3)).0, 7);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(7)).0, 11);

        // Unicode graphemes, some of which consist of multiple Unicode characters,
        // !!! Unicode character is not always what is tranditionally considered a character,
@@ -343,32 +348,66 @@ mod test {
        // handling of and around emojis is kind of weird and is not consistent across
        // text editors and browsers
        let text = "❤️👍 skvělá knihovna 👍❤️";
-        assert_eq!(next_word_boundary_char_index(text, 0), 2);
-        assert_eq!(next_word_boundary_char_index(text, 2), 3); // this does not skip the space between thumbs-up and 'skvělá'
-        assert_eq!(next_word_boundary_char_index(text, 6), 10);
-        assert_eq!(next_word_boundary_char_index(text, 9), 10);
-        assert_eq!(next_word_boundary_char_index(text, 12), 19);
-        assert_eq!(next_word_boundary_char_index(text, 15), 19);
-        assert_eq!(next_word_boundary_char_index(text, 19), 20);
-        assert_eq!(next_word_boundary_char_index(text, 20), 21);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(0)).0, 2);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(2)).0, 3); // this does not skip the space between thumbs-up and 'skvělá'
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(6)).0, 10);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(9)).0, 10);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(12)).0, 19);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(15)).0, 19);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(19)).0, 20);
+        assert_eq!(next_word_boundary_char_index(text, CharIndex(20)).0, 21);
    }

    #[test]
    fn test_previous_word() {
        let text = "abc def ghi";
-        assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index, 4);
-        assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index, 4);
-        assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index, 0);
-        assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index, 0);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(7)).index.0, 4);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(5)).index.0, 4);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(4)).index.0, 0);
+        assert_eq!(ccursor_previous_word(text, CCursor::new(0)).index.0, 0);
    }

    #[test]
    fn test_next_word() {
        let text = "abc def ghi";
-        assert_eq!(ccursor_next_word(text, CCursor::new(0)).index, 3);
-        assert_eq!(ccursor_next_word(text, CCursor::new(3)).index, 7);
-        assert_eq!(ccursor_next_word(text, CCursor::new(7)).index, 11);
-        assert_eq!(ccursor_next_word(text, CCursor::new(11)).index, 11);
+        assert_eq!(ccursor_next_word(text, CCursor::new(0)).index.0, 3);
+        assert_eq!(ccursor_next_word(text, CCursor::new(3)).index.0, 7);
+        assert_eq!(ccursor_next_word(text, CCursor::new(7)).index.0, 11);
+        assert_eq!(ccursor_next_word(text, CCursor::new(11)).index.0, 11);
+    }
+
+    #[test]
+    fn test_index_conversion_roundtrip() {
+        // "é" is 2 bytes, "👍" is 4 bytes.
+        let text = "aé👍b";
+        let char_count = text.chars().count(); // 4
+        assert_eq!(char_count, 4);
+
+        // char -> byte, including the end index
+        assert_eq!(byte_index_from_char_index(text, CharIndex(0)).0, 0);
+        assert_eq!(byte_index_from_char_index(text, CharIndex(1)).0, 1);
+        assert_eq!(byte_index_from_char_index(text, CharIndex(2)).0, 3);
+        assert_eq!(byte_index_from_char_index(text, CharIndex(3)).0, 7);
+        assert_eq!(byte_index_from_char_index(text, CharIndex(4)).0, 8);
+        // Past the end clamps to the byte length:
+        assert_eq!(
+            byte_index_from_char_index(text, CharIndex(99)).0,
+            text.len()
+        );
+
+        // byte -> char, including the end index
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(0)).0, 0);
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(1)).0, 1);
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(3)).0, 2);
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(7)).0, 3);
+        // The end byte index must map to the character count, not to some byte offset:
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(text.len())).0, 4);
+        // Past the end clamps to the character count:
+        assert_eq!(char_index_from_byte_index(text, ByteIndex(99)).0, 4);
+
+        // Empty string:
+        assert_eq!(byte_index_from_char_index("", CharIndex(0)).0, 0);
+        assert_eq!(char_index_from_byte_index("", ByteIndex(0)).0, 0);
    }

    #[test]
@@ -380,16 +419,16 @@ mod test {
            range.primary.index.min(range.secondary.index),
            range.primary.index.max(range.secondary.index),
        );
-        assert_eq!(lo, 0);
-        assert_eq!(hi, 5);
+        assert_eq!(lo.0, 0);
+        assert_eq!(hi.0, 5);

        let range = select_word_at(text, CCursor::new(8));
        let (lo, hi) = (
            range.primary.index.min(range.secondary.index),
            range.primary.index.max(range.secondary.index),
        );
-        assert_eq!(lo, 6);
-        assert_eq!(hi, 11);
+        assert_eq!(lo.0, 6);
+        assert_eq!(hi.0, 11);
    }

    #[test]
@@ -401,10 +440,10 @@ mod test {
        let start = std::time::Instant::now();

        let next = ccursor_next_word(&large_text, CCursor::new(len - 10));
-        assert!(next.index <= len);
+        assert!(next.index.0 <= len);

        let prev = ccursor_previous_word(&large_text, CCursor::new(len - 10));
-        assert!(prev.index < len);
+        assert!(prev.index.0 < len);

        let range = select_word_at(&large_text, CCursor::new(len - 3));
        let lo = range.primary.index.min(range.secondary.index);
@@ -459,9 +498,9 @@ mod tests {
        for (text, cursor, expected) in cases {
            let result = ccursor_previous_word(text, CCursor::new(cursor));
            assert_eq!(
-                result.index, expected,
+                result.index.0, expected,
                "text={text:?}, cursor={cursor}, got={}, expected={expected}",
-                result.index
+                result.index.0
            );
        }
    }
--- a/crates/egui/src/text_selection/visuals.rs
+++ b/crates/egui/src/text_selection/visuals.rs
@@ -57,9 +57,9 @@ pub fn paint_text_selection(

        if !row.glyphs.is_empty() {
            // Change color of the selected text:
-            let first_glyph_index = if ri == min.row { min.column } else { 0 };
+            let first_glyph_index = if ri == min.row { min.column.0 } else { 0 };
            let last_glyph_index = if ri == max.row {
-                max.column
+                max.column.0
            } else {
                row.glyphs.len()
            };
--- a/crates/egui/src/widgets/text_edit/builder.rs
+++ b/crates/egui/src/widgets/text_edit/builder.rs
@@ -899,7 +899,7 @@ impl TextEdit<'_> {
                )
            });
        } else if selection_changed && let Some(cursor_range) = cursor_range {
-            let char_range = cursor_range.primary.index..=cursor_range.secondary.index;
+            let char_range = cursor_range.as_sorted_char_range();
            let info = WidgetInfo::text_selection_changed(
                ui.is_enabled(),
                char_range,
--- a/crates/egui/src/widgets/text_edit/text_buffer.rs
+++ b/crates/egui/src/widgets/text_edit/text_buffer.rs
@@ -1,6 +1,11 @@
 use std::{borrow::Cow, ops::Range};

-use epaint::{Galley, text::cursor::CCursor};
+use epaint::{
+    Galley,
+    text::{
+        ByteIndex, ByteRangeExt as _, CharIndex, CharRange, CharRangeExt as _, cursor::CCursor,
+    },
+};

 /// One `\t` character is this many spaces wide (for indentation purposes).
 const TAB_SIZE: usize = 4;
@@ -31,36 +36,36 @@ pub trait TextBuffer {
    ///
    /// # Return
    /// Returns how many *characters* were successfully inserted
-    fn insert_text(&mut self, text: &str, char_index: usize) -> usize;
+    fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize;

    /// Deletes a range of text `char_range` from this buffer.
    ///
    /// # Notes
    /// `char_range` is a *character range*, not a byte range.
-    fn delete_char_range(&mut self, char_range: Range<usize>);
+    fn delete_char_range(&mut self, char_range: Range<CharIndex>);

    /// Reads the given character range.
-    fn char_range(&self, char_range: Range<usize>) -> &str {
+    fn char_range(&self, char_range: Range<CharIndex>) -> &str {
        slice_char_range(self.as_str(), char_range)
    }

-    fn byte_index_from_char_index(&self, char_index: usize) -> usize {
+    fn byte_index_from_char_index(&self, char_index: CharIndex) -> ByteIndex {
        byte_index_from_char_index(self.as_str(), char_index)
    }

-    fn char_index_from_byte_index(&self, char_index: usize) -> usize {
-        char_index_from_byte_index(self.as_str(), char_index)
+    fn char_index_from_byte_index(&self, byte_index: ByteIndex) -> CharIndex {
+        char_index_from_byte_index(self.as_str(), byte_index)
    }

    /// Clears all characters in this buffer
    fn clear(&mut self) {
-        self.delete_char_range(0..self.as_str().len());
+        self.delete_char_range(CharRange::full(self.as_str()));
    }

    /// Replaces all contents of this string with `text`
    fn replace_with(&mut self, text: &str) {
        self.clear();
-        self.insert_text(text, 0);
+        self.insert_text(text, CharIndex(0));
    }

    /// Clears all characters in this buffer and returns a string of the contents.
@@ -90,12 +95,12 @@ pub trait TextBuffer {
    fn decrease_indentation(&mut self, ccursor: &mut CCursor) {
        let line_start = find_line_start(self.as_str(), *ccursor);

-        let remove_len = if self.as_str().chars().nth(line_start.index) == Some('\t') {
+        let remove_len = if self.as_str().chars().nth(line_start.index.0) == Some('\t') {
            Some(1)
        } else if self
            .as_str()
            .chars()
-            .skip(line_start.index)
+            .skip(line_start.index.0)
            .take(TAB_SIZE)
            .all(|c| c == ' ')
        {
@@ -126,7 +131,7 @@ pub trait TextBuffer {
    }

    fn delete_previous_char(&mut self, ccursor: CCursor) -> CCursor {
-        if ccursor.index > 0 {
+        if CharIndex::ZERO < ccursor.index {
            let max_ccursor = ccursor;
            let min_ccursor = max_ccursor - 1;
            self.delete_selected_ccursor_range([min_ccursor, max_ccursor])
@@ -190,8 +195,8 @@ pub trait TextBuffer {
    /// impl TextBuffer for ExampleBuffer {
    ///     fn is_mutable(&self) -> bool { unimplemented!() }
    ///     fn as_str(&self) -> &str { unimplemented!() }
-    ///     fn insert_text(&mut self, text: &str, char_index: usize) -> usize { unimplemented!() }
-    ///     fn delete_char_range(&mut self, char_range: std::ops::Range<usize>) { unimplemented!() }
+    ///     fn insert_text(&mut self, text: &str, char_index: egui::text::CharIndex) -> usize { unimplemented!() }
+    ///     fn delete_char_range(&mut self, char_range: std::ops::Range<egui::text::CharIndex>) { unimplemented!() }
    ///
    ///     // Implement it like the following:
    ///     fn type_id(&self) -> TypeId {
@@ -220,17 +225,17 @@ impl TextBuffer for String {
        self.as_ref()
    }

-    fn insert_text(&mut self, text: &str, char_index: usize) -> usize {
+    fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize {
        // Get the byte index from the character index
        let byte_idx = byte_index_from_char_index(self.as_str(), char_index);

        // Then insert the string
-        self.insert_str(byte_idx, text);
+        self.insert_str(byte_idx.into(), text);

        text.chars().count()
    }

-    fn delete_char_range(&mut self, char_range: Range<usize>) {
+    fn delete_char_range(&mut self, char_range: Range<CharIndex>) {
        assert!(
            char_range.start <= char_range.end,
            "start must be <= end, but got {char_range:?}"
@@ -241,7 +246,7 @@ impl TextBuffer for String {
        let byte_end = byte_index_from_char_index(self.as_str(), char_range.end);

        // Then drain all characters within this range
-        self.drain(byte_start..byte_end);
+        self.drain((byte_start..byte_end).as_usize());
    }

    fn clear(&mut self) {
@@ -270,11 +275,11 @@ impl TextBuffer for Cow<'_, str> {
        self.as_ref()
    }

-    fn insert_text(&mut self, text: &str, char_index: usize) -> usize {
+    fn insert_text(&mut self, text: &str, char_index: CharIndex) -> usize {
        <String as TextBuffer>::insert_text(self.to_mut(), text, char_index)
    }

-    fn delete_char_range(&mut self, char_range: Range<usize>) {
+    fn delete_char_range(&mut self, char_range: Range<CharIndex>) {
        <String as TextBuffer>::delete_char_range(self.to_mut(), char_range);
    }

@@ -305,11 +310,11 @@ impl TextBuffer for &str {
        self
    }

-    fn insert_text(&mut self, _text: &str, _ch_idx: usize) -> usize {
+    fn insert_text(&mut self, _text: &str, _ch_idx: CharIndex) -> usize {
        0
    }

-    fn delete_char_range(&mut self, _ch_range: Range<usize>) {}
+    fn delete_char_range(&mut self, _ch_range: Range<CharIndex>) {}

    fn type_id(&self) -> std::any::TypeId {
        std::any::TypeId::of::<&str>()
--- a/crates/egui_extras/src/syntax_highlighting.rs
+++ b/crates/egui_extras/src/syntax_highlighting.rs
@@ -604,7 +604,8 @@ impl Highlighter {
 }

 #[cfg(feature = "syntect")]
-fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<usize> {
+fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<egui::text::ByteIndex> {
+    use egui::text::ByteIndex;
    let whole_start = whole.as_ptr() as usize;
    let range_start = range.as_ptr() as usize;
    assert!(
@@ -617,7 +618,7 @@ fn as_byte_range(whole: &str, range: &str) -> std::ops::Range<usize> {
        range_start + range.len()
    );
    let offset = range_start - whole_start;
-    offset..(offset + range.len())
+    ByteIndex(offset)..ByteIndex(offset + range.len())
 }

 // ----------------------------------------------------------------------------
--- a/crates/epaint/src/text/cursor.rs
+++ b/crates/epaint/src/text/cursor.rs
@@ -1,5 +1,7 @@
 //! Different types of text cursors, i.e. ways to point into a [`super::Galley`].

+use super::index::CharIndex;
+
 /// Character cursor.
 ///
 /// The default cursor is zero.
@@ -7,7 +9,7 @@
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct CCursor {
    /// Character offset (NOT byte offset!).
-    pub index: usize,
+    pub index: CharIndex,

    /// If this cursors sits right at the border of a wrapped row break (NOT paragraph break)
    /// do we prefer the next row?
@@ -18,9 +20,9 @@ pub struct CCursor {

 impl CCursor {
    #[inline]
-    pub fn new(index: usize) -> Self {
+    pub fn new(index: impl Into<CharIndex>) -> Self {
        Self {
-            index,
+            index: index.into(),
            prefer_next_row: false,
        }
    }
@@ -83,5 +85,5 @@ pub struct LayoutCursor {
    /// Character based (NOT bytes).
    /// It is fine if this points to something beyond the end of the current row.
    /// When moving up/down it may again be within the next row.
-    pub column: usize,
+    pub column: CharIndex,
 }
--- a/crates/epaint/src/text/fonts.rs
+++ b/crates/epaint/src/text/fonts.rs
@@ -10,7 +10,7 @@ use std::{
 use crate::{
    TextureAtlas,
    text::{
-        Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords,
+        ByteIndex, Galley, LayoutJob, LayoutSection, TextOptions, VariationCoords,
        font::{Font, FontFace},
    },
 };
@@ -1070,10 +1070,10 @@ impl GalleyCache {
                // `start` and `end` are the byte range of the current paragraph.
                // How does the current section overlap with the paragraph range?

-                if section_range.end <= start {
+                if section_range.end <= ByteIndex(start) {
                    // The section is behind us
                    current_section += 1;
-                } else if end < section_range.start {
+                } else if ByteIndex(end) < section_range.start {
                    break; // Haven't reached this one yet.
                } else {
                    // Section range overlaps with paragraph range
@@ -1082,13 +1082,13 @@ impl GalleyCache {
                        "Bad byte_range: {section_range:?}"
                    );
                    let new_range = section_range.start.saturating_sub(start)
-                        ..(section_range.end.at_most(end)).saturating_sub(start);
+                        ..(section_range.end.min(ByteIndex(end))).saturating_sub(start);
                    debug_assert!(
                        new_range.start <= new_range.end,
                        "Bad new section range: {new_range:?}"
                    );
                    paragraph_job.sections.push(LayoutSection {
-                        leading_space: if start <= section_range.start {
+                        leading_space: if ByteIndex(start) <= section_range.start {
                            *leading_space
                        } else {
                            0.0
--- a/crates/epaint/src/text/index.rs
+++ b/crates/epaint/src/text/index.rs
@@ -0,0 +1,202 @@
+//! Strongly-typed offsets into text.
+//!
+//! UTF-8 text can be indexed either by _byte_ offset or by _character_
+//! (Unicode scalar) offset. Mixing the two is a common source of bugs,
+//! so we use distinct types to keep them apart.
+
+use std::ops::Range;
+
+/// A byte offset into a UTF-8 string.
+///
+/// This is what you use to slice a [`str`] (e.g. `&text[range.start.0..range.end.0]`).
+/// Not to be confused with [`CharIndex`], which counts characters instead of bytes.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(
+    feature = "serde",
+    derive(serde::Deserialize, serde::Serialize),
+    serde(transparent)
+)]
+pub struct ByteIndex(pub usize);
+
+/// A character (Unicode scalar) offset into a string.
+///
+/// Counts characters, not bytes, so it is independent of the UTF-8 encoding.
+/// Not to be confused with [`ByteIndex`]. See also [`super::cursor::CCursor`].
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(
+    feature = "serde",
+    derive(serde::Deserialize, serde::Serialize),
+    serde(transparent)
+)]
+pub struct CharIndex(pub usize);
+
+macro_rules! impl_text_index {
+    ($Type:ident) => {
+        impl $Type {
+            /// The zero offset, i.e. the very start of the text.
+            pub const ZERO: Self = Self(0);
+
+            /// Saturating integer addition.
+            #[inline]
+            pub fn saturating_add(self, rhs: usize) -> Self {
+                Self(self.0.saturating_add(rhs))
+            }
+
+            /// Saturating integer subtraction.
+            #[inline]
+            pub fn saturating_sub(self, rhs: usize) -> Self {
+                Self(self.0.saturating_sub(rhs))
+            }
+        }
+
+        impl From<usize> for $Type {
+            #[inline]
+            fn from(index: usize) -> Self {
+                Self(index)
+            }
+        }
+
+        impl From<$Type> for usize {
+            #[inline]
+            fn from(index: $Type) -> Self {
+                index.0
+            }
+        }
+
+        impl std::ops::Add<usize> for $Type {
+            type Output = Self;
+
+            #[inline]
+            fn add(self, rhs: usize) -> Self {
+                Self(self.0 + rhs)
+            }
+        }
+
+        /// Compose offsets, e.g. a base position plus a relative one.
+        impl std::ops::Add<$Type> for $Type {
+            type Output = Self;
+
+            #[inline]
+            fn add(self, rhs: Self) -> Self {
+                Self(self.0 + rhs.0)
+            }
+        }
+
+        impl std::ops::Sub<usize> for $Type {
+            type Output = Self;
+
+            #[inline]
+            fn sub(self, rhs: usize) -> Self {
+                Self(self.0 - rhs)
+            }
+        }
+
+        impl std::ops::Sub<$Type> for $Type {
+            type Output = Self;
+
+            #[inline]
+            fn sub(self, rhs: Self) -> Self {
+                Self(self.0 - rhs.0)
+            }
+        }
+
+        impl std::ops::AddAssign<usize> for $Type {
+            #[inline]
+            fn add_assign(&mut self, rhs: usize) {
+                self.0 += rhs;
+            }
+        }
+
+        impl std::ops::AddAssign<$Type> for $Type {
+            #[inline]
+            fn add_assign(&mut self, rhs: Self) {
+                self.0 += rhs.0;
+            }
+        }
+
+        impl std::ops::SubAssign<usize> for $Type {
+            #[inline]
+            fn sub_assign(&mut self, rhs: usize) {
+                self.0 -= rhs;
+            }
+        }
+
+        impl std::fmt::Display for $Type {
+            #[inline]
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                self.0.fmt(f)
+            }
+        }
+    };
+}
+
+impl_text_index!(ByteIndex);
+impl_text_index!(CharIndex);
+
+/// A range of [`ByteIndex`], i.e. a byte range into a [`str`].
+pub type ByteRange = Range<ByteIndex>;
+
+/// A range of [`CharIndex`], i.e. a character range into a [`str`].
+pub type CharRange = Range<CharIndex>;
+
+/// Extension methods for a [`ByteRange`].
+pub trait ByteRangeExt {
+    /// The full byte range covering `text`, i.e. `0..text.len()`.
+    fn full(text: &str) -> Self;
+
+    /// The `start..end` byte range as plain `usize`, for slicing a [`str`].
+    fn as_usize(&self) -> Range<usize>;
+
+    /// Slice the given string by this byte range.
+    fn slice<'s>(&self, text: &'s str) -> &'s str;
+}
+
+impl ByteRangeExt for ByteRange {
+    #[inline]
+    fn full(text: &str) -> Self {
+        ByteIndex::ZERO..ByteIndex(text.len())
+    }
+
+    #[inline]
+    fn as_usize(&self) -> Range<usize> {
+        self.start.0..self.end.0
+    }
+
+    #[inline]
+    fn slice<'s>(&self, text: &'s str) -> &'s str {
+        &text[self.as_usize()]
+    }
+}
+
+/// Extension methods for a [`CharRange`].
+pub trait CharRangeExt {
+    /// The full character range covering `text`, i.e. `0..text.chars().count()`.
+    fn full(text: &str) -> Self;
+}
+
+impl CharRangeExt for CharRange {
+    #[inline]
+    fn full(text: &str) -> Self {
+        CharIndex::ZERO..CharIndex(text.chars().count())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::CharIndex;
+
+    #[test]
+    fn arithmetic() {
+        // Add a relative offset to a base position:
+        assert_eq!(CharIndex(2) + CharIndex(3), CharIndex(5));
+        assert_eq!(CharIndex(2) + 3, CharIndex(5));
+
+        let mut idx = CharIndex(2);
+        idx += CharIndex(3);
+        assert_eq!(idx, CharIndex(5));
+
+        // Subtract a relative offset from a position:
+        assert_eq!(CharIndex(5) - CharIndex(2), CharIndex(3));
+        assert_eq!(CharIndex(5) - 2, CharIndex(3));
+    }
+}
--- a/crates/epaint/src/text/mod.rs
+++ b/crates/epaint/src/text/mod.rs
@@ -3,6 +3,7 @@
 pub mod cursor;
 mod font;
 mod fonts;
+mod index;
 mod text_layout;
 mod text_layout_types;

@@ -11,6 +12,7 @@ pub use {
        FontData, FontDefinitions, FontFamily, FontId, FontInsert, FontPriority, FontTweak, Fonts,
        FontsImpl, FontsView, InsertFontFamily,
    },
+    index::{ByteIndex, ByteRange, ByteRangeExt, CharIndex, CharRange, CharRangeExt},
    text_layout::*,
    text_layout_types::*,
 };
--- a/crates/epaint/src/text/text_layout.rs
+++ b/crates/epaint/src/text/text_layout.rs
@@ -15,8 +15,8 @@ use crate::{
 };

 use super::{
-    FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row, RowVisuals,
-    VariationCoords,
+    ByteRangeExt as _, FontsImpl, Galley, Glyph, LayoutJob, LayoutSection, PlacedRow, Row,
+    RowVisuals, VariationCoords,
    font::{Font, FontFace, ShapedGlyph},
 };

@@ -454,7 +454,7 @@ fn layout_section(
    }
    paragraph.cursor_x_px += leading_space * pixels_per_point;

-    let section_text = &job.text[byte_range.clone()];
+    let section_text = &job.text[byte_range.as_usize()];
    let mut ctx = ShapingContext {
        pixels_per_point,
        font_size,
@@ -1574,7 +1574,7 @@ mod tests {
                    pixels_per_point,
                    Arc::new(LayoutJob::single_section(
                        iter::chain(
-                            (0..elided_galley.rows[0].char_count_excluding_newline()).map(|_| ch),
+                            (0..elided_galley.rows[0].char_count_excluding_newline().0).map(|_| ch),
                            iter::once('…'),
                        )
                        .collect::<String>(),
@@ -1866,7 +1866,7 @@ mod tests {

            // Verify cursor round-trip: end cursor index == char count.
            assert_eq!(
-                galley.end().index,
+                galley.end().index.0,
                expected_chars,
                "Galley::end().index mismatch for {text:?}",
            );
@@ -1892,9 +1892,9 @@ mod tests {
        let galley = layout(&mut fonts, pixels_per_point, job.into());

        // Walking through every cursor index should produce valid positions.
-        for i in 0..=galley.end().index {
+        for i in 0..=galley.end().index.0 {
            let cursor = CCursor {
-                index: i,
+                index: CharIndex(i),
                prefer_next_row: false,
            };
            let rect = galley.pos_from_cursor(cursor);
--- a/crates/epaint/src/text/text_layout_types.rs
+++ b/crates/epaint/src/text/text_layout_types.rs
@@ -4,6 +4,7 @@ use std::{ops::Range, str::FromStr as _};
 use super::{
    cursor::{CCursor, LayoutCursor},
    font::UvRect,
+    index::{ByteIndex, ByteRange, ByteRangeExt as _, CharIndex},
 };
 use crate::{Color32, FontId, Mesh, Stroke, text::FontsView};
 use emath::{Align, GuiRounding as _, NumExt as _, OrderedFloat, Pos2, Rect, Vec2, pos2, vec2};
@@ -119,7 +120,7 @@ impl LayoutJob {
        Self {
            sections: vec![LayoutSection {
                leading_space: 0.0,
-                byte_range: 0..text.len(),
+                byte_range: ByteRange::full(&text),
                format: TextFormat::simple(font_id, color),
            }],
            text,
@@ -138,7 +139,7 @@ impl LayoutJob {
        Self {
            sections: vec![LayoutSection {
                leading_space: 0.0,
-                byte_range: 0..text.len(),
+                byte_range: ByteRange::full(&text),
                format,
            }],
            text,
@@ -153,7 +154,7 @@ impl LayoutJob {
        Self {
            sections: vec![LayoutSection {
                leading_space: 0.0,
-                byte_range: 0..text.len(),
+                byte_range: ByteRange::full(&text),
                format: TextFormat::simple(font_id, color),
            }],
            text,
@@ -168,7 +169,7 @@ impl LayoutJob {
        Self {
            sections: vec![LayoutSection {
                leading_space: 0.0,
-                byte_range: 0..text.len(),
+                byte_range: ByteRange::full(&text),
                format,
            }],
            text,
@@ -192,7 +193,7 @@ impl LayoutJob {
    pub fn append(&mut self, text: &str, leading_space: f32, format: TextFormat) {
        let start = self.text.len();
        self.text += text;
-        let byte_range = start..self.text.len();
+        let byte_range = ByteIndex(start)..ByteIndex(self.text.len());

        // Optimization: merge into the previous section if it has the same format
        // and this one adds no leading space.
@@ -217,7 +218,7 @@ impl LayoutJob {
    ///
    /// Panics if the job has no sections.
    /// Assumes [`LayoutJob::sections`] are ordered by increasing `byte_range` (as produced by [`Self::append`]).
-    pub fn format_at_byte(&self, byte_idx: usize) -> &TextFormat {
+    pub fn format_at_byte(&self, byte_idx: ByteIndex) -> &TextFormat {
        self.debug_sanity_check();
        let last = self.sections.last().expect("LayoutJob has no sections");
        let idx = self
@@ -250,12 +251,12 @@ impl LayoutJob {
                    .expect("checked above")
                    .byte_range
                    .start,
-                0,
+                ByteIndex::ZERO,
                "First LayoutSection must start at byte 0"
            );
            assert_eq!(
                self.sections.last().expect("checked above").byte_range.end,
-                self.text.len(),
+                ByteIndex(self.text.len()),
                "Last LayoutSection must end at the end of the text"
            );

@@ -341,7 +342,7 @@ pub struct LayoutSection {
    pub leading_space: f32,

    /// Range into [`LayoutJob::text`].
-    pub byte_range: Range<usize>,
+    pub byte_range: Range<ByteIndex>,

    /// How to format the text in this section (font, color, etc).
    pub format: TextFormat,
@@ -946,23 +947,23 @@ impl Row {

    /// Excludes the implicit `\n` after the [`Row`], if any.
    #[inline]
-    pub fn char_count_excluding_newline(&self) -> usize {
-        self.glyphs.len()
+    pub fn char_count_excluding_newline(&self) -> CharIndex {
+        CharIndex(self.glyphs.len())
    }

    /// Closest char at the desired x coordinate in row-relative coordinates.
    /// Returns something in the range `[0, char_count_excluding_newline()]`.
-    pub fn char_at(&self, desired_x: f32) -> usize {
+    pub fn char_at(&self, desired_x: f32) -> CharIndex {
        for (i, glyph) in self.glyphs.iter().enumerate() {
            if desired_x < glyph.logical_rect().center().x {
-                return i;
+                return CharIndex(i);
            }
        }
        self.char_count_excluding_newline()
    }

-    pub fn x_offset(&self, column: usize) -> f32 {
-        if let Some(glyph) = self.glyphs.get(column) {
+    pub fn x_offset(&self, column: CharIndex) -> f32 {
+        if let Some(glyph) = self.glyphs.get(column.0) {
            glyph.pos.x
        } else {
            self.size.x
@@ -988,8 +989,8 @@ impl PlacedRow {

    /// Includes the implicit `\n` after the [`PlacedRow`], if any.
    #[inline]
-    pub fn char_count_including_newline(&self) -> usize {
-        self.row.glyphs.len() + (self.ends_with_newline as usize)
+    pub fn char_count_including_newline(&self) -> CharIndex {
+        CharIndex(self.row.glyphs.len() + (self.ends_with_newline as usize))
    }
 }

@@ -1188,7 +1189,7 @@ impl Galley {
        let mut best_y_dist = f32::INFINITY;
        let mut cursor = CCursor::default();

-        let mut ccursor_index = 0;
+        let mut ccursor_index = CharIndex::ZERO;

        for row in &self.rows {
            let min_y = row.min_y();
@@ -1234,7 +1235,7 @@ impl Galley {
            return Default::default();
        }
        let mut ccursor = CCursor {
-            index: 0,
+            index: CharIndex::ZERO,
            prefer_next_row: true,
        };
        for row in &self.rows {
@@ -1251,7 +1252,7 @@ impl Galley {
    pub fn layout_from_cursor(&self, cursor: CCursor) -> LayoutCursor {
        let prefer_next_row = cursor.prefer_next_row;
        let mut ccursor_it = CCursor {
-            index: 0,
+            index: CharIndex::ZERO,
            prefer_next_row,
        };

@@ -1294,15 +1295,13 @@ impl Galley {
        let prefer_next_row =
            layout_cursor.column < self.rows[layout_cursor.row].char_count_excluding_newline();
        let mut cursor_it = CCursor {
-            index: 0,
+            index: CharIndex::ZERO,
            prefer_next_row,
        };

        for (row_nr, row) in self.rows.iter().enumerate() {
            if row_nr == layout_cursor.row {
-                cursor_it.index += layout_cursor
-                    .column
-                    .at_most(row.char_count_excluding_newline());
+                cursor_it.index += layout_cursor.column.min(row.char_count_excluding_newline());

                return cursor_it;
            }
@@ -1316,7 +1315,7 @@ impl Galley {
 impl Galley {
    #[expect(clippy::unused_self)]
    pub fn cursor_left_one_character(&self, cursor: &CCursor) -> CCursor {
-        if cursor.index == 0 {
+        if cursor.index == CharIndex::ZERO {
            Default::default()
        } else {
            CCursor {
@@ -1392,7 +1391,7 @@ impl Galley {
        let layout_cursor = self.layout_from_cursor(*cursor);
        self.cursor_from_layout(LayoutCursor {
            row: layout_cursor.row,
-            column: 0,
+            column: CharIndex::ZERO,
        })
    }

@@ -1406,7 +1405,7 @@ impl Galley {

    pub fn cursor_begin_of_paragraph(&self, cursor: &CCursor) -> CCursor {
        let mut layout_cursor = self.layout_from_cursor(*cursor);
-        layout_cursor.column = 0;
+        layout_cursor.column = CharIndex::ZERO;

        loop {
            let prev_row = layout_cursor