From fef269277b1e3bbcd8855fb8552bed7d8cfdbf1d Mon Sep 17 00:00:00 2001 From: Gautier Cailly <109429289+gcailly@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:27:48 +0200 Subject: [PATCH] Fix grapheme cluster glyph count to restore cursor/selection invariant (#8088) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit May close #8087, but cannot test macOS builtin Japanese IME. ## Summary PR #8031 (harfrust text shaping) introduced a regression: when harfrust shapes multi-codepoint clusters (flag emojis, ligatures, combining marks) into fewer glyphs than input characters, the invariant `glyphs.len() == char_count` breaks. This causes IME composition to duplicate characters and text selection to behave incorrectly. ## Fix In `layout_shaped_run()`, after emitting shaped glyphs for a cluster, we now check if the cluster had more characters than glyphs. If so, zero-width "continuation" glyphs are emitted for the extra characters, restoring the 1:1 glyph-to-character mapping. Continuation glyphs have `UvRect::default()` (`is_nothing() == true`), so `tessellate_glyphs` skips them entirely. Background, underline, and strikethrough rendering handle zero-width glyphs naturally. Only `crates/epaint/src/text/text_layout.rs` is modified. No changes to cursor logic, selection code, or public API. ## Test plan - [x] `cargo fmt --all -- --check` - [x] `cargo clippy -p epaint --tests` - [x] `cargo test -p epaint -p egui` (all pass) - [x] New test `test_grapheme_cluster_glyph_count`: verifies glyph count == char count for flag emojis, combining marks, and plain ASCII - [x] New test `test_grapheme_cluster_cursor_roundtrip`: verifies cursor position stability through `pos_from_cursor` -> `cursor_from_pos` round-trips on text containing flag emojis - [x] Manual testing with demo app: selection and cursor navigation work correctly on `A๐Ÿ‡ฏ๐Ÿ‡ตB` - [ ] IME testing (macOS Japanese IME) needs to be validated by someone on macOS --- **This PR was developed with the assistance of Claude Code.** --------- Co-authored-by: Emil Ernerfeldt --- crates/epaint/src/text/text_layout.rs | 181 +++++++++++++++++++++++++- 1 file changed, 178 insertions(+), 3 deletions(-) diff --git a/crates/epaint/src/text/text_layout.rs b/crates/epaint/src/text/text_layout.rs index 75cbb5be0..21dd7996c 100644 --- a/crates/epaint/src/text/text_layout.rs +++ b/crates/epaint/src/text/text_layout.rs @@ -1,5 +1,6 @@ #![expect(clippy::unwrap_used)] // TODO(emilk): remove unwraps +use std::ops::Range; use std::sync::Arc; use emath::{Align, GuiRounding as _, NumExt as _, Pos2, Rect, Vec2, pos2, vec2}; @@ -217,6 +218,11 @@ struct TextRun { } /// Emit shaped glyphs from a [`harfrust::GlyphBuffer`] into a [`Paragraph`]. +/// +/// When a cluster maps multiple characters to fewer glyphs (e.g. flag emojis, +/// ligatures), zero-width "continuation" glyphs are emitted for the extra +/// characters so that `glyphs.len() == char_count` โ€” an invariant that all +/// cursor and selection code relies on. fn layout_shaped_run( font: &mut Font<'_>, run: &TextRun, @@ -232,6 +238,11 @@ fn layout_shaped_run( // so they are not comparable across runs. ctx.prev_cluster = None; + // Track how many glyphs we emit per cluster so we can add zero-width + // continuation glyphs when a cluster has more chars than glyphs. + let mut cluster_start_byte: usize = 0; + let mut cluster_glyph_count: usize = 0; + for (info, pos) in glyph_buffer .glyph_infos() .iter() @@ -271,10 +282,22 @@ fn layout_shaped_run( // Apply extra_letter_spacing only at cluster boundaries, // never between glyphs within the same cluster (e.g. base + mark). let is_new_cluster = ctx.prev_cluster.is_none_or(|pc| pc != cluster); - if !ctx.is_first_glyph_in_section && is_new_cluster { - paragraph.cursor_x_px += ctx.extra_letter_spacing * ctx.pixels_per_point; - } if is_new_cluster { + if ctx.prev_cluster.is_some() { + emit_continuation_glyphs( + ctx, + paragraph, + run_text, + cluster_start_byte..cluster as usize, + cluster_glyph_count, + face_metrics, + ); + } + if !ctx.is_first_glyph_in_section { + paragraph.cursor_x_px += ctx.extra_letter_spacing * ctx.pixels_per_point; + } + cluster_start_byte = cluster as usize; + cluster_glyph_count = 0; ctx.is_first_glyph_in_section = false; } ctx.prev_cluster = Some(cluster); @@ -353,6 +376,50 @@ fn layout_shaped_run( ) }; paragraph.glyphs.push(glyph); + cluster_glyph_count += 1; + } + + // Emit continuation glyphs for the last cluster in the run. + if ctx.prev_cluster.is_some() { + emit_continuation_glyphs( + ctx, + paragraph, + run_text, + cluster_start_byte..run_text.len(), + cluster_glyph_count, + face_metrics, + ); + } +} + +/// Emit zero-width continuation glyphs when a cluster has more characters than +/// shaped glyphs. +/// +/// This preserves the invariant `glyphs.len() == char_count` that all cursor +/// and text-selection code depends on. Continuation glyphs have +/// [`UvRect::default()`] so [`tessellate_glyphs`] skips them entirely. +fn emit_continuation_glyphs( + ctx: &ShapingContext, + paragraph: &mut Paragraph, + run_text: &str, + cluster_bytes: Range, + cluster_glyph_count: usize, + face_metrics: &StyledMetrics, +) { + let Some(cluster_text) = run_text.get(cluster_bytes) else { + return; + }; + let char_count = cluster_text.chars().count(); + if char_count <= cluster_glyph_count { + return; + } + + let physical_x = paragraph.cursor_x_px.round() as i32; + + for chr in cluster_text.chars().skip(cluster_glyph_count) { + paragraph + .glyphs + .push(ctx.glyph(chr, physical_x, 0.0, face_metrics, UvRect::default())); } } @@ -1369,6 +1436,7 @@ fn shape_text( mod tests { use super::{super::*, *}; + use crate::text::cursor::CCursor; #[test] fn test_zero_max_width() { @@ -1738,6 +1806,113 @@ mod tests { } } + /// Regression test for . + /// + /// Multi-codepoint grapheme clusters (flag emojis, combining marks) must + /// produce exactly as many glyphs as characters so that cursor positioning + /// and text selection remain correct. + #[test] + fn test_grapheme_cluster_glyph_count() { + let pixels_per_point = 1.0; + let mut fonts = FontsImpl::new(TextOptions::default(), FontDefinitions::default()); + let font_id = FontId::default(); + + // Each test case: (input text, expected char count) + let cases: &[(&str, usize)] = &[ + // Flag emoji: two Regional Indicator codepoints โ†’ one visual glyph + ("\u{1F1EF}\u{1F1F5}", 2), // ๐Ÿ‡ฏ๐Ÿ‡ต + // Flag surrounded by ASCII + ("A\u{1F1EB}\u{1F1F7}B", 4), // A๐Ÿ‡ซ๐Ÿ‡ทB + // Base char + combining acute accent + ("e\u{0301}", 2), // รฉ as decomposed + // Multiple combining marks + ("o\u{0302}\u{0323}", 3), // แป™ + // Plain ASCII (sanity check) + ("Hello", 5), + ]; + + for &(text, expected_chars) in cases { + let job = LayoutJob::simple( + text.to_owned(), + font_id.clone(), + Color32::WHITE, + f32::INFINITY, + ); + let galley = layout(&mut fonts, pixels_per_point, job.into()); + + let total_glyphs: usize = galley.rows.iter().map(|r| r.row.glyphs.len()).sum(); + + assert_eq!( + total_glyphs, + expected_chars, + "Glyph count mismatch for {text:?}: \ + expected {expected_chars} glyphs (one per char), got {total_glyphs}. \ + Glyphs: {:?}", + galley.rows[0] + .row + .glyphs + .iter() + .map(|g| (g.chr, g.advance_width)) + .collect::>(), + ); + + // Verify that Row::text() reconstructs the input text. + let row_text: String = galley.rows.iter().map(|r| r.text()).collect(); + assert_eq!(row_text, text, "Row::text() mismatch for {text:?}",); + + // Verify cursor round-trip: end cursor index == char count. + assert_eq!( + galley.end().index, + expected_chars, + "Galley::end().index mismatch for {text:?}", + ); + } + } + + /// Verify that cursor positioning round-trips correctly for text + /// containing multi-codepoint grapheme clusters (regression test for #8087). + #[test] + fn test_grapheme_cluster_cursor_roundtrip() { + let pixels_per_point = 1.0; + let mut fonts = FontsImpl::new(TextOptions::default(), FontDefinitions::default()); + let font_id = FontId::default(); + + // "A" + flag emoji (2 codepoints) + "B" = 4 chars + let text = "A\u{1F1EF}\u{1F1F5}B"; + let job = LayoutJob::simple( + text.to_owned(), + font_id.clone(), + Color32::WHITE, + f32::INFINITY, + ); + let galley = layout(&mut fonts, pixels_per_point, job.into()); + + // Walking through every cursor index should produce valid positions. + for i in 0..=galley.end().index { + let cursor = CCursor { + index: i, + prefer_next_row: false, + }; + let rect = galley.pos_from_cursor(cursor); + assert!( + rect.is_finite(), + "pos_from_cursor returned non-finite rect for index {i}", + ); + + // Round-trip: position โ†’ cursor โ†’ position should be stable. + let cursor2 = galley.cursor_from_pos(Vec2::new(rect.center().x, rect.center().y)); + let rect2 = galley.pos_from_cursor(cursor2); + assert!( + (rect.min.x - rect2.min.x).abs() < 1.0, + "Cursor round-trip unstable at index {i}: \ + first={}, second={}, cursor2.index={}", + rect.min.x, + rect2.min.x, + cursor2.index, + ); + } + } + fn measure_text( fonts: &mut FontsImpl, text: &str,