1
0
mirror of https://github.com/emilk/egui.git synced 2026-06-26 22:53:14 -04:00

Fix grapheme cluster glyph count to restore cursor/selection invariant (#8088)

May close #8087, but cannot test macOS builtin Japanese IME.

## Summary

PR #8031 (harfrust text shaping) introduced a regression: when harfrust
shapes multi-codepoint clusters (flag emojis, ligatures, combining
marks) into fewer glyphs than input characters, the invariant
`glyphs.len() == char_count` breaks. This causes IME composition to
duplicate characters and text selection to behave incorrectly.

## Fix

In `layout_shaped_run()`, after emitting shaped glyphs for a cluster, we
now check if the cluster had more characters than glyphs. If so,
zero-width "continuation" glyphs are emitted for the extra characters,
restoring the 1:1 glyph-to-character mapping.

Continuation glyphs have `UvRect::default()` (`is_nothing() == true`),
so `tessellate_glyphs` skips them entirely. Background, underline, and
strikethrough rendering handle zero-width glyphs naturally.

Only `crates/epaint/src/text/text_layout.rs` is modified. No changes to
cursor logic, selection code, or public API.

## Test plan

- [x] `cargo fmt --all -- --check`
- [x] `cargo clippy -p epaint --tests`
- [x] `cargo test -p epaint -p egui` (all pass)
- [x] New test `test_grapheme_cluster_glyph_count`: verifies glyph count
== char count for flag emojis, combining marks, and plain ASCII
- [x] New test `test_grapheme_cluster_cursor_roundtrip`: verifies cursor
position stability through `pos_from_cursor` -> `cursor_from_pos`
round-trips on text containing flag emojis
- [x] Manual testing with demo app: selection and cursor navigation work
correctly on `A🇯🇵B`
- [ ] IME testing (macOS Japanese IME) needs to be validated by someone
on macOS

---

**This PR was developed with the assistance of Claude Code.**

---------

Co-authored-by: Emil Ernerfeldt <emil.ernerfeldt@gmail.com>
This commit is contained in:
Gautier Cailly
2026-04-15 11:27:48 +02:00
committed by GitHub
parent 4016dee2bd
commit fef269277b

View File

@@ -1,5 +1,6 @@
#![expect(clippy::unwrap_used)] // TODO(emilk): remove unwraps
use std::ops::Range;
use std::sync::Arc;
use emath::{Align, GuiRounding as _, NumExt as _, Pos2, Rect, Vec2, pos2, vec2};
@@ -217,6 +218,11 @@ struct TextRun {
}
/// Emit shaped glyphs from a [`harfrust::GlyphBuffer`] into a [`Paragraph`].
///
/// When a cluster maps multiple characters to fewer glyphs (e.g. flag emojis,
/// ligatures), zero-width "continuation" glyphs are emitted for the extra
/// characters so that `glyphs.len() == char_count` — an invariant that all
/// cursor and selection code relies on.
fn layout_shaped_run(
font: &mut Font<'_>,
run: &TextRun,
@@ -232,6 +238,11 @@ fn layout_shaped_run(
// so they are not comparable across runs.
ctx.prev_cluster = None;
// Track how many glyphs we emit per cluster so we can add zero-width
// continuation glyphs when a cluster has more chars than glyphs.
let mut cluster_start_byte: usize = 0;
let mut cluster_glyph_count: usize = 0;
for (info, pos) in glyph_buffer
.glyph_infos()
.iter()
@@ -271,10 +282,22 @@ fn layout_shaped_run(
// Apply extra_letter_spacing only at cluster boundaries,
// never between glyphs within the same cluster (e.g. base + mark).
let is_new_cluster = ctx.prev_cluster.is_none_or(|pc| pc != cluster);
if !ctx.is_first_glyph_in_section && is_new_cluster {
paragraph.cursor_x_px += ctx.extra_letter_spacing * ctx.pixels_per_point;
}
if is_new_cluster {
if ctx.prev_cluster.is_some() {
emit_continuation_glyphs(
ctx,
paragraph,
run_text,
cluster_start_byte..cluster as usize,
cluster_glyph_count,
face_metrics,
);
}
if !ctx.is_first_glyph_in_section {
paragraph.cursor_x_px += ctx.extra_letter_spacing * ctx.pixels_per_point;
}
cluster_start_byte = cluster as usize;
cluster_glyph_count = 0;
ctx.is_first_glyph_in_section = false;
}
ctx.prev_cluster = Some(cluster);
@@ -353,6 +376,50 @@ fn layout_shaped_run(
)
};
paragraph.glyphs.push(glyph);
cluster_glyph_count += 1;
}
// Emit continuation glyphs for the last cluster in the run.
if ctx.prev_cluster.is_some() {
emit_continuation_glyphs(
ctx,
paragraph,
run_text,
cluster_start_byte..run_text.len(),
cluster_glyph_count,
face_metrics,
);
}
}
/// Emit zero-width continuation glyphs when a cluster has more characters than
/// shaped glyphs.
///
/// This preserves the invariant `glyphs.len() == char_count` that all cursor
/// and text-selection code depends on. Continuation glyphs have
/// [`UvRect::default()`] so [`tessellate_glyphs`] skips them entirely.
fn emit_continuation_glyphs(
ctx: &ShapingContext,
paragraph: &mut Paragraph,
run_text: &str,
cluster_bytes: Range<usize>,
cluster_glyph_count: usize,
face_metrics: &StyledMetrics,
) {
let Some(cluster_text) = run_text.get(cluster_bytes) else {
return;
};
let char_count = cluster_text.chars().count();
if char_count <= cluster_glyph_count {
return;
}
let physical_x = paragraph.cursor_x_px.round() as i32;
for chr in cluster_text.chars().skip(cluster_glyph_count) {
paragraph
.glyphs
.push(ctx.glyph(chr, physical_x, 0.0, face_metrics, UvRect::default()));
}
}
@@ -1369,6 +1436,7 @@ fn shape_text(
mod tests {
use super::{super::*, *};
use crate::text::cursor::CCursor;
#[test]
fn test_zero_max_width() {
@@ -1738,6 +1806,113 @@ mod tests {
}
}
/// Regression test for <https://github.com/emilk/egui/issues/8087>.
///
/// Multi-codepoint grapheme clusters (flag emojis, combining marks) must
/// produce exactly as many glyphs as characters so that cursor positioning
/// and text selection remain correct.
#[test]
fn test_grapheme_cluster_glyph_count() {
let pixels_per_point = 1.0;
let mut fonts = FontsImpl::new(TextOptions::default(), FontDefinitions::default());
let font_id = FontId::default();
// Each test case: (input text, expected char count)
let cases: &[(&str, usize)] = &[
// Flag emoji: two Regional Indicator codepoints → one visual glyph
("\u{1F1EF}\u{1F1F5}", 2), // 🇯🇵
// Flag surrounded by ASCII
("A\u{1F1EB}\u{1F1F7}B", 4), // A🇫🇷B
// Base char + combining acute accent
("e\u{0301}", 2), // é as decomposed
// Multiple combining marks
("o\u{0302}\u{0323}", 3), // ộ
// Plain ASCII (sanity check)
("Hello", 5),
];
for &(text, expected_chars) in cases {
let job = LayoutJob::simple(
text.to_owned(),
font_id.clone(),
Color32::WHITE,
f32::INFINITY,
);
let galley = layout(&mut fonts, pixels_per_point, job.into());
let total_glyphs: usize = galley.rows.iter().map(|r| r.row.glyphs.len()).sum();
assert_eq!(
total_glyphs,
expected_chars,
"Glyph count mismatch for {text:?}: \
expected {expected_chars} glyphs (one per char), got {total_glyphs}. \
Glyphs: {:?}",
galley.rows[0]
.row
.glyphs
.iter()
.map(|g| (g.chr, g.advance_width))
.collect::<Vec<_>>(),
);
// Verify that Row::text() reconstructs the input text.
let row_text: String = galley.rows.iter().map(|r| r.text()).collect();
assert_eq!(row_text, text, "Row::text() mismatch for {text:?}",);
// Verify cursor round-trip: end cursor index == char count.
assert_eq!(
galley.end().index,
expected_chars,
"Galley::end().index mismatch for {text:?}",
);
}
}
/// Verify that cursor positioning round-trips correctly for text
/// containing multi-codepoint grapheme clusters (regression test for #8087).
#[test]
fn test_grapheme_cluster_cursor_roundtrip() {
let pixels_per_point = 1.0;
let mut fonts = FontsImpl::new(TextOptions::default(), FontDefinitions::default());
let font_id = FontId::default();
// "A" + flag emoji (2 codepoints) + "B" = 4 chars
let text = "A\u{1F1EF}\u{1F1F5}B";
let job = LayoutJob::simple(
text.to_owned(),
font_id.clone(),
Color32::WHITE,
f32::INFINITY,
);
let galley = layout(&mut fonts, pixels_per_point, job.into());
// Walking through every cursor index should produce valid positions.
for i in 0..=galley.end().index {
let cursor = CCursor {
index: i,
prefer_next_row: false,
};
let rect = galley.pos_from_cursor(cursor);
assert!(
rect.is_finite(),
"pos_from_cursor returned non-finite rect for index {i}",
);
// Round-trip: position → cursor → position should be stable.
let cursor2 = galley.cursor_from_pos(Vec2::new(rect.center().x, rect.center().y));
let rect2 = galley.pos_from_cursor(cursor2);
assert!(
(rect.min.x - rect2.min.x).abs() < 1.0,
"Cursor round-trip unstable at index {i}: \
first={}, second={}, cursor2.index={}",
rect.min.x,
rect2.min.x,
cursor2.index,
);
}
}
fn measure_text(
fonts: &mut FontsImpl,
text: &str,