1
0
mirror of https://github.com/emilk/egui.git synced 2026-06-26 14:49:06 -04:00

Allow downscaling image in GetScreenshot inspection request (#8248)

When an agent screenshots the app using the mcp and then interacts with
the app by clicking at coords, they see the coords in the native image
coords. Since the mcp does everything else in logical coordinates, it
helps if the image they see is also in logical resolution, so we always
downscale it to 1.0.

I've added this here to avoid having to decode and re-encode the image
in the mcp.
Unfortunately it only does downscaling for now, since adding some way to
upscale the image just for the screenshot would add a lot of complexity,
and might be invasive from a plugin.

I've also changed the submit call to take a closure, to make it easier
to use other transport channels (makes the implementation for rerun's mcp
nicer).

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Lucas Meurer <lucas@rerun.io>
This commit is contained in:
Lucas Meurer
2026-06-23 10:51:42 +02:00
committed by GitHub
parent 7be2e9a2ae
commit 5ca09dc0b5
6 changed files with 158 additions and 58 deletions

View File

@@ -4859,9 +4859,9 @@ dependencies = [
[[package]]
name = "tracing"
version = "0.1.40"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
"pin-project-lite",
"tracing-attributes",
@@ -4870,9 +4870,9 @@ dependencies = [
[[package]]
name = "tracing-attributes"
version = "0.1.27"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
dependencies = [
"proc-macro2",
"quote",
@@ -4881,9 +4881,9 @@ dependencies = [
[[package]]
name = "tracing-core"
version = "0.1.32"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
]

View File

@@ -21,7 +21,8 @@ rustdoc-args = ["--generate-link-to-definition"]
default = []
## Screenshot PNG encoding — the `EncodedPng::from_color_image` / `from_rgba` constructors.
png = ["dep:image", "image/png"]
## `egui/bytemuck` is required for `ColorImage::as_raw` in `from_color_image`.
png = ["dep:image", "image/png", "egui/bytemuck"]
## `InspectionPlugin` — an `egui::Plugin` that serves the request/response inspection
## protocol over TCP. Apps usually enable inspection by setting the `EGUI_INSPECTION` env var

View File

@@ -15,7 +15,7 @@ serves it. An external inspector — such as the
- resize the window (`Resize`).
The protocol is strictly request → response, which maps cleanly onto both a TCP socket and a
unary RPC (so the same machinery can be tunnelled over e.g. gRPC).
unary RPC (so the same machinery can be tunnelled over another transport).
> **Screenshots need a visible window.** Reading the tree and injecting input work even while
> the app is in the background, but capturing a screenshot requires a rendered frame — which
@@ -26,8 +26,8 @@ unary RPC (so the same machinery can be tunnelled over e.g. gRPC).
## What it's for
`egui_inspection` is the shared foundation for tools that observe or drive an egui app from
the outside. Anything that speaks the protocol (over TCP, or another transport like gRPC)
can be a consumer:
the outside. Anything that speaks the protocol (over TCP, or another transport) can be a
consumer:
- **[`egui_mcp`](https://crates.io/crates/egui_mcp)** — an MCP server that exposes the app to
AI agents and other tooling: query the widget tree, click / type / scroll, take screenshots.

View File

@@ -6,11 +6,12 @@
//!
//! The plugin owns a list of in-flight requests. A connection thread (or a host with its own
//! transport) submits a [`Request`] through egui's own plugin
//! handle — `ctx.with_plugin::<InspectionPlugin, _>(|p| p.submit(req))` — which appends it and
//! returns a channel to await the single [`Response`] on, then calls `ctx.request_repaint()`
//! handle — `ctx.with_plugin::<InspectionPlugin, _>(|p| p.submit(req, on_reply))` — passing a
//! closure that is called once with the single [`Response`], then calls `ctx.request_repaint()`
//! so an idle app wakes up to service it. The reply is produced on the UI thread inside the
//! plugin's hooks, which receive the [`egui::Context`] to issue repaints and viewport
//! commands — so the plugin never has to store a `Context` itself.
//! plugin's hooks (so `on_reply` runs there too — keep it cheap, e.g. forward onto a channel),
//! which receive the [`egui::Context`] to issue repaints and viewport commands — so the plugin
//! never has to store a `Context` itself.
//!
//! [`serve`] binds a TCP listener; each accepted connection gets a thread that first writes
//! the protocol handshake, then loops reading framed [`Request`]s, submitting them, and
@@ -59,7 +60,10 @@ enum Phase {
struct InFlight {
req: Request,
reply: mpsc::Sender<Response>,
/// Called once, on the UI thread, with this request's reply. `Option` so it can be moved out
/// during `retain_mut` (which only hands out `&mut`) when the request completes.
reply: Option<Box<dyn FnOnce(Response) + Send + Sync>>,
phase: Phase,
}
@@ -80,7 +84,7 @@ pub struct InspectionPlugin {
impl InspectionPlugin {
/// Create the plugin and register it with [`Context::add_plugin`], then call [`serve`] to
/// listen on TCP (or feed it directly via `ctx.with_plugin(|p| p.submit(req))`).
/// listen on TCP (or feed it directly via `ctx.with_plugin(|p| p.submit(req, on_reply))`).
pub fn new(label: Option<String>) -> Self {
Self {
in_flight: Vec::new(),
@@ -90,17 +94,23 @@ impl InspectionPlugin {
}
}
/// Submit a request; returns a channel that receives its single reply once the UI thread
/// services it. Call this through [`Context::with_plugin`] so it runs under egui's plugin
/// lock, then `request_repaint` and await the receiver *after* the lock is released.
pub fn submit(&mut self, req: Request) -> mpsc::Receiver<Response> {
let (tx, rx) = mpsc::channel();
/// Submit an inspection [`Request`].
///
/// The closure will be called later once the result comes in (for screenshot that could mean
/// a couple frames delay).
///
/// You usually call this via [`Context::with_plugin`]. You should [`Context::request_repaint`]
/// after calling this.
pub fn submit(
&mut self,
req: Request,
on_reply: impl FnOnce(Response) + Send + Sync + 'static,
) {
self.in_flight.push(InFlight {
req,
reply: tx,
reply: Some(Box::new(on_reply)),
phase: Phase::New,
});
rx
}
/// While requests are still in flight, keep the UI loop spinning — reactive apps would
@@ -130,6 +140,7 @@ impl egui::Plugin for InspectionPlugin {
// Match screenshot replies to the requests that asked for them, by `user_data` id. We
// observe (don't consume) the event so the host app still receives it.
let pixels_per_point = ctx.pixels_per_point();
for ev in &input.events {
let egui::Event::Screenshot {
user_data, image, ..
@@ -145,21 +156,32 @@ impl egui::Plugin for InspectionPlugin {
else {
continue; // not one of ours
};
let png = match EncodedPng::from_color_image(image.as_ref()) {
Ok(png) => png,
Err(err) => {
// Shouldn't happen for a valid framebuffer; surface it loudly.
log::error!("egui_inspection: PNG encode failed: {err}");
continue;
}
};
self.in_flight.retain_mut(|item| {
if item.phase == (Phase::AwaitScreenshot { id }) {
let _ = item.reply.send(Response::Screenshot(png.clone()));
false
} else {
true
if item.phase != (Phase::AwaitScreenshot { id }) {
return true;
}
// Downscale to the request's requested pixels-per-point (px per logical point);
// the framebuffer is at the app's `pixels_per_point` px per point, so the scale
// factor is their ratio. `None` means native resolution (scale 1.0).
let scale = match item.req {
Request::GetScreenshot {
pixels_per_point: Some(requested_ppp),
} => requested_ppp / pixels_per_point,
_ => 1.0,
};
let png = match EncodedPng::from_color_image_scaled(image.as_ref(), scale) {
Ok(png) => png,
Err(err) => {
// Shouldn't happen for a valid framebuffer; surface it loudly and drop
// the request rather than hang on it.
log::error!("egui_inspection: PNG encode failed: {err}");
return false;
}
};
if let Some(reply) = item.reply.take() {
reply(Response::Screenshot(png));
}
false
});
}
@@ -174,10 +196,12 @@ impl egui::Plugin for InspectionPlugin {
}
match &item.req {
Request::GetInfo => {
let _ = item.reply.send(Response::Info {
label: label.clone(),
egui_version: env!("CARGO_PKG_VERSION").to_owned(),
});
if let Some(reply) = item.reply.take() {
reply(Response::Info {
label: label.clone(),
egui_version: env!("CARGO_PKG_VERSION").to_owned(),
});
}
false
}
Request::GetTree => {
@@ -200,7 +224,7 @@ impl egui::Plugin for InspectionPlugin {
item.phase = Phase::AwaitOutput;
true
}
Request::GetScreenshot => {
Request::GetScreenshot { .. } => {
// Dispatch now so the command lands in this frame's output and the capture
// is one frame sooner; the pixels arrive in a later `input_hook`. The id
// ties that `Event::Screenshot` back to this request.
@@ -229,15 +253,19 @@ impl egui::Plugin for InspectionPlugin {
self.in_flight
.retain_mut(|item| match (&item.phase, &item.req) {
(Phase::AwaitOutput, Request::GetTree) => {
let _ = item.reply.send(Response::Tree {
step,
pixels_per_point: output.pixels_per_point,
accesskit: output.platform_output.accesskit_update.clone(),
});
if let Some(reply) = item.reply.take() {
reply(Response::Tree {
step,
pixels_per_point: output.pixels_per_point,
accesskit: output.platform_output.accesskit_update.clone(),
});
}
false
}
(Phase::AwaitOutput, Request::ApplyEvents { .. } | Request::Resize { .. }) => {
let _ = item.reply.send(Response::Done);
if let Some(reply) = item.reply.take() {
reply(Response::Done);
}
false
}
_ => true,
@@ -336,14 +364,23 @@ fn serve_connection(stream: std::net::TcpStream, ctx: &Context) -> std::io::Resu
Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(()), // client gone
Err(err) => return Err(err),
};
let Some(rx) = ctx.with_plugin::<InspectionPlugin, _>(|p| p.submit(req)) else {
let (tx, rx) = mpsc::channel();
let registered = ctx
.with_plugin::<InspectionPlugin, _>(|p| {
p.submit(req, move |resp| {
let _ = tx.send(resp);
});
})
.is_some();
if !registered {
return write_message(
&mut writer,
&Response::Error {
message: "egui_inspection plugin not registered".to_owned(),
},
);
};
}
// Wake the (possibly idle) UI loop so it services the request.
ctx.request_repaint();
let resp = rx.recv_timeout(REQUEST_TIMEOUT).unwrap_or_else(|_| {

View File

@@ -12,6 +12,28 @@ impl EncodedPng {
Self::from_rgba(size, image.as_raw())
}
/// Encode an [`egui::ColorImage`] downscaled by `scale` — a factor in `(0.0, 1.0)` of the
/// captured pixel dimensions.
///
/// Any `scale` outside that open interval encodes at native resolution unchanged:
/// - `scale >= 1.0`: the framebuffer is the most detail available, so we never upscale.
/// - `scale <= 0.0` or NaN: there is no meaningful target size, so instead of collapsing the
///   capture to a 1×1 image we fall back to the native image.
///
/// Each target dimension is rounded to the nearest pixel and clamped to at least 1, so a very
/// small (but positive) `scale` still yields a valid image.
///
/// # Errors
/// When the encoder fails.
pub fn from_color_image_scaled(
    image: &egui::ColorImage,
    scale: f32,
) -> Result<Self, image::ImageError> {
    let [w, h] = [image.size[0] as u32, image.size[1] as u32];
    // Phrased as a positive range check so that NaN (for which every comparison is false)
    // lands on the native-resolution path together with non-positive and >= 1.0 factors.
    let downscale = scale > 0.0 && scale < 1.0;
    if !downscale || w == 0 || h == 0 {
        return Self::from_rgba([w, h], image.as_raw());
    }
    let tw = ((w as f32 * scale).round() as u32).max(1);
    let th = ((h as f32 * scale).round() as u32).max(1);
    // `resize` only reads the source, so view the pixels in place instead of copying the whole
    // framebuffer into a fresh `Vec` first.
    let src = image::ImageBuffer::<image::Rgba<u8>, &[u8]>::from_raw(w, h, image.as_raw())
        .expect("ColorImage backing buffer is always width * height * 4 bytes");
    let resized = image::imageops::resize(&src, tw, th, image::imageops::FilterType::Triangle);
    Self::from_rgba([tw, th], resized.as_raw())
}
/// Encode tightly-packed RGBA8 pixels (`width * height * 4` bytes) as PNG.
///
/// PNG keeps high-resolution captures off the hot path of socket throughput — a 1550×2114

View File

@@ -43,7 +43,14 @@ pub enum Request {
///
/// The peer issues an [`egui::ViewportCommand::Screenshot`] and replies once the
/// resulting [`egui::Event::Screenshot`] arrives (one extra frame).
GetScreenshot,
///
/// `pixels_per_point` is the requested output resolution in pixels per logical point: the
/// captured framebuffer (native resolution = the app's `pixels_per_point` px per point) is
/// downscaled to this many px per point before encoding. `1.0` yields a logical-point-sized
/// image so screenshot pixels align with the logical coordinates used everywhere else. Never
/// upscaled beyond native, so values above the app's `pixels_per_point` have no effect.
/// `None` captures at the framebuffer's native resolution, with no downscaling.
GetScreenshot { pixels_per_point: Option<f32> },
/// Inject raw egui input events and run a frame. Reply: [`Response::Done`], returned only
/// *after* the events have been applied by a frame — so a subsequent [`Self::GetTree`]
@@ -130,21 +137,34 @@ pub fn write_handshake<W: Write>(mut writer: W) -> io::Result<()> {
writer.flush()
}
/// Read and validate the connection handshake, returning the peer's protocol version.
/// Validate the 8 handshake bytes and return the peer's protocol version.
///
/// The bytes are [`PROTOCOL_MAGIC`] (4) followed by a big-endian version (4). Pure (no I/O) so
/// sync ([`read_handshake`]) and async readers share the validation, mirroring
/// [`decode_frame_len`].
///
/// # Errors
/// If the magic bytes don't match (not an egui inspection peer), or on I/O failure.
pub fn read_handshake<R: Read>(mut reader: R) -> io::Result<u32> {
let mut magic = [0u8; 4];
reader.read_exact(&mut magic)?;
/// If the magic bytes don't match (not an egui inspection peer).
pub fn decode_handshake(bytes: [u8; 8]) -> io::Result<u32> {
let (magic, version) = bytes.split_at(4);
if magic != PROTOCOL_MAGIC {
return Err(invalid_data(
"not an egui_inspection peer (bad handshake magic)",
));
}
let mut version = [0u8; 4];
reader.read_exact(&mut version)?;
Ok(u32::from_be_bytes(version))
Ok(u32::from_be_bytes(
version.try_into().expect("split_at(4) leaves 4 bytes"),
))
}
/// Read and validate the connection handshake, returning the peer's protocol version.
///
/// # Errors
/// If the magic bytes don't match (not an egui inspection peer), or on I/O failure.
pub fn read_handshake<R: Read>(mut reader: R) -> io::Result<u32> {
let mut bytes = [0u8; 8];
reader.read_exact(&mut bytes)?;
decode_handshake(bytes)
}
/// Encode a value into a length-prefixed `MessagePack` frame (4-byte big-endian length + body).
@@ -183,6 +203,26 @@ pub fn decode_frame_body<T: for<'de> serde::Deserialize<'de>>(body: &[u8]) -> io
rmp_serde::from_slice(body).map_err(invalid_data)
}
/// Serialize `value` to a bare `MessagePack` body — the same kind of payload a frame carries,
/// but *without* the 4-byte length prefix that [`encode_frame`] prepends.
///
/// Use this on transports that already delimit messages on their own (for example a gRPC
/// unary call carrying the bytes in a `bytes` field), where the prefix would be redundant.
/// The inverse operation is [`decode_body`].
///
/// # Errors
/// On encode failure.
pub fn encode_body<T>(value: &T) -> io::Result<Vec<u8>>
where
    T: serde::Serialize,
{
    let body = rmp_serde::to_vec(value).map_err(invalid_data)?;
    Ok(body)
}
/// Deserialize a value from a bare `MessagePack` body, i.e. bytes produced by [`encode_body`]
/// (no length prefix).
///
/// # Errors
/// On decode failure.
pub fn decode_body<T>(body: &[u8]) -> io::Result<T>
where
    T: for<'de> serde::Deserialize<'de>,
{
    let value = rmp_serde::from_slice(body).map_err(invalid_data)?;
    Ok(value)
}
/// Read one length-prefixed `MessagePack` message.
///
/// # Errors