From e15ba138d6fbf9585e5090aa643d138e4db8a0c0 Mon Sep 17 00:00:00 2001 From: lucasmerlin Date: Thu, 21 May 2026 12:11:00 +0200 Subject: [PATCH] Add egui_kittest_mcp server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New binary crate that exposes an MCP (Model Context Protocol) server backed by the `egui_inspection` protocol. The server bridges a running egui peer — a spawned `egui_kittest` harness child process or an attached live `eframe` app — to MCP tool handlers that drive it. Components: - `bridge.rs`: spawns / attaches a peer over a unix socket, runs reader+writer Tokio tasks that pump `HarnessMessage` ↔ `InspectorCommand` and track the peer's `Hello`, latest frame, accesskit tree, and blocked / finished state. - `tools.rs`: `rmcp`-derived tool router with commands for stepping, event injection (click / type / scroll / hover / drag / keys), resizing, screenshot capture, accesskit tree queries, and lifecycle (launch / attach / kill). - `tree.rs`: accesskit-tree projection helpers shared by the tools. - `shim.rs` / `main.rs`: shim role that lets the same binary act as the child inspector for kittest harnesses, relaying bytes between the harness stdio and the MCP server's unix socket. - `server.rs`: rmcp stdio entry point. Live-app example added at `examples/egui_mcp/`. 
--- Cargo.lock | 403 +++++++- Cargo.toml | 1 + crates/egui_kittest_mcp/Cargo.toml | 49 + crates/egui_kittest_mcp/src/bridge.rs | 435 ++++++++ crates/egui_kittest_mcp/src/main.rs | 45 + crates/egui_kittest_mcp/src/server.rs | 16 + crates/egui_kittest_mcp/src/shim.rs | 50 + crates/egui_kittest_mcp/src/tools.rs | 1313 +++++++++++++++++++++++++ crates/egui_kittest_mcp/src/tree.rs | 206 ++++ examples/egui_mcp/Cargo.toml | 17 + examples/egui_mcp/src/main.rs | 27 + 11 files changed, 2553 insertions(+), 9 deletions(-) create mode 100644 crates/egui_kittest_mcp/Cargo.toml create mode 100644 crates/egui_kittest_mcp/src/bridge.rs create mode 100644 crates/egui_kittest_mcp/src/main.rs create mode 100644 crates/egui_kittest_mcp/src/server.rs create mode 100644 crates/egui_kittest_mcp/src/shim.rs create mode 100644 crates/egui_kittest_mcp/src/tools.rs create mode 100644 crates/egui_kittest_mcp/src/tree.rs create mode 100644 examples/egui_mcp/Cargo.toml create mode 100644 examples/egui_mcp/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 1740978b4..1c7c26bd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -450,9 +450,9 @@ checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", @@ -621,7 +621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.4.8", "serde", ] @@ -734,6 +734,18 @@ dependencies = [ "libc", ] +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "num-traits", + "serde", + "windows-link", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -1087,6 +1099,40 @@ dependencies = [ "env_logger", ] +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "data-url" version = "0.3.1" @@ -1201,6 +1247,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f25c0e292a7ca6d6498557ff1df68f32c99850012b6ea401cf8daf771f22ff53" +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "ecolor" version = "0.34.2" @@ -1433,6 +1485,38 @@ dependencies = [ "wgpu", ] +[[package]] +name = "egui_kittest_mcp" +version = "0.34.2" +dependencies = [ + "accesskit", + "accesskit_consumer", + "anyhow", + "base64", + "egui", + "egui_inspection", + "egui_kittest", + "image", + "rmcp", + "rmp-serde", + "schemars", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "egui_mcp" +version = "0.1.0" +dependencies = [ 
+ "egui", + "egui_demo_lib", + "egui_kittest", +] + [[package]] name = "egui_tests" version = "0.34.2" @@ -1673,8 +1757,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f" dependencies = [ "bit-set 0.8.0", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", ] [[package]] @@ -1823,12 +1907,48 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + [[package]] name = "futures-core" version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.32" @@ -1859,6 +1979,12 @@ dependencies = [ "syn", ] +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + [[package]] name = "futures-task" version = "0.3.32" @@ -1871,9 +1997,13 @@ version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-channel", "futures-core", + "futures-io", "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "slab", ] @@ -2217,6 +2347,30 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.0.0" @@ -2303,6 +2457,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -2713,6 +2873,15 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.7.4" @@ -2874,6 +3043,16 @@ version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-conv" version = "0.2.0" @@ -3293,6 +3472,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "owned_ttf_parser" version = "0.25.0" @@ -3341,6 +3526,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pastey" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee67f1008b1ba2321834326597b8e186293b049a023cdef258527550b9935b4" + [[package]] name = "pathdiff" version = "0.2.3" @@ -3889,6 +4080,26 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" version = "1.11.1" @@ -3897,8 +4108,17 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -3909,9 +4129,15 @@ checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -3988,6 +4214,41 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0810a9f717d9828f475fe1f629f4c305c8464b7f496c3a854b58d29e65f4058e" +dependencies = [ + "async-trait", + "base64", + "chrono", + "futures", + "pastey", + "pin-project-lite", + "rmcp-macros", + "schemars", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "rmcp-macros" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aefac48c364756e97f04c0401ba3231e8607882c7c1d92da0437dc16307904d" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "serde_json", + "syn", +] + [[package]] name = "rmp" version = "0.8.15" @@ -4148,6 +4409,32 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "chrono", + "dyn-clone", + "ref-cast", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -4234,6 +4521,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "serde_json" version = "1.0.149" @@ -4276,6 +4574,15 @@ dependencies = [ "log", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -4467,6 +4774,12 @@ dependencies = [ "float-cmp", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.6.1" @@ -4517,7 +4830,7 @@ dependencies = [ "fnv", "once_cell", "plist", - "regex-syntax", + "regex-syntax 0.8.5", "serde", "serde_derive", "serde_json", @@ -4637,6 +4950,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tiff" version = "0.9.1" @@ -4746,13 +5068,40 @@ version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ + "bytes", "libc", "mio", "pin-project-lite", + "signal-hook-registry", "socket2", + "tokio-macros", 
"windows-sys 0.61.2", ] +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml" version = "1.0.6+spec-1.1.0" @@ -4825,6 +5174,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -5058,6 +5437,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vello_common" version = "0.0.8" diff --git a/Cargo.toml b/Cargo.toml index e46fbb611..f338fd112 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "crates/egui_glow", 
"crates/egui_inspection", "crates/egui_kittest", + "crates/egui_kittest_mcp", "crates/egui-wgpu", "crates/egui-winit", "crates/egui", diff --git a/crates/egui_kittest_mcp/Cargo.toml b/crates/egui_kittest_mcp/Cargo.toml new file mode 100644 index 000000000..6435da073 --- /dev/null +++ b/crates/egui_kittest_mcp/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "egui_kittest_mcp" +version.workspace = true +authors = ["Lucas Meurer "] +description = "MCP server that drives egui apps via the kittest inspector protocol" +edition.workspace = true +rust-version.workspace = true +homepage = "https://github.com/emilk/egui" +license.workspace = true +repository = "https://github.com/emilk/egui" +categories = ["gui", "development-tools::testing"] +keywords = ["egui", "kittest", "mcp", "testing", "accesskit"] +publish = false + +[[bin]] +name = "kittest-mcp" +path = "src/main.rs" + +[dependencies] +egui_kittest = { workspace = true, features = ["inspector_api", "wgpu", "snapshot"] } +egui_inspection = { workspace = true, features = ["protocol"] } +egui.workspace = true +accesskit.workspace = true +accesskit_consumer.workspace = true +image = { workspace = true, features = ["png"] } +rmp-serde.workspace = true +serde = { workspace = true, features = ["derive"] } +serde_json = "1.0" +schemars = "1.0" +rmcp = { version = "1.7", features = ["server", "macros", "transport-io", "schemars"] } +tempfile.workspace = true +tokio = { version = "1.49", features = [ + "rt-multi-thread", + "io-std", + "io-util", + "process", + "net", + "sync", + "macros", + "time", + "signal", +] } +base64 = "0.22" +anyhow = "1.0" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[lints] +workspace = true diff --git a/crates/egui_kittest_mcp/src/bridge.rs b/crates/egui_kittest_mcp/src/bridge.rs new file mode 100644 index 000000000..8928c173d --- /dev/null +++ b/crates/egui_kittest_mcp/src/bridge.rs @@ -0,0 +1,435 @@ +//! 
Bridge between the MCP server and a running kittest harness child process. +//! +//! Lifecycle: +//! 1. [`Bridge::launch`] binds a unix domain socket, spawns the target binary with +//! [`crate::HANDSHAKE_ENV_VAR`] + `KITTEST_INSPECTOR=1` + +//! `KITTEST_INSPECTOR_PATH=`, and waits for the shim to connect. +//! 2. A reader task decodes [`HarnessMessage`]s from the socket and updates [`SharedState`]. +//! 3. A writer task drains [`InspectorCommand`]s queued by MCP tool handlers and writes +//! them to the socket. +//! 4. Tool handlers observe [`SharedState`] via [`Bridge::snapshot`] and wait for new +//! frames or `Finished` via [`Bridge::wait_for_frame_after`]. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context as _, anyhow, bail}; +use egui_inspection::protocol::{Frame, HarnessMessage, InspectorCommand, SourceView}; +use serde::Serialize; +use tokio::io::{AsyncReadExt as _, AsyncWriteExt as _}; +use tokio::net::UnixListener; +use tokio::process::{Child, Command}; +use tokio::sync::{Mutex, Notify, mpsc}; +use tokio::task::JoinHandle; +use tokio::time::timeout; + +/// Hard cap matching `inspector_api::MAX_MESSAGE_BYTES` so framing-level DoS is bounded. +const MAX_MESSAGE_BYTES: usize = 256 * 1024 * 1024; + +/// One in-flight peer (a spawned kittest harness or an attached live app) + the tasks +/// that talk to it. +pub struct Bridge { + pub state: Arc, + /// Outgoing command queue → writer task → socket. + cmd_tx: mpsc::UnboundedSender, + /// Tokio task handles. Aborted on `Drop`; the child is killed too. + _reader_task: JoinHandle<()>, + _writer_task: JoinHandle<()>, + /// `Child` wrapped in a `Mutex` so a `kill` tool can take it. `None` in attach mode — + /// we don't own the lifecycle of an externally-started app. + child: Arc>>, + /// Temp dir holding the unix socket — kept alive while the bridge is. + _socket_dir: tempfile::TempDir, + /// How this bridge was created (informational). 
+ pub peer_info: PeerInfo, +} + +#[derive(Debug, Clone, Serialize)] +#[serde(tag = "mode", rename_all = "snake_case")] +pub enum PeerInfo { + /// Bridge spawned a child harness process. + Launched { + bin: PathBuf, + args: Vec, + pid: u32, + }, + /// Bridge bound a socket and accepted an incoming connection from a live app. + Attached { socket_path: PathBuf }, +} + +/// Mutable state observed by MCP tool handlers. +/// +/// Guarded by a `Mutex` (not `RwLock`) because writers and readers contend on the same +/// fields and acquire-cost is dominated by the rare `Frame` arrival, not lock contention. +pub struct SharedState { + inner: Mutex, + /// Notified whenever `inner` changes in a way a waiter might care about (new frame, + /// blocked transition, finished). Coarse-grained on purpose. + notify: Notify, +} + +#[derive(Default)] +struct Inner { + /// Set on receipt of [`HarnessMessage::Hello`]. `None` until the peer connects. + pub hello: Option, + pub latest_frame: Option>, + pub blocked: bool, + pub finished: Option, + /// Latest accesskit tree (re-built each time a `TreeUpdate` arrives). + pub accesskit_tree: Option, +} + +#[derive(Debug, Clone, Serialize)] +pub struct FinishedInfo { + pub ok: bool, + pub message: Option, + pub source: Option, +} + +/// Snapshot returned to tool handlers so they can drop the mutex before responding. +#[derive(Clone)] +pub struct StateSnapshot { + /// Peer identity + capabilities, captured at connect time. Used by tool handlers to + /// gate commands the peer doesn't honor (Step/Run/Pause against a live app, etc.). 
+ #[expect(dead_code, reason = "consumed by upcoming capability-gating in tool handlers")] + pub hello: Option, + pub frame: Option>, + pub blocked: bool, + pub finished: Option, +} + +impl SharedState { + fn new() -> Arc { + Arc::new(Self { + inner: Mutex::new(Inner::default()), + notify: Notify::new(), + }) + } + + pub async fn snapshot(&self) -> StateSnapshot { + let g = self.inner.lock().await; + StateSnapshot { + hello: g.hello.clone(), + frame: g.latest_frame.clone(), + blocked: g.blocked, + finished: g.finished.clone(), + } + } + + /// Read-only access to the accesskit tree via a closure. The tree isn't `Clone`, so + /// callers project the data they need (node list, lookup by id) before returning. + pub async fn with_tree( + &self, + f: impl FnOnce(Option<&accesskit_consumer::Tree>) -> R, + ) -> R { + let g = self.inner.lock().await; + f(g.accesskit_tree.as_ref()) + } + + /// Await the next state-change notification. Used by tools that poll (e.g. `wait_for`) + /// to wake on a new frame / blocked transition without busy-looping. 
+ pub async fn notified(&self) { + self.notify.notified().await; + } +} + +impl Bridge { + pub async fn launch( + bin: PathBuf, + args: Vec, + env: Vec<(String, String)>, + cwd: Option, + ) -> anyhow::Result { + let self_path = std::env::current_exe() + .context("get current_exe for KITTEST_INSPECTOR_PATH")?; + + let socket_dir = tempfile::Builder::new() + .prefix("kittest-mcp-") + .tempdir() + .context("create temp dir for handshake socket")?; + let socket_path = socket_dir.path().join("kittest.sock"); + + let listener = UnixListener::bind(&socket_path) + .with_context(|| format!("bind {}", socket_path.display()))?; + + let mut cmd = Command::new(&bin); + cmd.args(&args) + .env("KITTEST_INSPECTOR", "1") + .env("KITTEST_INSPECTOR_PATH", &self_path) + .env(crate::HANDSHAKE_ENV_VAR, &socket_path) + .stdin(std::process::Stdio::null()) + // Harness inspector path: the child's stdout/stderr aren't ours — they get + // captured by the shim. We don't need them in the MCP server. + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .kill_on_drop(true); + for (k, v) in &env { + cmd.env(k, v); + } + if let Some(d) = &cwd { + cmd.current_dir(d); + } + + let mut child = cmd + .spawn() + .with_context(|| format!("spawn {}", bin.display()))?; + let pid = child.id().unwrap_or(0); + + // Accept with a short timeout. If the binary fails to start, exits early, or + // doesn't have the inspector wired up, we surface that instead of hanging forever. + let (stream, _addr) = match timeout(Duration::from_secs(10), listener.accept()).await { + Ok(Ok(pair)) => pair, + Ok(Err(e)) => { + let _ = child.kill().await; + bail!("accept on handshake socket: {e}"); + } + Err(_) => { + let _ = child.kill().await; + // Try to report the child's exit status if it died early. 
+ let status_hint = match child.try_wait() { + Ok(Some(s)) => format!(" (child exited {s})"), + _ => String::new(), + }; + bail!("timed out waiting for inspector handshake{status_hint}"); + } + }; + + let (reader, writer) = stream.into_split(); + let state = SharedState::new(); + let (cmd_tx, cmd_rx) = mpsc::unbounded_channel(); + let child_arc: Arc>> = Arc::new(Mutex::new(Some(child))); + + let reader_task = tokio::spawn(read_loop(reader, state.clone(), child_arc.clone())); + let writer_task = tokio::spawn(write_loop(writer, cmd_rx)); + + Ok(Self { + state, + cmd_tx, + _reader_task: reader_task, + _writer_task: writer_task, + child: child_arc, + _socket_dir: socket_dir, + peer_info: PeerInfo::Launched { bin, args, pid }, + }) + } + + /// Bind a unix socket and return the path immediately. The caller is responsible for + /// starting the app with `EGUI_INSPECTION_SOCKET` set to this path. Call + /// [`Self::accept_pending`] once the app is running. + /// + /// Returns the temp-dir handle (must be kept alive) and the listener. + pub async fn prepare_attach() -> anyhow::Result<(tempfile::TempDir, UnixListener, PathBuf)> { + let socket_dir = tempfile::Builder::new() + .prefix("egui-inspection-") + .tempdir() + .context("create temp dir for inspection socket")?; + let socket_path = socket_dir.path().join("inspection.sock"); + let listener = UnixListener::bind(&socket_path) + .with_context(|| format!("bind {}", socket_path.display()))?; + Ok((socket_dir, listener, socket_path)) + } + + /// Finish an attach started with [`Self::prepare_attach`] — wait for an inbound + /// connection and spawn the reader/writer tasks. + /// + /// `child` is the optional child process that was spawned with the socket env var + /// pre-set. Passing it here lets `kill` reach it and `kill_on_drop` clean up if the + /// bridge is dropped. 
+ pub async fn accept_pending( + socket_dir: tempfile::TempDir, + listener: UnixListener, + socket_path: PathBuf, + child: Option, + accept_timeout: Duration, + ) -> anyhow::Result { + let (stream, _addr) = match timeout(accept_timeout, listener.accept()).await { + Ok(Ok(pair)) => pair, + Ok(Err(e)) => bail!("accept on inspection socket: {e}"), + Err(_) => bail!("timed out waiting for inbound connection at {}", socket_path.display()), + }; + + let (reader, writer) = stream.into_split(); + let state = SharedState::new(); + let (cmd_tx, cmd_rx) = mpsc::unbounded_channel(); + let child_arc: Arc>> = Arc::new(Mutex::new(child)); + let reader_task = tokio::spawn(read_loop(reader, state.clone(), child_arc.clone())); + let writer_task = tokio::spawn(write_loop(writer, cmd_rx)); + + Ok(Self { + state, + cmd_tx, + _reader_task: reader_task, + _writer_task: writer_task, + child: child_arc, + _socket_dir: socket_dir, + peer_info: PeerInfo::Attached { socket_path }, + }) + } + + pub fn send(&self, cmd: InspectorCommand) -> anyhow::Result<()> { + self.cmd_tx + .send(cmd) + .map_err(|_| anyhow!("inspector writer task is gone")) + } + + /// Wait for either a new frame whose `step > prev_step`, or a `Finished` signal, + /// whichever comes first. Returns the resulting snapshot or times out. 
+ pub async fn wait_for_frame_after( + &self, + prev_step: u64, + wait: Duration, + ) -> anyhow::Result { + let deadline = tokio::time::Instant::now() + wait; + loop { + let snap = self.state.snapshot().await; + if snap.finished.is_some() { + return Ok(snap); + } + if let Some(f) = &snap.frame { + if f.step > prev_step { + return Ok(snap); + } + } + let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); + if remaining.is_zero() { + bail!("timed out waiting for next frame after step {prev_step}"); + } + let _ = timeout(remaining, self.state.notify.notified()).await; + } + } + + pub async fn kill(&self) { + if let Some(mut c) = self.child.lock().await.take() { + let _ = c.kill().await; + } + } +} + +impl Drop for Bridge { + fn drop(&mut self) { + // Best-effort: ensure the child is reaped. `kill_on_drop(true)` on `Command` also + // guarantees this, but we set it explicitly for the case where someone replaces the + // `Child` and forgets the flag. + if let Ok(mut g) = self.child.try_lock() { + if let Some(mut c) = g.take() { + let _ = c.start_kill(); + } + } + } +} + +async fn read_loop( + mut reader: tokio::net::unix::OwnedReadHalf, + state: Arc, + child: Arc>>, +) { + loop { + let msg = match read_message(&mut reader).await { + Ok(m) => m, + Err(e) => { + tracing::debug!("inspector socket read ended: {e}"); + break; + } + }; + apply_message(&state, msg).await; + } + // Reader ended → harness is gone. Make sure we eventually reap the child. + if let Some(mut c) = child.lock().await.take() { + let _ = c.kill().await; + } + // Wake any waiter so they can observe disconnection. 
+ state.notify.notify_waiters(); +} + +async fn apply_message(state: &SharedState, msg: HarnessMessage) { + let mut g = state.inner.lock().await; + match msg { + HarnessMessage::Hello(hello) => { + g.hello = Some(hello); + } + HarnessMessage::Frame(frame) => { + if let Some(update) = &frame.accesskit { + let mut noop = NoopChangeHandler; + match g.accesskit_tree.as_mut() { + Some(tree) => tree.update_and_process_changes(update.clone(), &mut noop), + None => { + g.accesskit_tree = + Some(accesskit_consumer::Tree::new(update.clone(), false)); + } + } + } + g.latest_frame = Some(frame); + } + HarnessMessage::Blocked(b) => g.blocked = b, + HarnessMessage::Finished { + ok, + message, + source, + } => { + g.finished = Some(FinishedInfo { + ok, + message, + source, + }); + g.blocked = true; + } + } + drop(g); + state.notify.notify_waiters(); +} + +struct NoopChangeHandler; + +impl accesskit_consumer::TreeChangeHandler for NoopChangeHandler { + fn node_added(&mut self, _: &accesskit_consumer::Node<'_>) {} + fn node_updated( + &mut self, + _: &accesskit_consumer::Node<'_>, + _: &accesskit_consumer::Node<'_>, + ) { + } + fn focus_moved( + &mut self, + _: Option<&accesskit_consumer::Node<'_>>, + _: Option<&accesskit_consumer::Node<'_>>, + ) { + } + fn node_removed(&mut self, _: &accesskit_consumer::Node<'_>) {} +} + +async fn write_loop( + mut writer: tokio::net::unix::OwnedWriteHalf, + mut rx: mpsc::UnboundedReceiver, +) { + while let Some(cmd) = rx.recv().await { + if let Err(e) = write_message(&mut writer, &cmd).await { + tracing::debug!("inspector socket write ended: {e}"); + break; + } + } +} + +async fn read_message(stream: &mut tokio::net::unix::OwnedReadHalf) -> anyhow::Result { + let mut len_buf = [0u8; 4]; + stream.read_exact(&mut len_buf).await?; + let len = u32::from_be_bytes(len_buf) as usize; + if len > MAX_MESSAGE_BYTES { + bail!("message too large: {len} bytes"); + } + let mut buf = vec![0u8; len]; + stream.read_exact(&mut buf).await?; + 
rmp_serde::from_slice(&buf).map_err(|e| anyhow!("decode: {e}")) +} + +async fn write_message( + stream: &mut tokio::net::unix::OwnedWriteHalf, + msg: &InspectorCommand, +) -> anyhow::Result<()> { + let bytes = rmp_serde::to_vec(msg).map_err(|e| anyhow!("encode: {e}"))?; + let len = u32::try_from(bytes.len())?; + stream.write_all(&len.to_be_bytes()).await?; + stream.write_all(&bytes).await?; + stream.flush().await?; + Ok(()) +} diff --git a/crates/egui_kittest_mcp/src/main.rs b/crates/egui_kittest_mcp/src/main.rs new file mode 100644 index 000000000..d47c77fc8 --- /dev/null +++ b/crates/egui_kittest_mcp/src/main.rs @@ -0,0 +1,45 @@ +//! `kittest-mcp` — dual-role binary. +//! +//! Default role: **MCP server**. Speaks MCP JSON-RPC over stdio to an agent. Exposes a +//! `launch` tool that spawns a target egui kittest binary with the inspector protocol +//! pointed back at this same executable in shim mode. +//! +//! Shim role: activated when [`HANDSHAKE_ENV_VAR`] is set. The target binary's +//! [`egui_kittest::InspectorPlugin`] thinks it's talking to the regular `kittest_inspector` +//! over stdio; in reality it's talking to us, and we relay the bytes to the MCP server +//! over a unix domain socket. + +mod bridge; +mod server; +mod shim; +mod tools; +mod tree; + +/// Env var carrying the unix socket path the shim should connect to. +pub const HANDSHAKE_ENV_VAR: &str = "KITTEST_MCP_HANDSHAKE"; + +fn main() -> anyhow::Result<()> { + if let Ok(socket_path) = std::env::var(HANDSHAKE_ENV_VAR) { + // Shim role: relay bytes between harness stdio and the MCP server's socket. + // No tokio runtime — keep the dependency surface tiny and the relay deterministic. + shim::run(&socket_path) + } else { + // Server role: MCP over stdio. 
+ init_tracing(); + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; + rt.block_on(server::run()) + } +} + +fn init_tracing() { + use tracing_subscriber::EnvFilter; + let filter = EnvFilter::try_from_env("KITTEST_MCP_LOG") + .unwrap_or_else(|_| EnvFilter::new("kittest_mcp=info,warn")); + // stderr only — stdout is reserved for MCP JSON-RPC traffic. + let _ = tracing_subscriber::fmt() + .with_env_filter(filter) + .with_writer(std::io::stderr) + .try_init(); +} diff --git a/crates/egui_kittest_mcp/src/server.rs b/crates/egui_kittest_mcp/src/server.rs new file mode 100644 index 000000000..32ae6e1ce --- /dev/null +++ b/crates/egui_kittest_mcp/src/server.rs @@ -0,0 +1,16 @@ +//! MCP server entry point, built on the official `rmcp` SDK over stdio. +//! +//! [`run`] constructs a [`crate::tools::Server`] (which derives its tool router via +//! `#[tool_router]`) and serves it on `(stdin, stdout)`. Returns once the client closes +//! the connection (EOF on stdin) or the runtime is shut down. + +use rmcp::{ServiceExt, transport}; + +use crate::tools::Server; + +pub async fn run() -> anyhow::Result<()> { + let server = Server::new(); + let running = server.serve(transport::stdio()).await?; + let _reason = running.waiting().await?; + Ok(()) +} diff --git a/crates/egui_kittest_mcp/src/shim.rs b/crates/egui_kittest_mcp/src/shim.rs new file mode 100644 index 000000000..93bf0c1c6 --- /dev/null +++ b/crates/egui_kittest_mcp/src/shim.rs @@ -0,0 +1,50 @@ +//! Inspector shim role. +//! +//! Connects to the MCP server's unix domain socket and relays bytes in both directions +//! between the harness's stdio and that socket. +//! +//! From the harness's perspective we're an ordinary `kittest_inspector` (msgpack framed +//! messages on stdin/stdout). The MCP server sees the same framed bytes on the other end of +//! the socket. We don't parse or interpret anything here — pure byte relay keeps the shim +//! independent of protocol revisions. 
/// Runs the shim: a plain byte relay between this process's stdio and the unix socket at
/// `socket_path`.
///
/// One spawned thread copies stdin → socket; the calling thread copies socket → stdout.
/// No bytes are parsed or interpreted in either direction.
///
/// # Errors
///
/// Fails if connecting to `socket_path`, cloning the stream, or spawning the stdin
/// thread fails. I/O errors during the relay itself only end the relay; they are not
/// reported.
pub fn run(socket_path: &str) -> anyhow::Result<()> {
    let stream = UnixStream::connect(socket_path)
        .map_err(|e| anyhow::anyhow!("connect {socket_path}: {e}"))?;
    // Two handles onto the same socket: one per relay direction.
    let stream_to_stdout = stream.try_clone()?;
    let mut stdin_to_socket = stream;

    // Thread A: stdin (from harness) → socket (to MCP server).
    let t_in = thread::Builder::new()
        .name("kittest-mcp-shim-stdin".into())
        .spawn(move || {
            let mut stdin = std::io::stdin().lock();
            let _ = std::io::copy(&mut stdin, &mut stdin_to_socket);
            // EOF on stdin or write error → shutdown write side so peer sees EOF.
            let _ = stdin_to_socket.shutdown(std::net::Shutdown::Write);
        })?;

    // Direction B: socket (from MCP server) → stdout (to harness), relayed on the calling
    // thread. The loop ends on socket EOF, a socket read error, or a stdout write error.
    let mut stdout = std::io::stdout().lock();
    let mut buf = vec![0u8; 64 * 1024];
    let mut reader = stream_to_stdout;
    loop {
        match reader.read(&mut buf) {
            Ok(0) | Err(_) => break,
            Ok(n) => {
                if stdout.write_all(&buf[..n]).is_err() {
                    break;
                }
                // Flush after every chunk so the harness sees each message promptly.
                let _ = stdout.flush();
            }
        }
    }

    // NOTE(review): this join waits for thread A, which only ends once stdin reaches EOF
    // or a write to the socket fails — so the process does not necessarily exit as soon
    // as the loop above ends. Confirm that is intended.
    let _ = t_in.join();
    Ok(())
}
+//! +//! Recoverable failures (no app running, node not found, etc.) are returned as a tool +//! result with `isError: true`, not as a JSON-RPC error — per MCP spec, recoverable tool +//! failures belong in `result`. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context as _, anyhow, bail}; +use base64::Engine as _; +use egui::Event; +use egui_inspection::protocol::InspectorCommand; +use rmcp::{ + ErrorData as McpError, ServerHandler, + handler::server::{router::tool::ToolRouter, wrapper::Parameters}, + model::{CallToolResult, Content, Implementation, ServerCapabilities, ServerInfo}, + schemars, tool, tool_handler, tool_router, +}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::{Value, json}; +use tokio::sync::Mutex; + +use crate::bridge::{Bridge, StateSnapshot}; +use crate::tree::{self, Locator, NodeView, QueryFilter}; + +// --------------------------------------------------------------------------------------- +// App state + Server wrapper +// --------------------------------------------------------------------------------------- + +/// Holds the single in-flight bridge to a kittest harness / live egui app. Shared between +/// all `#[tool]` handlers on [`Server`]. +#[derive(Default)] +pub struct AppState { + bridge: Mutex>, +} + +impl AppState { + pub fn new() -> Arc { + Arc::new(Self::default()) + } +} + +#[derive(Clone)] +pub struct Server { + state: Arc, + #[allow(dead_code, reason = "read by the `#[tool_router]` macro expansion")] + tool_router: ToolRouter, +} + +impl Server { + pub fn new() -> Self { + Self { + state: AppState::new(), + tool_router: Self::tool_router(), + } + } + + /// Acquire the bridge lock, run `f(bridge)`, and shape the result into a + /// `CallToolResult`. Returns `is_error: true` if no app is running, matching MCP + /// spec for recoverable failures. The future returned by `f` is boxed so the closure + /// can borrow the bridge across an `await` point. 
+ async fn run_inner(&self, f: F) -> Result + where + R: Serialize, + F: for<'a> FnOnce( + &'a Bridge, + ) -> std::pin::Pin< + Box> + Send + 'a>, + >, + { + let guard = self.state.bridge.lock().await; + let Some(bridge) = guard.as_ref() else { + return Ok(text_error( + "no app running — call `launch` or `attach` first", + )); + }; + match f(bridge).await { + Ok(v) => Ok(text_ok(&v)), + Err(e) => Ok(text_error(format!("{e:#}"))), + } + } +} + +impl Default for Server { + fn default() -> Self { + Self::new() + } +} + +// --------------------------------------------------------------------------------------- +// Helpers: ToolResult shaping +// --------------------------------------------------------------------------------------- + +fn text_ok(value: &T) -> CallToolResult { + match serde_json::to_string(value) { + Ok(s) => CallToolResult::success(vec![Content::text(s)]), + Err(e) => text_error(format!("serialize result: {e}")), + } +} + +fn text_error(msg: impl Into) -> CallToolResult { + CallToolResult::error(vec![Content::text(msg.into())]) +} + +// --------------------------------------------------------------------------------------- +// Target — locator OR raw position, shared by click / hover / scroll / drag. +// --------------------------------------------------------------------------------------- + +#[derive(Debug, Clone, Default, Deserialize, JsonSchema)] +pub struct Target { + /// Decimal AccessKit node id from `query_tree`. + #[serde(default)] + pub id: Option, + #[serde(default)] + pub role: Option, + #[serde(default)] + pub label_contains: Option, + /// Raw position in logical points (use instead of locator fields). 
+ #[serde(default)] + pub pos: Option, +} + +#[derive(Debug, Clone, Copy, Deserialize, JsonSchema)] +pub struct Pos2Lit { + pub x: f32, + pub y: f32, +} + +impl Target { + fn has_any(&self) -> bool { + self.id.is_some() + || self.role.is_some() + || self.label_contains.is_some() + || self.pos.is_some() + } + + fn as_locator(&self) -> Option { + if self.id.is_none() && self.role.is_none() && self.label_contains.is_none() { + return None; + } + let locator_json = json!({ + "id": self.id, + "role": self.role, + "label_contains": self.label_contains, + }); + serde_json::from_value(locator_json).ok() + } +} + +async fn resolve_target( + bridge: &Bridge, + target: &Target, +) -> anyhow::Result<(Option, egui::Pos2)> { + if !target.has_any() { + bail!("target requires `id`, `role`, `label_contains`, or `pos`"); + } + if let Some(p) = target.pos { + return Ok((None, egui::Pos2::new(p.x, p.y))); + } + let locator = target + .as_locator() + .ok_or_else(|| anyhow!("target requires `id`, `role`, `label_contains`, or `pos`"))?; + let snap = bridge.state.snapshot().await; + let pixels_per_point = snap.frame.as_ref().map(|f| f.pixels_per_point).unwrap_or(1.0); + bridge + .state + .with_tree(|t| { + let tree = t.ok_or_else(|| anyhow!("no accesskit tree yet"))?; + let node = tree::resolve_node(tree, &locator) + .ok_or_else(|| anyhow!("node not found"))?; + let view = tree::node_view(&node); + let bounds = view + .bounds + .ok_or_else(|| anyhow!("node has no bounds — can't target"))?; + let (cx, cy) = bounds.center(); + let center = egui::Pos2::new( + (cx as f32) / pixels_per_point, + (cy as f32) / pixels_per_point, + ); + Ok::<_, anyhow::Error>((Some(view.id), center)) + }) + .await +} + +// --------------------------------------------------------------------------------------- +// Args structs +// --------------------------------------------------------------------------------------- + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct LaunchArgs { + /// Path to the binary to 
spawn. + pub bin: PathBuf, + #[serde(default)] + pub args: Vec, + #[serde(default)] + pub env: std::collections::HashMap, + #[serde(default)] + pub cwd: Option, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct AttachArgs { + /// Optional binary to spawn with the inspection socket pre-wired. + #[serde(default)] + pub bin: Option, + #[serde(default)] + pub args: Vec, + #[serde(default)] + pub env: std::collections::HashMap, + #[serde(default)] + pub cwd: Option, + #[serde(default = "default_attach_timeout")] + pub timeout_secs: u64, +} + +fn default_attach_timeout() -> u64 { + 60 +} + +#[derive(Debug, Default, Deserialize, JsonSchema)] +pub struct EmptyArgs {} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct StepArgs { + #[serde(default = "default_one")] + pub count: u32, +} + +fn default_one() -> u32 { + 1 +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetNodeArgs { + pub id: String, +} + +#[derive(Debug, Default, Deserialize, JsonSchema)] +pub struct PressKeyModifiers { + #[serde(default)] + pub ctrl: bool, + #[serde(default)] + pub shift: bool, + #[serde(default)] + pub alt: bool, + #[serde(default)] + pub mac_cmd: bool, + /// = Cmd on Mac / Ctrl on Win+Linux. + #[serde(default)] + pub command: bool, +} + +impl PressKeyModifiers { + fn to_egui(&self) -> egui::Modifiers { + egui::Modifiers { + alt: self.alt, + ctrl: self.ctrl, + shift: self.shift, + mac_cmd: self.mac_cmd, + command: self.command, + } + } +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ClickArgs { + #[serde(flatten)] + pub target: Target, + /// `primary`/`secondary`/`middle`/`extra1`/`extra2` (or aliases `left`/`right`). + #[serde(default = "default_click_button")] + pub button: String, + /// `2` → double-click; `3` → triple-click (multi-click detected via egui's timing). 
/// Serde default for `ClickArgs::button`: the primary pointer button.
fn default_click_button() -> String {
    String::from("primary")
}
name: String, + #[serde(default)] + #[schemars(with = "serde_json::Map")] + pub args: Value, +} + +// --------------------------------------------------------------------------------------- +// Tool router — each tool is a thin wrapper around an inner async fn. +// --------------------------------------------------------------------------------------- + +#[tool_router] +impl Server { + #[tool( + description = "Spawn a kittest harness binary as a child process. The binary must \ + link `egui_kittest` and call `Harness::run()` — `InspectorPlugin` \ + auto-attaches via the `KITTEST_INSPECTOR` env var this tool sets." + )] + async fn launch( + &self, + Parameters(args): Parameters, + ) -> Result { + let mut guard = self.state.bridge.lock().await; + if guard.is_some() { + return Ok(text_error( + "an app is already running — call `kill` first to start a new one", + )); + } + let env: Vec<(String, String)> = args.env.into_iter().collect(); + let bridge = match Bridge::launch(args.bin, args.args, env, args.cwd).await { + Ok(b) => b, + Err(e) => return Ok(text_error(format!("launch failed: {e:#}"))), + }; + let _ = bridge + .wait_for_frame_after(0, Duration::from_secs(5)) + .await; + let snap = bridge.state.snapshot().await; + let info = bridge.peer_info.clone(); + *guard = Some(bridge); + Ok(text_ok(&json!({ + "ok": true, + "launched": info, + "step": snap.frame.as_ref().map(|f| f.step).unwrap_or(0), + "blocked": snap.blocked, + }))) + } + + #[tool( + description = "Bind a unix socket and wait for a live egui app (built with the \ + `egui_inspection` plugin, e.g. eframe + the `inspection` feature) \ + to dial in. If `bin` is provided, also spawns it with \ + `EGUI_INSPECTION_SOCKET` pre-set. Otherwise prints the path and \ + waits for an externally-launched app." 
+ )] + async fn attach( + &self, + Parameters(args): Parameters, + ) -> Result { + let mut guard = self.state.bridge.lock().await; + if guard.is_some() { + return Ok(text_error( + "an app is already running — call `kill` first before attaching", + )); + } + let (socket_dir, listener, socket_path) = match Bridge::prepare_attach().await { + Ok(t) => t, + Err(e) => return Ok(text_error(format!("attach prepare failed: {e:#}"))), + }; + + let mut spawned: Option = None; + if let Some(bin) = args.bin.clone() { + let mut cmd = tokio::process::Command::new(&bin); + cmd.args(&args.args) + .env(egui_inspection::INSPECTION_SOCKET_ENV_VAR, &socket_path) + .stdin(std::process::Stdio::null()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .kill_on_drop(true); + for (k, v) in &args.env { + cmd.env(k, v); + } + if let Some(d) = &args.cwd { + cmd.current_dir(d); + } + match cmd.spawn() { + Ok(c) => spawned = Some(c), + Err(e) => return Ok(text_error(format!("spawn {}: {e}", bin.display()))), + } + } + + let timeout = Duration::from_secs(args.timeout_secs); + let bridge = match Bridge::accept_pending( + socket_dir, + listener, + socket_path.clone(), + spawned, + timeout, + ) + .await + { + Ok(b) => b, + Err(e) => return Ok(text_error(format!("attach failed: {e:#}"))), + }; + + // Live plugins push a frame on each `output_hook`; we wait briefly for the first + // one so the AccessKit tree is populated before any inspector tool call. + let _ = bridge.wait_for_frame_after(0, Duration::from_secs(5)).await; + let snap = bridge.state.snapshot().await; + let info = bridge.peer_info.clone(); + *guard = Some(bridge); + Ok(text_ok(&json!({ + "ok": true, + "attached": info, + "step": snap.frame.as_ref().map(|f| f.step).unwrap_or(0), + }))) + } + + #[tool( + description = "Terminate the running harness child (or detach from an attached live \ + app). After kill, other tools return `not_running` until `launch` / \ + `attach` is called again." 
+ )] + async fn kill(&self, _p: Parameters) -> Result { + let mut guard = self.state.bridge.lock().await; + match guard.take() { + Some(bridge) => { + bridge.kill().await; + Ok(text_ok(&json!({ "ok": true }))) + } + None => Ok(text_error("no app running")), + } + } + + #[tool( + description = "Report whether a harness is running and its current step/blocked state." + )] + async fn status(&self, _p: Parameters) -> Result { + let guard = self.state.bridge.lock().await; + let body = match guard.as_ref() { + None => json!({ "state": "idle" }), + Some(bridge) => { + let snap = bridge.state.snapshot().await; + if let Some(fin) = &snap.finished { + json!({ + "state": "finished", + "ok": fin.ok, + "message": fin.message, + "step": snap.frame.as_ref().map(|f| f.step), + }) + } else { + json!({ + "state": "running", + "blocked": snap.blocked, + "step": snap.frame.as_ref().map(|f| f.step), + "peer": bridge.peer_info, + }) + } + } + }; + Ok(text_ok(&body)) + } + + #[tool(description = "Return the latest rendered frame as PNG.")] + async fn screenshot( + &self, + _p: Parameters, + ) -> Result { + let guard = self.state.bridge.lock().await; + let Some(bridge) = guard.as_ref() else { + return Ok(text_error("no app running — call `launch` or `attach` first")); + }; + match screenshot_inner(bridge).await { + Ok((meta, png_b64)) => { + let meta_text = match serde_json::to_string(&meta) { + Ok(s) => s, + Err(e) => return Ok(text_error(format!("serialize: {e}"))), + }; + Ok(CallToolResult::success(vec![ + Content::text(meta_text), + Content::image(png_b64, "image/png"), + ])) + } + Err(e) => Ok(text_error(format!("{e:#}"))), + } + } + + #[tool( + description = "Walk the AccessKit tree and return nodes matching the filter. Use \ + the returned `id` (a decimal string) with `click`, `type_text`, or \ + `get_node`." 
+ )] + async fn query_tree( + &self, + Parameters(filter): Parameters, + ) -> Result { + self.run_inner(|bridge| { + Box::pin(async move { + let results = bridge + .state + .with_tree(|t| match t { + Some(tree) => tree::query(tree, &filter), + None => Vec::new(), + }) + .await; + Ok(results) + }) + }) + .await + } + + #[tool(description = "Return a single AccessKit node by id (decimal string).")] + async fn get_node( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| { + Box::pin(async move { + let locator_json = json!({ "id": args.id }); + let locator: Locator = + serde_json::from_value(locator_json).context("invalid id")?; + let view = bridge + .state + .with_tree(|t| { + let tree = t?; + tree::resolve_node(tree, &locator).map(|n| tree::node_view(&n)) + }) + .await; + Ok(view) + }) + }) + .await + } + + #[tool( + description = "Click the center of a node's bounding box, or a raw `pos` in logical \ + points. Specify either a locator (`id` from `query_tree` or \ + `role`/`label_contains`) or `pos: { x, y }`. `button` defaults to \ + `primary` (accepts `primary`/`secondary`/`middle`/`extra1`/`extra2`, \ + or aliases `left`/`right`). `count` sends repeated press/release \ + pairs in one batch — egui's multi-click detection turns `count: 2` \ + into a double-click and `count: 3` into a triple-click." + )] + async fn click( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(click_inner(bridge, args))) + .await + } + + #[tool( + description = "Move the pointer over a node (or raw `pos`) without clicking, then \ + step a few frames so tooltips / hover popups settle." + )] + async fn hover( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(hover_inner(bridge, args))) + .await + } + + #[tool( + description = "Send a mouse wheel scroll over a node (or raw `pos`). 
`delta` is in \ + logical points: positive Y scrolls content down (revealing content \ + below); positive X scrolls right." + )] + async fn scroll( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(scroll_inner(bridge, args))) + .await + } + + #[tool( + description = "Primary-button drag from `start` to `end`. Each target accepts the \ + same fields as `click`: locator (`id`/`role`/`label_contains`) or \ + `pos: {x, y}`. `steps` controls how many intermediate pointer-move \ + events are emitted between press and release." + )] + async fn drag( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(drag_inner(bridge, args))) + .await + } + + #[tool( + description = "Resize the peer's viewport (live app: `ViewportCommand::InnerSize`) \ + or the kittest harness window to the given logical-point dimensions." + )] + async fn resize( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(resize_inner(bridge, args))) + .await + } + + #[tool( + description = "Poll the AccessKit tree until at least `min_matches` visible nodes \ + match the filter, or until `timeout_secs` elapses." + )] + async fn wait_for( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(wait_for_inner(bridge, args))) + .await + } + + #[tool(description = "Advance the harness by N frames (default 1) and return the new screenshot.")] + async fn step( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(step_inner(bridge, args))) + .await + } + + #[tool( + description = "Type text into the currently focused widget. Sends one `Event::Text` \ + per character and waits for a frame between characters so each \ + keystroke is applied independently. Optionally first focuses a node \ + (by `id` or `role`/`label_contains`) via a click before typing." 
+ )] + async fn type_text( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(type_text_inner(bridge, args))) + .await + } + + #[tool( + description = "Send a key press (down + up) to the focused widget. `key` is an egui \ + key name such as `Backspace`, `Delete`, `Enter`, `Tab`, `A`–`Z`, \ + `ArrowLeft`, `ArrowRight`, `Home`, `End`, `Escape`." + )] + async fn press_key( + &self, + Parameters(args): Parameters, + ) -> Result { + self.run_inner(|bridge| Box::pin(press_key_inner(bridge, args))) + .await + } + + #[tool( + description = "Execute a sequence of tool calls in one round trip. Stops on the \ + first error. Results are emitted in execution order, interleaved: \ + each step contributes one JSON text item followed by any image \ + items it produced (e.g. screenshots) — so position in the content \ + stream tells you which step each image belongs to. `batch` cannot \ + be nested." + )] + async fn batch( + &self, + Parameters(args): Parameters, + ) -> Result { + if args.actions.iter().any(|a| a.name == "batch") { + return Ok(text_error("nested `batch` is not allowed")); + } + // Interleaved layout: for each step, emit a Text item carrying the step's JSON + // metadata, then any Image items the step produced. Matches `browser_batch` in + // claude-in-chrome — callers can tell which screenshot belongs to which step by + // position in the content stream. 
+ let mut content: Vec = Vec::new(); + let mut any_error = false; + for action in args.actions { + let result = Box::pin(self.dispatch_internal(&action.name, action.args)).await; + let mut step_texts: Vec = Vec::new(); + let mut step_images: Vec = Vec::new(); + for item in &result.content { + if let Some(text) = content_as_text(item) { + step_texts.push(text.to_string()); + } else if content_is_image(item) { + step_images.push(item.clone()); + } + } + let entry = json!({ + "name": action.name, + "isError": result.is_error.unwrap_or(false), + "content": step_texts, + }); + let entry_text = match serde_json::to_string(&entry) { + Ok(s) => s, + Err(e) => return Ok(text_error(format!("serialize batch step: {e}"))), + }; + content.push(Content::text(entry_text)); + content.extend(step_images); + if result.is_error.unwrap_or(false) { + any_error = true; + break; + } + } + let mut result = if any_error { + CallToolResult::error(content) + } else { + CallToolResult::success(content) + }; + result.is_error = Some(any_error); + Ok(result) + } +} + +#[tool_handler] +impl ServerHandler for Server { + fn get_info(&self) -> ServerInfo { + ServerInfo::new(ServerCapabilities::builder().enable_tools().build()) + .with_server_info(Implementation::new( + "kittest-mcp", + env!("CARGO_PKG_VERSION"), + )) + } +} + +// --------------------------------------------------------------------------------------- +// Batch internal dispatch +// --------------------------------------------------------------------------------------- + +impl Server { + /// Route a tool call by name. Used by `batch` to recurse without going back through + /// the rmcp router (which would require a self-handle). 
+ async fn dispatch_internal(&self, name: &str, args: Value) -> CallToolResult { + match name { + "launch" => unpack_then(args, |p| async move { self.launch(p).await }).await, + "attach" => unpack_then(args, |p| async move { self.attach(p).await }).await, + "kill" => unpack_then(args, |p| async move { self.kill(p).await }).await, + "status" => unpack_then(args, |p| async move { self.status(p).await }).await, + "screenshot" => { + unpack_then(args, |p| async move { self.screenshot(p).await }).await + } + "query_tree" => unpack_then(args, |p| async move { self.query_tree(p).await }).await, + "get_node" => unpack_then(args, |p| async move { self.get_node(p).await }).await, + "click" => unpack_then(args, |p| async move { self.click(p).await }).await, + "hover" => unpack_then(args, |p| async move { self.hover(p).await }).await, + "scroll" => unpack_then(args, |p| async move { self.scroll(p).await }).await, + "drag" => unpack_then(args, |p| async move { self.drag(p).await }).await, + "resize" => unpack_then(args, |p| async move { self.resize(p).await }).await, + "wait_for" => unpack_then(args, |p| async move { self.wait_for(p).await }).await, + "step" => unpack_then(args, |p| async move { self.step(p).await }).await, + "type_text" => unpack_then(args, |p| async move { self.type_text(p).await }).await, + "press_key" => unpack_then(args, |p| async move { self.press_key(p).await }).await, + other => text_error(format!("unknown tool `{other}`")), + } + } +} + +async fn unpack_then(args: Value, f: F) -> CallToolResult +where + A: for<'de> serde::Deserialize<'de>, + F: FnOnce(Parameters) -> Fut, + Fut: std::future::Future>, +{ + let parsed: A = match serde_json::from_value(args) { + Ok(p) => p, + Err(e) => return text_error(format!("invalid arguments: {e}")), + }; + match f(Parameters(parsed)).await { + Ok(r) => r, + Err(e) => text_error(e.message.to_string()), + } +} + +fn content_as_text(c: &Content) -> Option<&str> { + match &c.raw { + rmcp::model::RawContent::Text(t) => 
Some(t.text.as_str()),
+        _ => None,
+    }
+}
+
+/// Returns `true` when the content item's raw payload is an image.
+fn content_is_image(c: &Content) -> bool {
+    matches!(&c.raw, rmcp::model::RawContent::Image(_))
+}
+
+// ---------------------------------------------------------------------------------------
+// Inner action handlers (shared with batch)
+// ---------------------------------------------------------------------------------------
+
+/// Serializable metadata returned by [`screenshot_inner`] together with the base64 PNG.
+#[derive(Serialize)]
+struct ScreenshotMeta {
+    /// `step` of the frame the screenshot belongs to.
+    step: u64,
+    /// Screenshot width as reported by the frame's `FrameScreenshot`.
+    width: u32,
+    /// Screenshot height as reported by the frame's `FrameScreenshot`.
+    height: u32,
+    /// `pixels_per_point` of the frame the screenshot belongs to.
+    pixels_per_point: f32,
+}
+
+/// `step` of the most recent frame in the bridge's state snapshot, or `0` when the
+/// snapshot holds no frame.
+async fn latest_frame_step(bridge: &Bridge) -> u64 {
+    let snapshot = bridge.state.snapshot().await;
+    snapshot.frame.as_ref().map_or(0, |f| f.step)
+}
+
+/// Encode `shot` as PNG and pair the base64 payload with its [`ScreenshotMeta`].
+fn screenshot_payload(
+    step: u64,
+    pixels_per_point: f32,
+    shot: &egui_inspection::protocol::FrameScreenshot,
+) -> anyhow::Result<(ScreenshotMeta, String)> {
+    let meta = ScreenshotMeta {
+        step,
+        width: shot.width,
+        height: shot.height,
+        pixels_per_point,
+    };
+    let png = encode_png(shot).context("encode PNG")?;
+    Ok((meta, base64::engine::general_purpose::STANDARD.encode(png)))
+}
+
+/// Return the current screenshot as `(metadata, base64-encoded PNG)`.
+///
+/// Uses the latest frame's screenshot when it already has one; otherwise sends
+/// `InspectorCommand::Screenshot` and waits up to 5 seconds for a frame that carries
+/// pixels.
+///
+/// # Errors
+/// Fails when the command cannot be sent, when waiting for a frame fails, when no frame
+/// with a screenshot arrives before the deadline, or when PNG encoding fails.
+async fn screenshot_inner(bridge: &Bridge) -> anyhow::Result<(ScreenshotMeta, String)> {
+    // Fast path: kittest harnesses (and live apps in continuous-screenshot mode) attach a
+    // `FrameScreenshot` to every frame. Just use the latest one if it already has pixels.
+    let initial = bridge.state.snapshot().await;
+    if let Some(frame) = initial.frame.as_ref() {
+        if let Some(shot) = frame.screenshot.as_ref() {
+            return screenshot_payload(frame.step, frame.pixels_per_point, shot);
+        }
+    }
+
+    // Live apps only attach a screenshot on demand. Ask for a fresh capture, then wait
+    // until a frame with `screenshot: Some(_)` arrives.
+    let prev_step = initial.frame.as_ref().map_or(0, |f| f.step);
+    bridge.send(InspectorCommand::Screenshot)?;
+
+    // The viewport screenshot needs at least two frames to round-trip on live apps:
+    // one to issue the request, one to emit `Event::Screenshot`. Poll until the pixels
+    // show up (or we time out).
+    let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
+    let mut last_step = prev_step;
+    loop {
+        let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+        if remaining.is_zero() {
+            return Err(anyhow!("timed out waiting for screenshot frame"));
+        }
+        let snap = bridge.wait_for_frame_after(last_step, remaining).await?;
+        let Some(frame) = snap.frame else { continue };
+        last_step = frame.step;
+        if let Some(shot) = frame.screenshot.as_ref() {
+            return screenshot_payload(frame.step, frame.pixels_per_point, shot);
+        }
+    }
+}
+
+/// Encode a frame screenshot's RGBA pixels as PNG bytes.
+///
+/// # Errors
+/// Fails when the pixel buffer length does not match `width` x `height`, or when the
+/// PNG encoder reports an error.
+fn encode_png(shot: &egui_inspection::protocol::FrameScreenshot) -> anyhow::Result<Vec<u8>> {
+    // Borrow the pixel buffer instead of cloning it: the encoder only reads the pixels,
+    // so there is no need to copy a full frame for every screenshot.
+    let img = image::ImageBuffer::<image::Rgba<u8>, &[u8]>::from_raw(
+        shot.width,
+        shot.height,
+        shot.rgba.as_slice(),
+    )
+    .ok_or_else(|| anyhow!("frame rgba length mismatch"))?;
+    let mut out = std::io::Cursor::new(Vec::new());
+    img.write_to(&mut out, image::ImageFormat::Png)?;
+    Ok(out.into_inner())
+}
+
+/// Parse a pointer-button name (ASCII case-insensitive) into an [`egui::PointerButton`].
+///
+/// Accepts `primary`/`left`, `secondary`/`right`, `middle`, `extra1` and `extra2`.
+///
+/// # Errors
+/// Fails for any other name.
+fn parse_pointer_button(name: &str) -> anyhow::Result<egui::PointerButton> {
+    match name.to_ascii_lowercase().as_str() {
+        "primary" | "left" => Ok(egui::PointerButton::Primary),
+        "secondary" | "right" => Ok(egui::PointerButton::Secondary),
+        "middle" => Ok(egui::PointerButton::Middle),
+        "extra1" => Ok(egui::PointerButton::Extra1),
+        "extra2" => Ok(egui::PointerButton::Extra2),
+        other => bail!(
+            "unknown button `{other}` — expected primary/secondary/middle/extra1/extra2 (or left/right)"
+        ),
+    }
+}
+
+/// Click the resolved target `count` times (at least once) with the requested button and
+/// modifiers, then wait up to 5 seconds for the next frame.
+///
+/// # Errors
+/// Fails when the button name is unknown, the target cannot be resolved, the command
+/// cannot be sent, or waiting for the frame fails.
+async fn click_inner(bridge: &Bridge, args: ClickArgs) -> anyhow::Result<serde_json::Value> {
+    let button = parse_pointer_button(&args.button)?;
+    let count = args.count.max(1);
+    let modifiers = args.modifiers.to_egui();
+    let (node_id, center) = resolve_target(bridge, &args.target).await?;
+    let prev_step = latest_frame_step(bridge).await;
+
+    // Send `count` press/release pairs in one batch — they share the frame's input time,
+    // which egui treats as consecutive clicks within `multi_click_delay`, so double /
+    // triple clicks register naturally.
+    let mut events = vec![Event::PointerMoved(center)];
+    for _ in 0..count {
+        events.push(Event::PointerButton {
+            pos: center,
+            button,
+            pressed: true,
+            modifiers,
+        });
+        events.push(Event::PointerButton {
+            pos: center,
+            button,
+            pressed: false,
+            modifiers,
+        });
+    }
+    bridge.send(InspectorCommand::Handle { events })?;
+    let snap = bridge
+        .wait_for_frame_after(prev_step, Duration::from_secs(5))
+        .await?;
+    Ok(json!({
+        "ok": true,
+        "clicked_id": node_id,
+        "pos": [center.x, center.y],
+        "button": args.button,
+        "count": count,
+        "step": snap.frame.as_ref().map(|f| f.step),
+    }))
+}
+
+/// Move the pointer to the resolved target, wait for a frame, then send up to
+/// `settle_frames` extra `Step` commands, stopping early once a snapshot reports
+/// `finished`.
+///
+/// # Errors
+/// Fails when the target cannot be resolved, a command cannot be sent, or waiting for a
+/// frame fails.
+async fn hover_inner(bridge: &Bridge, args: HoverArgs) -> anyhow::Result<serde_json::Value> {
+    let (node_id, pos) = resolve_target(bridge, &args.target).await?;
+    let mut last_step = latest_frame_step(bridge).await;
+
+    bridge.send(InspectorCommand::Handle {
+        events: vec![Event::PointerMoved(pos)],
+    })?;
+    let snap = bridge
+        .wait_for_frame_after(last_step, Duration::from_secs(5))
+        .await?;
+    if let Some(f) = &snap.frame {
+        last_step = f.step;
+    }
+    for _ in 0..args.settle_frames {
+        bridge.send(InspectorCommand::Step)?;
+        let snap = bridge
+            .wait_for_frame_after(last_step, Duration::from_secs(5))
+            .await?;
+        if let Some(f) = &snap.frame {
+            last_step = f.step;
+        }
+        if snap.finished.is_some() {
+            break;
+        }
+    }
+    Ok(json!({
+        "ok": true,
+        "hovered_id": node_id,
+        "pos": [pos.x, pos.y],
+        "step": last_step,
+    }))
+}
+
+/// Move the pointer to the resolved target and send one mouse-wheel event with
+/// `args.delta` in point units, then wait up to 5 seconds for the next frame.
+///
+/// # Errors
+/// Fails when the target cannot be resolved, the command cannot be sent, or waiting for
+/// the frame fails.
+async fn scroll_inner(bridge: &Bridge, args: ScrollArgs) -> anyhow::Result<serde_json::Value> {
+    let (node_id, pos) = resolve_target(bridge, &args.target).await?;
+    let modifiers = args.modifiers.to_egui();
+    let prev_step = latest_frame_step(bridge).await;
+    let events = vec![
+        Event::PointerMoved(pos),
+        Event::MouseWheel {
+            unit: egui::MouseWheelUnit::Point,
+            delta: egui::Vec2::new(args.delta.x, args.delta.y),
+            phase: egui::TouchPhase::Move,
+            modifiers,
+        },
+    ];
+    bridge.send(InspectorCommand::Handle { events })?;
+    let snap = bridge
+        .wait_for_frame_after(prev_step, Duration::from_secs(5))
+        .await?;
+    Ok(json!({
+        "ok": true,
+        "scrolled_id": node_id,
+        "pos": [pos.x, pos.y],
+        "delta": [args.delta.x, args.delta.y],
+        "step": snap.frame.as_ref().map(|f| f.step),
+    }))
+}
+
+/// Drag with the primary button from `args.start` to `args.end`.
+///
+/// Presses at the start position, moves through `steps` (at least one) linearly
+/// interpolated waypoints ending exactly on the end position, then releases. Each stage
+/// waits up to 5 seconds for a frame.
+///
+/// # Errors
+/// Fails when either target cannot be resolved, a command cannot be sent, or waiting for
+/// a frame fails.
+async fn drag_inner(bridge: &Bridge, args: DragArgs) -> anyhow::Result<serde_json::Value> {
+    let (start_id, start_pos) = resolve_target(bridge, &args.start).await?;
+    let (end_id, end_pos) = resolve_target(bridge, &args.end).await?;
+    let modifiers = args.modifiers.to_egui();
+    let steps = args.steps.max(1);
+
+    let mut last_step = latest_frame_step(bridge).await;
+
+    bridge.send(InspectorCommand::Handle {
+        events: vec![
+            Event::PointerMoved(start_pos),
+            Event::PointerButton {
+                pos: start_pos,
+                button: egui::PointerButton::Primary,
+                pressed: true,
+                modifiers,
+            },
+        ],
+    })?;
+    let snap = bridge
+        .wait_for_frame_after(last_step, Duration::from_secs(5))
+        .await?;
+    if let Some(f) = &snap.frame {
+        last_step = f.step;
+    }
+
+    for i in 1..=steps {
+        // `t` runs from 1/steps to 1, so the final waypoint is `end_pos`.
+        let t = i as f32 / steps as f32;
+        let waypoint = egui::Pos2::new(
+            start_pos.x + (end_pos.x - start_pos.x) * t,
+            start_pos.y + (end_pos.y - start_pos.y) * t,
+        );
+        bridge.send(InspectorCommand::Handle {
+            events: vec![Event::PointerMoved(waypoint)],
+        })?;
+        let snap = bridge
+            .wait_for_frame_after(last_step, Duration::from_secs(5))
+            .await?;
+        if let Some(f) = &snap.frame {
+            last_step = f.step;
+        }
+    }
+
+    bridge.send(InspectorCommand::Handle {
+        events: vec![Event::PointerButton {
+            pos: end_pos,
+            button: egui::PointerButton::Primary,
+            pressed: false,
+            modifiers,
+        }],
+    })?;
+    let snap = bridge
+        .wait_for_frame_after(last_step, Duration::from_secs(5))
+        .await?;
+    if let Some(f) = &snap.frame {
+        last_step = f.step;
+    }
+    Ok(json!({
+        "ok": true,
+        "start_id": start_id,
+        "end_id": end_id,
+        "start_pos": [start_pos.x, start_pos.y],
+        "end_pos": [end_pos.x, end_pos.y],
+        "steps": steps,
+        "step": last_step,
+    }))
+}
+
+/// Send a `Resize` command with the requested width and height and wait up to 5 seconds
+/// for the next frame.
+///
+/// # Errors
+/// Fails when the command cannot be sent or waiting for the frame fails.
+async fn resize_inner(bridge: &Bridge, args: ResizeArgs) -> anyhow::Result<serde_json::Value> {
+    let prev_step = latest_frame_step(bridge).await;
+    bridge.send(InspectorCommand::Resize {
+        width: args.width,
+        height: args.height,
+    })?;
+    let snap = bridge
+        .wait_for_frame_after(prev_step, Duration::from_secs(5))
+        .await?;
+    Ok(json!({
+        "ok": true,
+        "width": args.width,
+        "height": args.height,
+        "step": snap.frame.as_ref().map(|f| f.step),
+    }))
+}
+
+/// Wait until at least `min_matches` visible nodes match `role` / `label_contains`.
+///
+/// Re-queries the tree each time the bridge state notifies, until `timeout_secs` have
+/// elapsed.
+///
+/// # Errors
+/// Fails when neither `role` nor `label_contains` is given, when `timeout_secs` is too
+/// large to form a deadline, or when the deadline passes without enough matches.
+async fn wait_for_inner(bridge: &Bridge, args: WaitForArgs) -> anyhow::Result<serde_json::Value> {
+    if args.role.is_none() && args.label_contains.is_none() {
+        bail!("wait_for requires at least `role` or `label_contains`");
+    }
+    let wanted = args.min_matches as usize;
+    let filter = QueryFilter {
+        role: args.role.clone(),
+        label_contains: args.label_contains.clone(),
+        visible_only: true,
+        // No need to collect more nodes than are required to satisfy the wait.
+        limit: wanted,
+    };
+    // `Instant + Duration` panics on overflow and `timeout_secs` comes from the client,
+    // so turn an unrepresentable deadline into an error instead.
+    let deadline = tokio::time::Instant::now()
+        .checked_add(Duration::from_secs(args.timeout_secs))
+        .ok_or_else(|| anyhow!("wait_for timeout of {}s is too large", args.timeout_secs))?;
+    loop {
+        let matches: Vec<tree::NodeView> = bridge
+            .state
+            .with_tree(|t| match t {
+                Some(tree) => tree::query(tree, &filter),
+                None => Vec::new(),
+            })
+            .await;
+        if matches.len() >= wanted {
+            return Ok(json!({ "ok": true, "matched": matches }));
+        }
+        let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
+        if remaining.is_zero() {
+            bail!(
+                "wait_for timed out after {}s (role={:?}, label_contains={:?}, found {})",
+                args.timeout_secs,
+                args.role,
+                args.label_contains,
+                matches.len()
+            );
+        }
+        // A timeout here is not an error: the loop re-checks the tree and the deadline.
+        let _ = tokio::time::timeout(remaining, bridge.state.notified()).await;
+    }
+}
+
+/// Send `count` `Step` commands (a `count` of 0 is treated as 1), waiting up to
+/// 5 seconds for a frame after each and stopping early once a snapshot reports
+/// `finished`.
+///
+/// # Errors
+/// Fails when a command cannot be sent or waiting for a frame fails.
+async fn step_inner(bridge: &Bridge, args: StepArgs) -> anyhow::Result<serde_json::Value> {
+    let count = if args.count == 0 { 1 } else { args.count };
+    let mut last_step = latest_frame_step(bridge).await;
+    for _ in 0..count {
+        bridge.send(InspectorCommand::Step)?;
+        let snap: StateSnapshot = bridge
+            .wait_for_frame_after(last_step, Duration::from_secs(5))
+            .await?;
+        if let Some(f) = &snap.frame {
+            last_step = f.step;
+        }
+        if snap.finished.is_some() {
+            break;
+        }
+    }
+    Ok(json!({ "ok": true, "step": last_step }))
+}
+
+/// Type `args.text` one character per frame, optionally clicking a target first.
+///
+/// When `id`, `role` or `label_contains` is given, the target is clicked once with the
+/// primary button before typing. Control characters are skipped and are not counted in
+/// `chars_sent`.
+///
+/// # Errors
+/// Fails when the focusing click fails, a command cannot be sent, or waiting for a frame
+/// fails.
+async fn type_text_inner(
+    bridge: &Bridge,
+    args: TypeTextArgs,
+) -> anyhow::Result<serde_json::Value> {
+    // Optionally focus a target widget by clicking it first.
+    let wants_focus =
+        args.id.is_some() || args.role.is_some() || args.label_contains.is_some();
+    let focused_locator = if wants_focus {
+        let click_args = ClickArgs {
+            target: Target {
+                id: args.id.clone(),
+                role: args.role.clone(),
+                label_contains: args.label_contains.clone(),
+                pos: None,
+            },
+            button: "primary".to_string(),
+            count: 1,
+            modifiers: PressKeyModifiers::default(),
+        };
+        Some(click_inner(bridge, click_args).await?)
+    } else {
+        None
+    };
+
+    let mut last_step = latest_frame_step(bridge).await;
+
+    let mut chars_sent = 0u32;
+    for ch in args.text.chars() {
+        if ch.is_control() {
+            continue;
+        }
+        bridge.send(InspectorCommand::Handle {
+            events: vec![Event::Text(ch.to_string())],
+        })?;
+        let snap = bridge
+            .wait_for_frame_after(last_step, Duration::from_secs(5))
+            .await?;
+        if let Some(f) = &snap.frame {
+            last_step = f.step;
+        }
+        chars_sent += 1;
+    }
+
+    Ok(json!({
+        "ok": true,
+        "chars_sent": chars_sent,
+        "focused": focused_locator,
+        "step": last_step,
+    }))
+}
+
+/// Send a press followed by a release of the named key, with the given modifiers, in a
+/// single batch, then wait up to 5 seconds for the next frame.
+///
+/// # Errors
+/// Fails when `egui::Key::from_name` does not recognize the key name, the command cannot
+/// be sent, or waiting for the frame fails.
+async fn press_key_inner(
+    bridge: &Bridge,
+    args: PressKeyArgs,
+) -> anyhow::Result<serde_json::Value> {
+    let key = egui::Key::from_name(&args.key)
+        .ok_or_else(|| anyhow!("unknown key `{}`", args.key))?;
+    let modifiers = args.modifiers.to_egui();
+    let prev_step = latest_frame_step(bridge).await;
+    bridge.send(InspectorCommand::Handle {
+        events: vec![
+            Event::Key {
+                key,
+                physical_key: None,
+                pressed: true,
+                repeat: false,
+                modifiers,
+            },
+            Event::Key {
+                key,
+                physical_key: None,
+                pressed: false,
+                repeat: false,
+                modifiers,
+            },
+        ],
+    })?;
+    let snap = bridge
+        .wait_for_frame_after(prev_step, Duration::from_secs(5))
+        .await?;
+    Ok(json!({
+        "ok": true,
+        "key": args.key,
+        "step": snap.frame.as_ref().map(|f| f.step),
+    }))
+}
diff --git a/crates/egui_kittest_mcp/src/tree.rs b/crates/egui_kittest_mcp/src/tree.rs
new file mode 100644
index 000000000..0b7007621
--- /dev/null
+++ b/crates/egui_kittest_mcp/src/tree.rs
@@ -0,0 +1,206 @@
+//! Helpers that flatten the accesskit tree into MCP-friendly shapes.
+//!
+//! Note: `accesskit_consumer::NodeId` is a private composite (tree-index + local-id) and
+//! can't be constructed from outside the crate. We project everything externally as the
+//! original `accesskit::NodeId` (a `pub u64`), and look up by walking the tree.
+
+use accesskit_consumer::{Node, Tree};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Flat, serializable projection of one accesskit node.
+#[derive(Debug, Clone, Serialize, JsonSchema)]
+pub struct NodeView {
+    /// Original `accesskit::NodeId` as a decimal string. Emitted as a string so the full
+    /// u64 round-trips through MCP clients whose JSON parsers go through `f64` (which
+    /// can't represent integers above 2^53 exactly).
+    pub id: String,
+    /// `Debug` rendering of the node's role.
+    pub role: String,
+    pub label: Option<String>,
+    pub value: Option<String>,
+    /// The node's bounding box, when it has one.
+    pub bounds: Option<RectF>,
+    pub focused: bool,
+    pub disabled: bool,
+    pub hidden: bool,
+    /// Id of the parent node in the same decimal-string form as `id`; `None` when the
+    /// node has no parent.
+    pub parent_id: Option<String>,
+}
+
+/// Rectangle stored as origin (`x`, `y`) plus size (`w`, `h`).
+#[derive(Debug, Clone, Copy, Serialize, JsonSchema)]
+pub struct RectF {
+    pub x: f64,
+    pub y: f64,
+    pub w: f64,
+    pub h: f64,
+}
+
+impl RectF {
+    /// Convert an `accesskit::Rect` (two corners) into origin + size form.
+    fn from_rect(r: accesskit::Rect) -> Self {
+        Self {
+            x: r.x0,
+            y: r.y0,
+            w: r.x1 - r.x0,
+            h: r.y1 - r.y0,
+        }
+    }
+
+    /// Center point of the rectangle as `(x, y)`.
+    pub fn center(&self) -> (f64, f64) {
+        (self.x + self.w / 2.0, self.y + self.h / 2.0)
+    }
+}
+
+/// Criteria used by [`query`] and [`resolve_node`] to select nodes.
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+pub struct QueryFilter {
+    /// Role name, compared ASCII case-insensitively against the `Debug` rendering of the
+    /// node's role.
+    pub role: Option<String>,
+    /// Substring that must occur in the node's label (ASCII case-insensitive).
+    pub label_contains: Option<String>,
+    /// When `true` (the default), nodes reporting `is_hidden()` never match.
+    #[serde(default = "default_true")]
+    pub visible_only: bool,
+    /// Maximum number of nodes [`query`] returns (default 200).
+    #[serde(default = "default_limit")]
+    pub limit: usize,
+}
+
+/// Serde default for [`QueryFilter::visible_only`].
+fn default_true() -> bool {
+    true
+}
+
+/// Serde default for [`QueryFilter::limit`].
+fn default_limit() -> usize {
+    200
+}
+
+impl Default for QueryFilter {
+    fn default() -> Self {
+        Self {
+            role: None,
+            label_contains: None,
+            visible_only: true,
+            limit: default_limit(),
+        }
+    }
+}
+
+/// Collect up to `filter.limit` matching nodes in pre-order (a node before its children).
+pub fn query(tree: &Tree, filter: &QueryFilter) -> Vec<NodeView> {
+    let root = tree.state().root();
+    let mut out = Vec::new();
+    walk(&root, filter, &mut out);
+    out
+}
+
+/// Pre-order traversal that appends matching nodes to `out`.
+///
+/// Stops as soon as `out` holds `filter.limit` entries, so a small limit does not pay
+/// for walking (and projecting) the rest of the tree.
+fn walk(node: &Node<'_>, filter: &QueryFilter, out: &mut Vec<NodeView>) {
+    if out.len() >= filter.limit {
+        return;
+    }
+    if matches(node, filter) {
+        out.push(node_view(node));
+    }
+    for child in node.children() {
+        if out.len() >= filter.limit {
+            break;
+        }
+        walk(&child, filter, out);
+    }
+}
+
+/// Whether `node` satisfies every criterion set in `filter`.
+///
+/// A node without a label is treated as having an empty label.
+fn matches(node: &Node<'_>, filter: &QueryFilter) -> bool {
+    if filter.visible_only && node.is_hidden() {
+        return false;
+    }
+    if let Some(role) = &filter.role
+        && !role.eq_ignore_ascii_case(&format!("{:?}", node.role()))
+    {
+        return false;
+    }
+    if let Some(needle) = &filter.label_contains {
+        let hay = node.label().unwrap_or_default();
+        if !hay
+            .to_ascii_lowercase()
+            .contains(&needle.to_ascii_lowercase())
+        {
+            return false;
+        }
+    }
+    true
+}
+
+/// Project a consumer node into a [`NodeView`].
+pub fn node_view(node: &Node<'_>) -> NodeView {
+    NodeView {
+        id: accesskit_id(node).to_string(),
+        role: format!("{:?}", node.role()),
+        label: node.label(),
+        value: node.value(),
+        bounds: node.bounding_box().map(RectF::from_rect),
+        focused: node.is_focused_in_tree(),
+        disabled: node.is_disabled(),
+        hidden: node.is_hidden(),
+        parent_id: node.parent().map(|p| accesskit_id(&p).to_string()),
+    }
+}
+
+/// Project a consumer node to its original `accesskit::NodeId` as a `u64`.
+fn accesskit_id(node: &Node<'_>) -> u64 {
+    let (local, _tree) = node.locate();
+    local.0
+}
+
+/// How a caller identifies a node: by exact id, or by role / label match.
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[serde(untagged)]
+pub enum Locator {
+    Id {
+        /// Decimal string. Strings preserve the full u64 — JSON numbers above 2^53 lose
+        /// precision in clients whose parsers go through `f64`, so we don't accept them.
+        #[serde(deserialize_with = "deserialize_u64_from_string")]
+        id: u64,
+    },
+    Match {
+        #[serde(default)]
+        role: Option<String>,
+        #[serde(default)]
+        label_contains: Option<String>,
+    },
+}
+
+/// Deserialize a `u64` from a decimal string, ignoring surrounding whitespace.
+fn deserialize_u64_from_string<'de, D>(d: D) -> Result<u64, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    use serde::de::Error as _;
+    let s = String::deserialize(d)?;
+    s.trim().parse::<u64>().map_err(D::Error::custom)
+}
+
+/// Resolve `locator` to a node of `tree`.
+///
+/// `Locator::Id` finds the node with that id; `Locator::Match` returns the first visible
+/// node, in pre-order, that matches. Returns `None` when nothing is found, and also for
+/// a `Match` that sets neither `role` nor `label_contains`.
+pub fn resolve_node<'a>(tree: &'a Tree, locator: &Locator) -> Option<Node<'a>> {
+    match locator {
+        Locator::Id { id } => find_by_id(&tree.state().root(), *id),
+        Locator::Match {
+            role,
+            label_contains,
+        } => {
+            // An empty matcher accepts every node, so it would silently resolve to the
+            // root. Because `Locator` is untagged, an `Id` that fails to parse can land
+            // here too; treat that as "not found" rather than picking the root.
+            if role.is_none() && label_contains.is_none() {
+                return None;
+            }
+            let filter = QueryFilter {
+                role: role.clone(),
+                label_contains: label_contains.clone(),
+                visible_only: true,
+                limit: 1,
+            };
+            let root = tree.state().root();
+            first_match(&root, &filter)
+        }
+    }
+}
+
+/// Depth-first search for the node whose original accesskit id equals `target`.
+fn find_by_id<'a>(node: &Node<'a>, target: u64) -> Option<Node<'a>> {
+    if accesskit_id(node) == target {
+        return Some(*node);
+    }
+    for child in node.children() {
+        if let Some(found) = find_by_id(&child, target) {
+            return Some(found);
+        }
+    }
+    None
+}
+
+/// Depth-first search for the first node, in pre-order, that satisfies `filter`.
+fn first_match<'a>(node: &Node<'a>, filter: &QueryFilter) -> Option<Node<'a>> {
+    if matches(node, filter) {
+        return Some(*node);
+    }
+    for child in node.children() {
+        if let Some(found) = first_match(&child, filter) {
+            return Some(found);
+        }
+    }
+    None
+}
diff --git a/examples/egui_mcp/Cargo.toml b/examples/egui_mcp/Cargo.toml
new file mode 100644
index 000000000..281c05490
--- /dev/null
+++ b/examples/egui_mcp/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "egui_mcp"
+version = "0.1.0"
+authors = ["Lucas Meurer "]
+license = "MIT OR Apache-2.0"
+edition = "2024"
+rust-version = "1.92"
+publish = false
+
+[lints]
+workspace = true
+
+
+[dependencies]
+egui = { workspace = true, features = ["default"] }
+egui_demo_lib = { workspace = true, features = ["default"] }
+egui_kittest = { workspace = true, features = ["wgpu", "inspector"] }
diff --git a/examples/egui_mcp/src/main.rs b/examples/egui_mcp/src/main.rs
new file mode 100644
index 000000000..7a0b18f70
--- /dev/null
+++ b/examples/egui_mcp/src/main.rs
@@ -0,0 +1,32 @@
+//! Headless `egui_demo_lib` target for the kittest MCP server.
+//!
+//! Wraps [`DemoWindows`] in a [`Harness`] and steps it in an endless loop. The
+//! [`egui_kittest::InspectorPlugin`] auto-attaches whenever `KITTEST_INSPECTOR` is set
+//! (the MCP server's `launch` tool sets it), drives the harness via stdio, and blocks
+//! inside `after_step` until the agent requests the next frame.
+
+#![expect(rustdoc::missing_crate_level_docs)]
+
+use egui::Vec2;
+use egui_demo_lib::DemoWindows;
+use egui_kittest::Harness;
+
+/// Builds the wgpu-backed demo harness and steps it forever.
+fn main() {
+    // Size handed to the harness builder.
+    let viewport_size = Vec2::new(1024.0, 768.0);
+    // Demo state; moved into the UI closure below.
+    let mut demo_windows = DemoWindows::default();
+
+    let mut demo_harness = Harness::builder()
+        .with_size(viewport_size)
+        .wgpu()
+        .build_ui(move |ui| {
+            demo_windows.ui(ui);
+        });
+
+    // Never returns.
+    loop {
+        demo_harness.step();
+    }
+}