feat: add fast-path setup for nous account

Adds a Nous-account-specific fast setup flow and auto-launches into chat if the gateway isn't set up.
change: always run setup on no-config run
2026-04-24 00:07:23 -04:00 · 2026-04-24 00:06:48 -04:00 · 2026-04-23 19:40:43 -05:00 · 2026-04-23 19:38:33 -05:00 · 2026-04-23 19:35:18 -05:00 · 2026-04-23 19:32:21 -05:00
26 changed files with 628 additions and 3069 deletions
@@ -2821,6 +2821,7 @@ def _prompt_model_selection(
    pricing: Optional[Dict[str, Dict[str, str]]] = None,
    unavailable_models: Optional[List[str]] = None,
    portal_url: str = "",
+    allow_custom = True
 ) -> Optional[str]:
    """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.

@@ -2909,8 +2910,16 @@ def _prompt_model_selection(
        from simple_term_menu import TerminalMenu

        choices = [f"  {_label(mid)}" for mid in ordered]
-        choices.append("  Enter custom model name")
-        choices.append("  Skip (keep current)")
+
+        custom_idx = None
+        if allow_custom:
+            custom_idx = len(choices)
+            choices.append("  Enter custom model name")
+
+        skip_idx = None
+        if current_model:
+            skip_idx = len(choices)
+            choices.append("  Skip (keep current)")

        # Print the unavailable block BEFORE the menu via regular print().
        # simple_term_menu pads title lines to terminal width (causes wrapping),
@@ -2947,21 +2956,29 @@ def _prompt_model_selection(
        print()
        if idx < len(ordered):
            return ordered[idx]
-        elif idx == len(ordered):
+        if idx == custom_idx:
            custom = input("Enter model name: ").strip()
            return custom if custom else None
+        if idx == skip_idx:
+            return None
        return None
    except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
        pass

    # Fallback: numbered list
    print(menu_title)
-    num_width = len(str(len(ordered) + 2))
+    n = len(ordered)
+    extra = []
+    if allow_custom:
+        extra.append("Enter custom model name")
+    if current_model:
+        extra.append("Skip (keep current)")
+    total = n + len(extra)
+    num_width = len(str(total))
    for i, mid in enumerate(ordered, 1):
        print(f"  {i:>{num_width}}. {_label(mid)}")
-    n = len(ordered)
-    print(f"  {n + 1:>{num_width}}. Enter custom model name")
-    print(f"  {n + 2:>{num_width}}. Skip (keep current)")
+    for j, label in enumerate(extra, n + 1):
+        print(f"  {j:>{num_width}}. {label}")

    if _unavailable:
        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
@@ -2973,18 +2990,19 @@ def _prompt_model_selection(

    while True:
        try:
-            choice = input(f"Choice [1-{n + 2}] (default: skip): ").strip()
+            choice = input(f"Choice [1-{total}]: ").strip()
            if not choice:
                return None
-            idx = int(choice)
-            if 1 <= idx <= n:
-                return ordered[idx - 1]
-            elif idx == n + 1:
-                custom = input("Enter model name: ").strip()
-                return custom if custom else None
-            elif idx == n + 2:
-                return None
-            print(f"Please enter 1-{n + 2}")
+            val = int(choice)
+            if 1 <= val <= n:
+                return ordered[val - 1]
+            extra_idx = val - n - 1
+            if 0 <= extra_idx < len(extra):
+                if extra[extra_idx] == "Enter custom model name":
+                    custom = input("Enter model name: ").strip()
+                    return custom if custom else None
+                return None  # skip
+            print(f"Please enter 1-{total}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
@@ -3260,7 +3278,6 @@ def _nous_device_code_login(
        open_browser = False

    print(f"Starting Hermes login via {pconfig.name}...")
-    print(f"Portal: {portal_base_url}")
    if insecure:
        print("TLS verification: disabled (--insecure)")
    elif ca_bundle:
@@ -3280,19 +3297,18 @@ def _nous_device_code_login(
        interval = int(device_data["interval"])

        print()
-        print("To continue:")
-        print(f"  1. Open: {verification_url}")
-        print(f"  2. If prompted, enter code: {user_code}")
-
        if open_browser:
            opened = webbrowser.open(verification_url)
            if opened:
-                print("  (Opened browser for verification)")
+                print("If you don't see a browser window open, navigate to this URL:")
            else:
-                print("  Could not open browser automatically — use the URL above.")
+                print("Navigate to this URL to continue:")
+        print(verification_url)
+        print(f"If you're prompted for a code, use {user_code}")
+        print()

        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-        print(f"Waiting for approval (polling every {effective_interval}s)...")
+        print(f"Waiting for approval (checking every {effective_interval}s)...")

        token_data = _poll_for_token(
            client=client,
@@ -3357,7 +3373,7 @@ def _nous_device_code_login(
        raise


-def _login_nous(args, pconfig: ProviderConfig) -> None:
+def login_nous(args, pconfig: ProviderConfig) -> None:
    """Nous Portal device authorization flow."""
    timeout_seconds = getattr(args, "timeout", None) or 15.0
    insecure = bool(getattr(args, "insecure", False))
@@ -3419,7 +3435,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])

+            _portal = auth_state.get("portal_base_url", "")
+
            print()
+
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
@@ -3428,14 +3447,17 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
                        model_ids, pricing, free_tier=True,
                    )
-            _portal = auth_state.get("portal_base_url", "")
-            if model_ids:
-                print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
-                selected_model = _prompt_model_selection(
-                    model_ids, pricing=pricing,
-                    unavailable_models=unavailable_models,
-                    portal_url=_portal,
-                )
+                if not free_tier:
+                    print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
+                if len(model_ids) > 1:
+                    selected_model = _prompt_model_selection(
+                        model_ids, pricing=pricing,
+                        unavailable_models=unavailable_models,
+                        portal_url=_portal,
+                        allow_custom=not free_tier
+                    )
+                else:
+                    selected_model = model_ids[0]
            elif unavailable_models:
                _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
                print("No free models currently available.")
@@ -1085,9 +1085,6 @@ def cmd_chat(args):
        print(
            "It looks like Hermes isn't configured yet -- no API keys or providers found."
        )
-        print()
-        print("  Run:  hermes setup")
-        print()

        from hermes_cli.setup import (
            is_interactive_stdin,
@@ -1100,16 +1097,8 @@ def cmd_chat(args):
            )
            sys.exit(1)

-        try:
-            reply = input("Run setup now? [Y/n] ").strip().lower()
-        except (EOFError, KeyboardInterrupt):
-            reply = "n"
-        if reply in ("", "y", "yes"):
-            cmd_setup(args)
-            return
-        print()
-        print("You can run 'hermes setup' at any time to configure.")
-        sys.exit(1)
+        cmd_setup(args)
+        return

    # Start update check in background (runs while other init happens)
    try:
@@ -2135,7 +2124,7 @@ def _model_flow_nous(config, current_model="", args=None):
        resolve_nous_runtime_credentials,
        AuthError,
        format_auth_error,
-        _login_nous,
+        login_nous,
        PROVIDER_REGISTRY,
    )
    from hermes_cli.config import (
@@ -2148,8 +2137,6 @@ def _model_flow_nous(config, current_model="", args=None):

    state = get_provider_auth_state("nous")
    if not state or not state.get("access_token"):
-        print("Not logged into Nous Portal. Starting login...")
-        print()
        try:
            mock_args = argparse.Namespace(
                portal_url=getattr(args, "portal_url", None),
@@ -2161,7 +2148,7 @@ def _model_flow_nous(config, current_model="", args=None):
                ca_bundle=getattr(args, "ca_bundle", None),
                insecure=bool(getattr(args, "insecure", False)),
            )
-            _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+            login_nous(mock_args, PROVIDER_REGISTRY["nous"])
            # Offer Tool Gateway enablement for paid subscribers
            try:
                _refreshed = load_config() or {}
@@ -2212,7 +2199,7 @@ def _model_flow_nous(config, current_model="", args=None):
                    ca_bundle=None,
                    insecure=False,
                )
-                _login_nous(mock_args, PROVIDER_REGISTRY["nous"])
+                login_nous(mock_args, PROVIDER_REGISTRY["nous"])
            except Exception as login_exc:
                print(f"Re-login failed: {login_exc}")
            return
@@ -18,9 +18,10 @@ import shutil
 import sys
 import copy
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Literal, Optional, Dict, Any

 from hermes_cli.nous_subscription import get_nous_subscription_features
+from hermes_cli.main import _model_flow_nous
 from tools.tool_backend_helpers import managed_nous_tools_enabled
 from utils import base_url_hostname
 from hermes_constants import get_optional_skills_dir
@@ -655,7 +656,7 @@ def _prompt_container_resources(config: dict):



-def setup_model_provider(config: dict, *, quick: bool = False):
+def setup_model_provider(config: dict, *, quick: bool | Literal["nous_portal"] = False):
    """Configure the inference provider and default model.

    Delegates to ``cmd_model()`` (the same flow used by ``hermes model``)
@@ -677,7 +678,11 @@ def setup_model_provider(config: dict, *, quick: bool = False):
    # credential prompting, model selection, and config persistence.
    from hermes_cli.main import select_provider_and_model
    try:
-        select_provider_and_model()
+        if quick == "nous_portal":
+            config = load_config()
+            _model_flow_nous(config)
+        else:
+            select_provider_and_model()
    except (SystemExit, KeyboardInterrupt):
        print()
        print_info("Provider setup skipped.")
@@ -3030,11 +3035,15 @@ def run_setup_wizard(args):
            config = load_config()

        setup_mode = prompt_choice("How would you like to set up Hermes?", [
-            "Quick setup — provider, model & messaging (recommended)",
+            "Nous Account setup — model & messaging (recommended)",
+            "Quick setup — provider, model & messaging",
            "Full setup — configure everything",
        ], 0)

        if setup_mode == 0:
+            _run_first_time_quick_setup(config, hermes_home, is_existing, nous_quick=True)
+            return
+        if setup_mode == 1:
            _run_first_time_quick_setup(config, hermes_home, is_existing)
            return

@@ -3095,7 +3104,7 @@ def _resolve_hermes_chat_argv() -> Optional[list[str]]:
    return None


-def _offer_launch_chat():
+def _offer_launch_chat(auto_launch = False):
    """Prompt the user to jump straight into chat after setup."""
    print()
    if not prompt_yes_no("Launch hermes chat now?", True):
@@ -3109,7 +3118,7 @@ def _offer_launch_chat():
    os.execvp(chat_argv[0], chat_argv)


-def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
+def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, nous_quick=False):
    """Streamlined first-time setup: provider + model only.

    Applies sensible defaults for TTS (Edge), terminal (local), agent
@@ -3117,7 +3126,7 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
    ``hermes setup <section>``.
    """
    # Step 1: Model & Provider (essential — skips rotation/vision/TTS)
-    setup_model_provider(config, quick=True)
+    setup_model_provider(config, quick="nous_portal" if nous_quick else True )

    # Step 2: Apply defaults for everything else
    _apply_default_agent_settings(config)
@@ -3150,7 +3159,9 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):

    _print_setup_summary(config, hermes_home)

-    _offer_launch_chat()
+    # if the user hasn't set up the gateway, assume they want to launch chat.
+    force_launch_chat = gateway_choice == 0
+    _offer_launch_chat(force_launch_chat)


 def _run_quick_setup(config: dict, hermes_home):
@@ -49,7 +49,7 @@ from hermes_cli.config import (
 from gateway.status import get_running_pid, read_runtime_status

 try:
-    from fastapi import FastAPI, HTTPException, Request, WebSocket
+    from fastapi import FastAPI, HTTPException, Request
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
@@ -69,14 +69,8 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 # Session token for protecting sensitive endpoints (reveal).
 # Generated fresh on every server start — dies when the process exits.
 # Injected into the SPA HTML so only the legitimate web UI can use it.
-#
-# Dev override: set HERMES_DASHBOARD_DEV_TOKEN to pin the token across
-# restarts so the Vite dev server (running on a different port than the
-# FastAPI backend) can inject the same value into its served index.html
-# and hit /api/* + /api/ws successfully. Not for production.
 # ---------------------------------------------------------------------------
-
-_SESSION_TOKEN = (os.environ.get("HERMES_DASHBOARD_DEV_TOKEN") or "").strip() or secrets.token_urlsafe(32)
+_SESSION_TOKEN = secrets.token_urlsafe(32)
 _SESSION_HEADER_NAME = "X-Hermes-Session-Token"

 # Simple rate limiter for the reveal endpoint
@@ -2793,34 +2787,6 @@ def _mount_plugin_api_routes():
            _log.warning("Failed to load plugin %s API routes: %s", plugin["name"], exc)


-# ---------------------------------------------------------------------------
-# tui_gateway WebSocket — wire-compatible with `python -m tui_gateway.entry`.
-#
-# Same newline-delimited JSON-RPC protocol the Ink TUI speaks over stdio,
-# exposed over WebSocket so browser / iOS / Android clients can drive the
-# exact same handlers with zero dispatcher duplication.
-#
-# Auth: client supplies the ephemeral session token via ``?token=`` query
-# parameter, matching the REST auth model. Must be validated before ``accept``
-# so unauthorised clients never see any traffic.
-# ---------------------------------------------------------------------------
-
-
-@app.websocket("/api/ws")
-async def _tui_gateway_websocket(ws: WebSocket):
-    """WebSocket entrypoint that replays stdio tui_gateway over a socket."""
-    token = ws.query_params.get("token", "")
-    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
-        await ws.close(code=4401)
-        return
-
-    # Imported lazily so this module can load in environments where
-    # tui_gateway isn't available (e.g. config-only tooling).
-    from tui_gateway.ws import handle_ws
-
-    await handle_ws(ws)
-
-
 # Mount plugin API routes before the SPA catch-all.
 _mount_plugin_api_routes()

@@ -571,7 +571,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
        captured["ca_bundle"] = login_args.ca_bundle
        captured["insecure"] = login_args.insecure

-    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login)
+    monkeypatch.setattr("hermes_cli.auth.login_nous", _fake_login)

    hermes_main.cmd_model(
        SimpleNamespace(
@@ -1,22 +1,28 @@
 """Regression tests for the TUI gateway's `complete.path` handler.

-Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
-with no colon yet) still surfaced files alongside directories in the
-TUI composer, because the gateway-side completion lives in
-`tui_gateway/server.py` and was never touched by the earlier fix to
-`hermes_cli/commands.py`.
+Reported during the TUI v2 blitz retest:
+  - typing `@folder:` (and `@folder` with no colon yet) surfaced files
+    alongside directories — the gateway-side completion lives in
+    `tui_gateway/server.py` and was never touched by the earlier fix to
+    `hermes_cli/commands.py`.
+  - typing `@appChrome` required the full `@ui-tui/src/components/app…`
+    path to find the file — users expect Cmd-P-style fuzzy basename
+    matching across the repo, not a strict directory prefix filter.

 Covers:
  - `@folder:` only yields directories
  - `@file:` only yields regular files
  - Bare `@folder` / `@file` (no colon) lists cwd directly
  - Explicit prefix is preserved in the completion text
+  - `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
 """

 from __future__ import annotations

 from pathlib import Path

+import pytest
+
 from tui_gateway import server


@@ -33,6 +39,15 @@ def _items(word: str):
    return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]


+@pytest.fixture(autouse=True)
+def _reset_fuzzy_cache(monkeypatch):
+    # Each test walks a fresh tmp dir; clear the cached listing so prior
+    # roots can't leak through the TTL window.
+    server._fuzzy_cache.clear()
+    yield
+    server._fuzzy_cache.clear()
+
+
 def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)
@@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):

    for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
        assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
+
+
+# ── Fuzzy basename matching ──────────────────────────────────────────────
+# Users shouldn't have to know the full path — typing `@appChrome` should
+# find `ui-tui/src/components/appChrome.tsx`.
+
+
+def _nested_fixture(tmp_path: Path):
+    (tmp_path / "readme.md").write_text("x")
+    (tmp_path / ".env").write_text("x")
+    (tmp_path / "ui-tui/src/components").mkdir(parents=True)
+    (tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
+    (tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
+    (tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
+    (tmp_path / "tui_gateway").mkdir()
+    (tmp_path / "tui_gateway/server.py").write_text("x")
+
+
+def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
+    """`@appChrome` — with no slash — should surface the nested file."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    entries = _items("@appChrome")
+    texts = [t for t, _, _ in entries]
+
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+    # Display is the basename, meta is the containing directory, so the
+    # picker can show `appChrome.tsx  ui-tui/src/components` on one row.
+    row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx")
+    assert row[1] == "appChrome.tsx"
+    assert row[2] == "ui-tui/src/components"
+
+
+def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
+    """Better matches sort before weaker matches regardless of path depth."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+    (tmp_path / "server.py").write_text("x")  # exact basename match at root
+
+    texts = [t for t, _, _ in _items("@server")]
+
+    # Root `server.py` and `tui_gateway/server.py` share the same basename,
+    # so both rank the same (rank 1) on basename for this query; the shorter
+    # rel path breaks the tie and puts the root-level file first.
+    assert texts[0] == "@file:server.py", texts
+    assert "@file:tui_gateway/server.py" in texts
+
+
+def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
+    """Mid-basename camelCase pieces match without substring scanning."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@Chrome")]
+
+    # `Chrome` starts a camelCase word inside `appChrome.tsx`.
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+
+def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
+    """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@uCo")]
+
+    assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
+
+
+def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
+    """Explicit `@file:` prefix still wins the completion tag."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@file:appChrome")]
+
+    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
+
+
+def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
+    """Any `/` in the query = user is navigating; keep directory listing."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@ui-tui/src/components/app")]
+
+    # Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
+    # It should only surface direct children of the named dir — not the
+    # nested `useCompletion.ts`.
+    assert any("appChrome.tsx" in t for t in texts), texts
+    assert not any("useCompletion.ts" in t for t in texts), texts
+
+
+def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
+    """`@folder:<name>` still lists directories — fuzzy scanner only walks
+    files (git-tracked + untracked), so defer to the dir-listing path."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    texts = [t for t, _, _ in _items("@folder:ui")]
+
+    # Root has `ui-tui/` as a directory; the listing branch should surface it.
+    assert any(t.startswith("@folder:ui-tui") for t in texts), texts
+
+
+def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
+    """`.env` doesn't leak into `@env` but does show for `@.env`."""
+    monkeypatch.chdir(tmp_path)
+    _nested_fixture(tmp_path)
+
+    assert not any(".env" in t for t, _, _ in _items("@env"))
+    assert any(t.endswith(".env") for t, _, _ in _items("@.env"))
+
+
+def test_fuzzy_caps_results(tmp_path, monkeypatch):
+    """The 30-item cap survives a big tree."""
+    monkeypatch.chdir(tmp_path)
+    for i in range(60):
+        (tmp_path / f"mod_{i:03d}.py").write_text("x")
+
+    items = _items("@mod")
+
+    assert len(items) == 30
+
+
+def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
+    """When the gateway runs from a subdirectory of a git repo, fuzzy
+    completion paths must resolve under that cwd — not under the repo root.
+
+    Without this, `@appChrome` from inside `apps/web/` would suggest
+    `@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would
+    look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every
+    `git ls-files` result back to a `relpath(root)` and drop anything
+    outside `root` so the completion contract stays "paths are cwd-relative".
+    """
+    import subprocess
+
+    subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
+    subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True)
+    subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True)
+
+    (tmp_path / "apps" / "web" / "src").mkdir(parents=True)
+    (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
+    (tmp_path / "apps" / "api" / "src").mkdir(parents=True)
+    (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
+    (tmp_path / "README.md").write_text("x")
+
+    subprocess.run(["git", "add", "."], cwd=tmp_path, check=True)
+    subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True)
+
+    # Run from `apps/web/` — completions should be relative to here, and
+    # files outside this subtree (apps/api, README.md at root) shouldn't
+    # appear at all.
+    monkeypatch.chdir(tmp_path / "apps" / "web")
+
+    texts = [t for t, _, _ in _items("@appChrome")]
+
+    assert "@file:src/appChrome.tsx" in texts, texts
+    assert not any("apps/web/" in t for t in texts), texts
+
+    server._fuzzy_cache.clear()
+    other_texts = [t for t, _, _ in _items("@server")]
+
+    assert not any("server.ts" in t for t in other_texts), other_texts
+
+    server._fuzzy_cache.clear()
+    readme_texts = [t for t, _, _ in _items("@README")]
+
+    assert not any("README.md" in t for t in readme_texts), readme_texts
@@ -1677,454 +1677,3 @@ class TestDashboardPluginManifestExtensions:
        plugins = web_server._get_dashboard_plugins(force_rescan=True)
        entry = next(p for p in plugins if p["name"] == "mixed-slots")
        assert entry["slots"] == ["sidebar", "header-right"]
-
-
-# ---------------------------------------------------------------------------
-# /api/ws — WebSocket wire-compatible with stdio tui_gateway
-# ---------------------------------------------------------------------------
-
-
-class TestTuiGatewayWebSocket:
-    """E2E tests for /api/ws.
-
-    The WS endpoint multiplexes the same JSON-RPC protocol Ink speaks over
-    stdio onto a browser/iOS-friendly socket. These tests exercise the
-    transport boundary without booting a real AIAgent — handlers are
-    monkey-patched in for deterministic byte-level assertions.
-    """
-
-    @pytest.fixture(autouse=True)
-    def _setup(self):
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app, _SESSION_TOKEN
-        self.client = TestClient(app)
-        self.token = _SESSION_TOKEN
-
-    def _url(self, token=None):
-        tok = self.token if token is None else token
-        return f"/api/ws?token={tok}" if tok else "/api/ws"
-
-    def _drain_ready(self, ws):
-        """Skip the ``gateway.ready`` event emitted on accept."""
-        frame = ws.receive_json()
-        assert frame.get("method") == "event"
-        assert frame["params"]["type"] == "gateway.ready"
-        return frame
-
-    def test_handshake_emits_gateway_ready(self):
-        with self.client.websocket_connect(self._url()) as ws:
-            first = ws.receive_json()
-            assert first["jsonrpc"] == "2.0"
-            assert first["method"] == "event"
-            assert first["params"]["type"] == "gateway.ready"
-            assert "skin" in first["params"]["payload"]
-
-    def test_rejects_missing_token(self):
-        from starlette.websockets import WebSocketDisconnect
-        with pytest.raises(WebSocketDisconnect):
-            with self.client.websocket_connect(self._url(token="")) as ws:
-                ws.receive_json()
-
-    def test_rejects_bad_token(self):
-        from starlette.websockets import WebSocketDisconnect
-        with pytest.raises(WebSocketDisconnect):
-            with self.client.websocket_connect(self._url(token="bogus-token-xyz")) as ws:
-                ws.receive_json()
-
-    def test_parse_error_on_bad_frame(self):
-        with self.client.websocket_connect(self._url()) as ws:
-            self._drain_ready(ws)
-            ws.send_text("this is { not json")
-            resp = ws.receive_json()
-            assert resp["jsonrpc"] == "2.0"
-            assert resp["error"]["code"] == -32700
-            assert resp["error"]["message"] == "parse error"
-
-    def test_unknown_method_returns_rpc_error(self):
-        with self.client.websocket_connect(self._url()) as ws:
-            self._drain_ready(ws)
-            ws.send_json({"jsonrpc": "2.0", "id": "u1", "method": "does.not.exist"})
-            resp = ws.receive_json()
-            assert resp["id"] == "u1"
-            assert resp["error"]["code"] == -32601
-            assert "does.not.exist" in resp["error"]["message"]
-
-    def test_inline_handler_returns_response(self):
-        """An inline handler's result round-trips via the WS transport."""
-        from tui_gateway import server
-
-        sentinel = "_ws_inline_test"
-        server._methods[sentinel] = lambda rid, params: server._ok(rid, {"pong": params.get("ping")})
-        try:
-            with self.client.websocket_connect(self._url()) as ws:
-                self._drain_ready(ws)
-                ws.send_json({"jsonrpc": "2.0", "id": "i1", "method": sentinel, "params": {"ping": "PONG"}})
-                resp = ws.receive_json()
-                assert resp == {"jsonrpc": "2.0", "id": "i1", "result": {"pong": "PONG"}}
-        finally:
-            server._methods.pop(sentinel, None)
-
-    def test_pool_handler_response_arrives_via_ws(self):
-        """Long-handler responses written from the thread pool must reach the WS client."""
-        from tui_gateway import server
-
-        # Register a "slash.exec" replacement so we exercise the pool path
-        # (_LONG_HANDLERS includes "slash.exec").
-        original = server._methods.get("slash.exec")
-        server._methods["slash.exec"] = lambda rid, params: server._ok(rid, {"output": "async-ok"})
-        try:
-            with self.client.websocket_connect(self._url()) as ws:
-                self._drain_ready(ws)
-                ws.send_json({"jsonrpc": "2.0", "id": "p1", "method": "slash.exec", "params": {}})
-                resp = ws.receive_json()
-                assert resp["id"] == "p1"
-                assert resp["result"] == {"output": "async-ok"}
-        finally:
-            if original is not None:
-                server._methods["slash.exec"] = original
-            else:
-                server._methods.pop("slash.exec", None)
-
-    def test_session_events_route_to_owning_ws(self):
-        """Events emitted for a session created over WS land on that WS."""
-        from tui_gateway import server
-        from tui_gateway.transport import current_transport
-
-        sentinel_create = "_ws_emit_test_create"
-        sentinel_emit = "_ws_emit_test_fire"
-        created_sid = {"value": ""}
-
-        def create(rid, params):
-            sid = f"ws-emit-test-{uuid_hex()}"
-            created_sid["value"] = sid
-            server._sessions[sid] = {
-                "session_key": sid,
-                "transport": current_transport(),
-            }
-            return server._ok(rid, {"session_id": sid})
-
-        def fire(rid, params):
-            sid = params["session_id"]
-            server._emit("demo.event", sid, {"n": params.get("n", 0)})
-            return server._ok(rid, {"ok": True})
-
-        def uuid_hex():
-            import uuid
-            return uuid.uuid4().hex[:8]
-
-        server._methods[sentinel_create] = create
-        server._methods[sentinel_emit] = fire
-        try:
-            with self.client.websocket_connect(self._url()) as ws:
-                self._drain_ready(ws)
-
-                ws.send_json({"jsonrpc": "2.0", "id": "c1", "method": sentinel_create})
-                create_resp = ws.receive_json()
-                assert create_resp["id"] == "c1"
-                sid = create_resp["result"]["session_id"]
-                assert sid == created_sid["value"]
-
-                ws.send_json({
-                    "jsonrpc": "2.0",
-                    "id": "e1",
-                    "method": sentinel_emit,
-                    "params": {"session_id": sid, "n": 7},
-                })
-                # Event fires synchronously inside the handler, so it should
-                # arrive before the response.
-                frame1 = ws.receive_json()
-                frame2 = ws.receive_json()
-
-                event_frame = frame1 if frame1.get("method") == "event" else frame2
-                resp_frame = frame2 if frame2.get("id") == "e1" else frame1
-
-                assert event_frame["params"]["type"] == "demo.event"
-                assert event_frame["params"]["session_id"] == sid
-                assert event_frame["params"]["payload"] == {"n": 7}
-                assert resp_frame["result"] == {"ok": True}
-        finally:
-            server._methods.pop(sentinel_create, None)
-            server._methods.pop(sentinel_emit, None)
-            server._sessions.pop(created_sid["value"], None)
-
-    def test_ws_disconnect_resets_session_transport(self):
-        """After a WS hangs up, sessions it owned fall back to stdio so stray emits don't crash."""
-        from tui_gateway import server
-        from tui_gateway.transport import current_transport
-
-        sentinel = "_ws_disconnect_test"
-        captured = {"sid": "", "transport": None}
-
-        def create(rid, params):
-            sid = "ws-disconnect-sid"
-            captured["sid"] = sid
-            captured["transport"] = current_transport()
-            server._sessions[sid] = {
-                "session_key": sid,
-                "transport": captured["transport"],
-            }
-            return server._ok(rid, {"session_id": sid})
-
-        server._methods[sentinel] = create
-        try:
-            with self.client.websocket_connect(self._url()) as ws:
-                self._drain_ready(ws)
-                ws.send_json({"jsonrpc": "2.0", "id": "c1", "method": sentinel})
-                ws.receive_json()
-
-            # Give the server a moment to run the finally-block cleanup.
-            import time
-            for _ in range(50):
-                if server._sessions.get(captured["sid"], {}).get("transport") is not captured["transport"]:
-                    break
-                time.sleep(0.02)
-
-            sess = server._sessions.get(captured["sid"])
-            assert sess is not None
-            assert sess["transport"] is server._stdio_transport
-        finally:
-            server._methods.pop(sentinel, None)
-            server._sessions.pop(captured["sid"], None)
-
-
-# ---------------------------------------------------------------------------
-# Transport parity — same RPC, stdio vs WS, byte-identical envelopes
-# ---------------------------------------------------------------------------
-
-
-class TestTuiGatewayTransportParity:
-    """The whole point of the transport abstraction is that handlers don't
-    know what's on the other end. These tests lock that in: the response
-    envelope produced by ``server.handle_request`` directly (stdio fast path)
-    must match what a WS client receives for the same request.
-    """
-
-    @pytest.fixture(autouse=True)
-    def _setup(self):
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app, _SESSION_TOKEN
-        self.client = TestClient(app)
-        self.token = _SESSION_TOKEN
-
-    def _ws_roundtrip(self, req: dict) -> dict:
-        with self.client.websocket_connect(f"/api/ws?token={self.token}") as ws:
-            ready = ws.receive_json()
-            assert ready["params"]["type"] == "gateway.ready"
-            ws.send_json(req)
-            return ws.receive_json()
-
-    def test_parity_unknown_method(self):
-        from tui_gateway import server
-        req = {"jsonrpc": "2.0", "id": "p-unk", "method": "no.such.method"}
-        assert self._ws_roundtrip(req) == server.handle_request(req)
-
-    def test_parity_inline_handler(self):
-        from tui_gateway import server
-
-        sentinel = "_parity_inline"
-        server._methods[sentinel] = lambda rid, params: server._ok(rid, {
-            "echo": params,
-            "const": 42,
-            "nested": {"a": [1, 2, 3], "b": None},
-        })
-        try:
-            req = {
-                "jsonrpc": "2.0",
-                "id": "p-inline",
-                "method": sentinel,
-                "params": {"hello": "world", "n": 1},
-            }
-            assert self._ws_roundtrip(req) == server.handle_request(req)
-        finally:
-            server._methods.pop(sentinel, None)
-
-    def test_parity_error_envelope(self):
-        from tui_gateway import server
-
-        sentinel = "_parity_err"
-        server._methods[sentinel] = lambda rid, params: server._err(rid, 4242, "nope")
-        try:
-            req = {"jsonrpc": "2.0", "id": "p-err", "method": sentinel}
-            assert self._ws_roundtrip(req) == server.handle_request(req)
-        finally:
-            server._methods.pop(sentinel, None)
-
-    def test_parity_stdio_transport_also_works(self):
-        """Calling dispatch() with the stdio transport explicitly must match the default."""
-        from tui_gateway import server
-
-        sentinel = "_parity_stdio"
-        server._methods[sentinel] = lambda rid, params: server._ok(rid, {"ok": True, "p": params})
-        try:
-            req = {"jsonrpc": "2.0", "id": "p-std", "method": sentinel, "params": {"x": 1}}
-            # Default (no transport arg)
-            default_resp = server.dispatch(dict(req))
-            # Explicit stdio transport
-            explicit_resp = server.dispatch(dict(req), server._stdio_transport)
-            assert default_resp == explicit_resp
-            assert default_resp["result"] == {"ok": True, "p": {"x": 1}}
-        finally:
-            server._methods.pop(sentinel, None)
-
-
-# ---------------------------------------------------------------------------
-# E2E: drive the "Ink --tui" JSON-RPC surface over ANY transport
-# ---------------------------------------------------------------------------
-
-
-class TestTuiGatewayE2EAnyPort:
-    """Scripted multi-message conversations that exercise the real dispatcher.
-
-    The same scripted sequence runs over (a) direct ``handle_request`` calls
-    and (b) a live WebSocket. Both must produce the same response envelopes
-    in the same order. This is the "hermes --tui in any port" check.
-    """
-
-    @pytest.fixture(autouse=True)
-    def _setup(self):
-        try:
-            from starlette.testclient import TestClient
-        except ImportError:
-            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app, _SESSION_TOKEN
-        self.client = TestClient(app)
-        self.token = _SESSION_TOKEN
-
-    def _install_scripted_methods(self):
-        """Install a tiny surface that mimics what Ink exercises on startup:
-
-        - commands.ping         returns a deterministic pong
-        - session.sim_create    creates a fake session (no real agent)
-        - session.sim_close     tears down the session
-        - config.sim_get_value  reads a key
-        """
-        from tui_gateway import server
-        from tui_gateway.transport import current_transport
-
-        added = []
-
-        def ping(rid, params):
-            return server._ok(rid, {"pong": True, "id": rid})
-        server._methods["commands.ping"] = ping
-        added.append("commands.ping")
-
-        def sim_create(rid, params):
-            import uuid
-            sid = f"sim-{uuid.uuid4().hex[:6]}"
-            server._sessions[sid] = {
-                "session_key": sid,
-                "transport": current_transport(),
-                "agent": None,
-            }
-            return server._ok(rid, {"session_id": sid})
-        server._methods["session.sim_create"] = sim_create
-        added.append("session.sim_create")
-
-        def sim_close(rid, params):
-            sid = params.get("session_id", "")
-            removed = server._sessions.pop(sid, None) is not None
-            return server._ok(rid, {"closed": removed})
-        server._methods["session.sim_close"] = sim_close
-        added.append("session.sim_close")
-
-        def sim_get_value(rid, params):
-            return server._ok(rid, {"value": "deterministic", "key": params.get("key", "")})
-        server._methods["config.sim_get_value"] = sim_get_value
-        added.append("config.sim_get_value")
-
-        return added
-
-    def _uninstall(self, added):
-        from tui_gateway import server
-        for name in added:
-            server._methods.pop(name, None)
-
-    def _script(self):
-        return [
-            {"jsonrpc": "2.0", "id": "s1", "method": "commands.ping"},
-            {"jsonrpc": "2.0", "id": "s2", "method": "session.sim_create"},
-            {"jsonrpc": "2.0", "id": "s3", "method": "config.sim_get_value",
-             "params": {"key": "display.skin"}},
-        ]
-
-    def test_script_over_direct_and_ws_match(self):
-        from tui_gateway import server
-
-        added = self._install_scripted_methods()
-        try:
-            script = self._script()
-
-            # Run over direct dispatch
-            direct_resps = [server.handle_request(dict(req)) for req in script]
-            # Clean up the session.create we just made so we don't leak into
-            # the WS run.
-            for r in direct_resps:
-                sid = (r.get("result") or {}).get("session_id")
-                if sid:
-                    server._sessions.pop(sid, None)
-
-            # Run over WS
-            with self.client.websocket_connect(f"/api/ws?token={self.token}") as ws:
-                ready = ws.receive_json()
-                assert ready["params"]["type"] == "gateway.ready"
-
-                ws_resps = []
-                for req in script:
-                    ws.send_json(req)
-                    ws_resps.append(ws.receive_json())
-
-            # Result shapes (stripping session-identity fields) should match.
-            def normalize(r):
-                r = dict(r)
-                if "result" in r and isinstance(r["result"], dict):
-                    result = dict(r["result"])
-                    # session ids are random — compare only structure
-                    if "session_id" in result:
-                        result["session_id"] = "<random>"
-                    r["result"] = result
-                return r
-
-            assert [normalize(r) for r in direct_resps] == [normalize(r) for r in ws_resps]
-
-            # And both surfaces ACTUALLY executed their handlers.
-            assert all("result" in r for r in ws_resps)
-            assert ws_resps[0]["result"]["pong"] is True
-            assert ws_resps[2]["result"]["value"] == "deterministic"
-        finally:
-            # Clean up any sessions created during the WS run.
-            for sid in [
-                sid for sid, sess in list(server._sessions.items()) if sid.startswith("sim-")
-            ]:
-                server._sessions.pop(sid, None)
-            self._uninstall(added)
-
-    def test_session_lifecycle_over_ws(self):
-        """Open a session, then close it — via WS only."""
-        from tui_gateway import server
-
-        added = self._install_scripted_methods()
-        try:
-            with self.client.websocket_connect(f"/api/ws?token={self.token}") as ws:
-                ready = ws.receive_json()
-                assert ready["params"]["type"] == "gateway.ready"
-
-                ws.send_json({"jsonrpc": "2.0", "id": "c1", "method": "session.sim_create"})
-                create = ws.receive_json()
-                sid = create["result"]["session_id"]
-                assert sid in server._sessions
-
-                ws.send_json({
-                    "jsonrpc": "2.0", "id": "x1", "method": "session.sim_close",
-                    "params": {"session_id": sid},
-                })
-                close = ws.receive_json()
-                assert close["result"] == {"closed": True}
-                assert sid not in server._sessions
-        finally:
-            self._uninstall(added)
@@ -1,6 +1,5 @@
 import atexit
 import concurrent.futures
-import contextvars
 import copy
 import json
 import logging
@@ -13,17 +12,9 @@ import time
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Optional

 from hermes_constants import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
-from tui_gateway.transport import (
-    StdioTransport,
-    Transport,
-    bind_transport,
-    current_transport,
-    reset_transport,
-)

 logger = logging.getLogger(__name__)

@@ -156,12 +147,6 @@ atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
 _real_stdout = sys.stdout
 sys.stdout = sys.stderr

-# Module-level stdio transport used as the fallback sink when no transport is
-# bound via contextvar or session. The stream is resolved through a lambda so
-# runtime monkey-patches of `_real_stdout` (used extensively in tests) still
-# land in the right place.
-_stdio_transport = StdioTransport(lambda: _real_stdout, _stdout_lock)
-

 class _SlashWorker:
    """Persistent HermesCLI subprocess for slash commands."""
@@ -281,24 +266,14 @@ def _db_unavailable_error(rid, *, code: int):


 def write_json(obj: dict) -> bool:
-    """Emit one JSON frame. Routes via the most-specific transport available.
-
-    Precedence:
-
-    1. Event frames with a session id → the transport stored on that session,
-       so async events land with the client that owns the session even if
-       the emitting thread has no contextvar binding.
-    2. Otherwise the transport bound on the current context (set by
-       :func:`dispatch` for the lifetime of a request).
-    3. Otherwise the module-level stdio transport, matching the historical
-       behaviour and keeping tests that monkey-patch ``_real_stdout`` green.
-    """
-    if obj.get("method") == "event":
-        sid = ((obj.get("params") or {}).get("session_id")) or ""
-        if sid and (t := (_sessions.get(sid) or {}).get("transport")) is not None:
-            return t.write(obj)
-
-    return (current_transport() or _stdio_transport).write(obj)
+    """Write ``obj`` to the real stdout as one newline-terminated JSON line.
+
+    The frame is serialized with ``ensure_ascii=False`` and written to
+    ``_real_stdout`` (the stream captured before ``sys.stdout`` was pointed
+    at stderr), then flushed. The write and flush happen under
+    ``_stdout_lock`` so they complete as one unit.
+
+    Returns ``True`` once the frame has been written and flushed, ``False``
+    when the write raises ``BrokenPipeError``.
+    """
+    line = json.dumps(obj, ensure_ascii=False) + "\n"
+    try:
+        with _stdout_lock:
+            _real_stdout.write(line)
+            _real_stdout.flush()
+        return True
+    except BrokenPipeError:
+        # Only a broken pipe is reported as False; other errors propagate.
+        return False


 def _emit(event: str, sid: str, payload: dict | None = None):
@@ -368,39 +343,27 @@ def handle_request(req: dict) -> dict | None:
    return fn(req.get("id"), req.get("params", {}))


-def dispatch(req: dict, transport: Optional[Transport] = None) -> dict | None:
+def dispatch(req: dict) -> dict | None:
    """Route inbound RPCs — long handlers to the pool, everything else inline.

    Returns a response dict when handled inline. Returns None when the
    handler was scheduled on the pool; the worker writes its own
-    response via the bound transport when done.
-
-    *transport* (optional): pins every write produced by this request —
-    including any events emitted by the handler — to the given transport.
-    When omitted, writes fall back to the module-level stdio transport,
-    preserving the original behaviour for ``tui_gateway.entry``.
+    response via write_json when done.
    """
-    t = transport or _stdio_transport
-    token = bind_transport(t)
-    try:
-        if req.get("method") not in _LONG_HANDLERS:
-            return handle_request(req)
+    if req.get("method") not in _LONG_HANDLERS:
+        return handle_request(req)

-        # Snapshot the context so the pool worker sees the bound transport.
-        ctx = contextvars.copy_context()
+    def run():
+        """Run the request's handler and write its response, if any.
+
+        This closure is what gets submitted to ``_pool`` for long handlers,
+        so nothing is returned to the caller of ``dispatch``; the response
+        is emitted here via ``write_json`` instead.
+        """
+        try:
+            resp = handle_request(req)
+        except Exception as exc:
+            # Turn any handler failure into a JSON-RPC error frame (-32000)
+            # carrying the request id, instead of letting it escape the worker.
+            resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+        if resp is not None:
+            write_json(resp)

-        def run():
-            try:
-                resp = handle_request(req)
-            except Exception as exc:
-                resp = _err(req.get("id"), -32000, f"handler error: {exc}")
-            if resp is not None:
-                t.write(resp)
+    _pool.submit(run)

-        _pool.submit(lambda: ctx.run(run))
-        return None
-    finally:
-        reset_transport(token)
+    return None


 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
@@ -1293,7 +1256,6 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80):
        "tool_progress_mode": _load_tool_progress_mode(),
        "edit_snapshots": {},
        "tool_started_at": {},
-        "transport": current_transport() or _stdio_transport,
    }
    try:
        _sessions[sid]["slash_worker"] = _SlashWorker(
@@ -1436,7 +1398,6 @@ def _(rid, params: dict) -> dict:
        "slash_worker": None,
        "tool_progress_mode": _load_tool_progress_mode(),
        "tool_started_at": {},
-        "transport": current_transport() or _stdio_transport,
    }

    def _build() -> None:
@@ -3295,6 +3256,162 @@ def _(rid, params: dict) -> dict:

 # ── Methods: complete ─────────────────────────────────────────────────

+# How long (seconds, measured with time.monotonic) a cached file listing is
+# reused by `_list_repo_files` before it is rebuilt.
+_FUZZY_CACHE_TTL_S = 5.0
+# Upper bound on the number of paths collected per listing; both the git
+# path and the os.walk fallback stop once this many files were gathered.
+_FUZZY_CACHE_MAX_FILES = 20000
+# Directory names pruned from the os.walk fallback (used when the git
+# listing yields nothing). Dot-directories are pruned there as well.
+_FUZZY_FALLBACK_EXCLUDES = frozenset(
+    {
+        ".git",
+        ".hg",
+        ".svn",
+        ".next",
+        ".cache",
+        ".venv",
+        "venv",
+        "node_modules",
+        "__pycache__",
+        "dist",
+        "build",
+        "target",
+        ".mypy_cache",
+        ".pytest_cache",
+        ".ruff_cache",
+    }
+)
+# Guards every read and write of `_fuzzy_cache`.
+_fuzzy_cache_lock = threading.Lock()
+# root directory -> (monotonic timestamp of the listing, root-relative paths).
+_fuzzy_cache: dict[str, tuple[float, list[str]]] = {}
+
+
+def _list_repo_files(root: str) -> list[str]:
+    """Return file paths relative to ``root``, always ``/``-separated.
+
+    Uses ``git ls-files`` from the repo top (resolved via
+    ``rev-parse --show-toplevel``) so the listing covers tracked files plus
+    untracked files that aren't ignored, then converts each path back to be
+    relative to ``root``. Files outside ``root`` (parent directories of cwd,
+    sibling subtrees) are excluded so the picker stays scoped to what's
+    reachable from the gateway's cwd.
+
+    Falls back to a bounded ``os.walk(root)`` when the git listing produces
+    nothing (``root`` isn't inside a git repo, git is missing or timed out,
+    or no listed file lives under ``root``). At most
+    ``_FUZZY_CACHE_MAX_FILES`` paths are returned.
+
+    The result is cached per ``root`` for ``_FUZZY_CACHE_TTL_S`` seconds so
+    rapid keystrokes don't respawn git processes. The returned list is the
+    cached object itself; callers must treat it as read-only.
+    """
+    now = time.monotonic()
+    with _fuzzy_cache_lock:
+        cached = _fuzzy_cache.get(root)
+        if cached and now - cached[0] < _FUZZY_CACHE_TTL_S:
+            return cached[1]
+
+    files: list[str] = []
+    try:
+        top_result = subprocess.run(
+            ["git", "-C", root, "rev-parse", "--show-toplevel"],
+            capture_output=True,
+            timeout=2.0,
+            check=False,
+        )
+        if top_result.returncode == 0:
+            top = top_result.stdout.decode("utf-8", "replace").strip()
+            # git reports the toplevel with symlinks resolved. Resolve
+            # `root` the same way, otherwise a symlinked cwd makes every
+            # relpath start with "../" and the whole listing gets dropped.
+            base = os.path.realpath(root)
+            list_result = subprocess.run(
+                ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
+                capture_output=True,
+                timeout=2.0,
+                check=False,
+            )
+            if list_result.returncode == 0:
+                for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
+                    if not p:
+                        continue
+                    rel = os.path.relpath(os.path.join(top, p), base).replace(os.sep, "/")
+                    # Skip parents/siblings of cwd — keep the picker scoped
+                    # to root-and-below, matching Cmd-P workspace semantics.
+                    if rel.startswith("../"):
+                        continue
+                    files.append(rel)
+                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                        break
+    except (OSError, ValueError, subprocess.TimeoutExpired):
+        # Best effort: git missing, unreadable cwd, timeout, or (ValueError)
+        # `relpath` across Windows drives / a NUL byte in `root`. Whatever
+        # was collected so far is kept; an empty list triggers the walk.
+        pass
+
+    if not files:
+        # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays
+        # tractable. Dotfiles themselves survive — the ranker decides based
+        # on whether the query starts with `.`.
+        try:
+            for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
+                # In-place slice assignment is what prunes os.walk's descent.
+                dirnames[:] = [
+                    d
+                    for d in dirnames
+                    if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".")
+                ]
+                rel_dir = os.path.relpath(dirpath, root)
+                for f in filenames:
+                    rel = f if rel_dir == "." else f"{rel_dir}/{f}"
+                    files.append(rel.replace(os.sep, "/"))
+                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                        break
+                if len(files) >= _FUZZY_CACHE_MAX_FILES:
+                    break
+        except OSError:
+            pass
+
+    with _fuzzy_cache_lock:
+        _fuzzy_cache[root] = (now, files)
+
+    return files
+
+
+def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None:
+    """Score how well ``query`` matches the basename ``name``.
+
+    Returns ``(tier, len(name))`` where a smaller tuple is a better match,
+    or ``None`` when ``name`` doesn't match at all. Matching is
+    case-insensitive. Tiers:
+
+      0 — query equals the basename
+      1 — basename starts with the query (`app` → `appChrome.tsx`)
+      2 — a word / camelCase segment starts with the query
+          (`chrome` → `appChrome.tsx`)
+      3 — query occurs somewhere inside the basename (also used for an
+          empty query)
+      4 — query characters appear in order, not necessarily adjacent
+
+    The length component makes shorter names win within a tier.
+    """
+    size = len(name)
+    if not query:
+        return (3, size)
+
+    lowered = name.lower()
+    needle = query.lower()
+
+    if lowered == needle:
+        return (0, size)
+
+    if lowered.startswith(needle):
+        return (1, size)
+
+    # Cut the name into segments at `-`, `_`, `.` (separator dropped) and in
+    # front of an uppercase letter that follows a non-uppercase one (letter
+    # kept): `foo-bar_baz.qux` → foo/bar/baz/qux, `appChrome` → app/Chrome.
+    # Only an approximation — misses fall through to the tiers below.
+    separators = frozenset("-_.")
+    segments: list[str] = []
+    current = ""
+    for char in name:
+        is_separator = char in separators
+        starts_hump = char.isupper() and current and not current[-1].isupper()
+        if not is_separator and not starts_hump:
+            current += char
+            continue
+        if current:
+            segments.append(current)
+        current = "" if is_separator else char
+    if current:
+        segments.append(current)
+
+    if any(segment.lower().startswith(needle) for segment in segments):
+        return (2, size)
+
+    if needle in lowered:
+        return (3, size)
+
+    # Subsequence test: each `in` consumes the shared iterator up to and
+    # including the first hit, so the characters must be found in order.
+    remaining = iter(lowered)
+    if all(char in remaining for char in needle):
+        return (4, size)
+
+    return None
+

@method("complete.path")
 def _(rid, params: dict) -> dict:
@@ -3330,6 +3447,42 @@ def _(rid, params: dict) -> dict:
            prefix_tag = ""
            path_part = query if is_context else query

+        # Fuzzy basename search across the repo when the user types a bare
+        # name with no path separator — `@appChrome` surfaces every file
+        # whose basename matches, regardless of directory depth. Matches what
+        # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
+        # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
+        # path so explicit navigation intent is preserved.
+        if (
+            is_context
+            and path_part
+            and "/" not in path_part
+            and prefix_tag != "folder"
+        ):
+            root = os.getcwd()
+            ranked: list[tuple[tuple[int, int], str, str]] = []
+            for rel in _list_repo_files(root):
+                basename = os.path.basename(rel)
+                if basename.startswith(".") and not path_part.startswith("."):
+                    continue
+                rank = _fuzzy_basename_rank(basename, path_part)
+                if rank is None:
+                    continue
+                ranked.append((rank, rel, basename))
+
+            ranked.sort(key=lambda r: (r[0], len(r[1]), r[1]))
+            tag = prefix_tag or "file"
+            for _, rel, basename in ranked[:30]:
+                items.append(
+                    {
+                        "text": f"@{tag}:{rel}",
+                        "display": basename,
+                        "meta": os.path.dirname(rel),
+                    }
+                )
+
+            return _ok(rid, {"items": items})
+
        expanded = _normalize_completion_path(path_part) if path_part else "."
        if expanded == "." or not expanded:
            search_dir, match = ".", ""
@@ -1,91 +0,0 @@
-"""Transport abstraction for the tui_gateway JSON-RPC server.
-
-Historically the gateway wrote every JSON frame directly to real stdout.  This
-module decouples the I/O sink from the handler logic so the same dispatcher
-can be driven over stdio (``tui_gateway.entry``) or WebSocket
-(``tui_gateway.ws``) without duplicating code.
-
-A :class:`Transport` is anything that can accept a JSON-serialisable dict and
-forward it to its peer.  The active transport for the current request is
-tracked in a :class:`contextvars.ContextVar` so handlers — including those
-dispatched onto the worker pool — route their writes to the right peer.
-
-Backward compatibility
----------------------
-``tui_gateway.server.write_json`` still works without any transport bound.
-When nothing is on the contextvar and no session-level transport is found,
-it falls back to the module-level :class:`StdioTransport`, which wraps the
-original ``_real_stdout`` + ``_stdout_lock`` pair.  Tests that monkey-patch
-``server._real_stdout`` continue to work because the stdio transport resolves
-the stream lazily through a callback.
-"""
-
-from __future__ import annotations
-
-import contextvars
-import json
-import threading
-from typing import Any, Callable, Optional, Protocol, runtime_checkable
-
-
-@runtime_checkable
-class Transport(Protocol):
-    """Minimal interface every transport implements."""
-
-    def write(self, obj: dict) -> bool:
-        """Emit one JSON frame. Return ``False`` when the peer is gone."""
-
-    def close(self) -> None:
-        """Release any resources owned by this transport."""
-
-
-_current_transport: contextvars.ContextVar[Optional[Transport]] = (
-    contextvars.ContextVar(
-        "hermes_gateway_transport",
-        default=None,
-    )
-)
-
-
-def current_transport() -> Optional[Transport]:
-    """Return the transport bound for the current request, if any."""
-    return _current_transport.get()
-
-
-def bind_transport(transport: Optional[Transport]):
-    """Bind *transport* for the current context. Returns a token for :func:`reset_transport`."""
-    return _current_transport.set(transport)
-
-
-def reset_transport(token) -> None:
-    """Restore the transport binding captured by :func:`bind_transport`."""
-    _current_transport.reset(token)
-
-
-class StdioTransport:
-    """Writes JSON frames to a stream (usually ``sys.stdout``).
-
-    The stream is resolved via a callable so runtime monkey-patches of the
-    underlying stream continue to work — this preserves the behaviour the
-    existing test suite relies on (``monkeypatch.setattr(server, "_real_stdout", ...)``).
-    """
-
-    __slots__ = ("_stream_getter", "_lock")
-
-    def __init__(self, stream_getter: Callable[[], Any], lock: threading.Lock) -> None:
-        self._stream_getter = stream_getter
-        self._lock = lock
-
-    def write(self, obj: dict) -> bool:
-        line = json.dumps(obj, ensure_ascii=False) + "\n"
-        try:
-            with self._lock:
-                stream = self._stream_getter()
-                stream.write(line)
-                stream.flush()
-            return True
-        except BrokenPipeError:
-            return False
-
-    def close(self) -> None:
-        return None
@@ -1,174 +0,0 @@
-"""WebSocket transport for the tui_gateway JSON-RPC server.
-
-Reuses :func:`tui_gateway.server.dispatch` verbatim so every RPC method, every
-slash command, every approval/clarify/sudo flow, and every agent event flows
-through the same handlers whether the client is Ink over stdio or an iOS /
-web client over WebSocket.
-
-Wire protocol
-------------
-Identical to stdio: newline-delimited JSON-RPC in both directions. The server
-emits a ``gateway.ready`` event immediately after connection accept, then
-echoes responses/events for inbound requests. No framing differences.
-
-Mounting
--------
-    from fastapi import WebSocket
-    from tui_gateway.ws import handle_ws
-
-    @app.websocket("/api/ws")
-    async def ws(ws: WebSocket):
-        await handle_ws(ws)
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-from typing import Any
-
-from tui_gateway import server
-
-_log = logging.getLogger(__name__)
-
-# Max seconds a pool-dispatched handler will block waiting for the event loop
-# to flush a WS frame before we mark the transport dead. Protects handler
-# threads from a wedged socket.
-_WS_WRITE_TIMEOUT_S = 10.0
-
-# Keep starlette optional at import time; handle_ws uses the real class when
-# it's available and falls back to a generic Exception sentinel otherwise.
-try:
-    from starlette.websockets import WebSocketDisconnect as _WebSocketDisconnect
-except ImportError:  # pragma: no cover - starlette is a required install path
-    _WebSocketDisconnect = Exception  # type: ignore[assignment]
-
-
-class WSTransport:
-    """Per-connection WS transport.
-
-    ``write`` is safe to call from any thread *other than* the event loop
-    thread that owns the socket. Pool workers (the only real caller) run in
-    their own threads, so marshalling onto the loop via
-    :func:`asyncio.run_coroutine_threadsafe` + ``future.result()`` is correct
-    and deadlock-free there.
-
-    When called from the loop thread itself (e.g. by ``handle_ws`` for an
-    inline response) the same call would deadlock: we'd schedule work onto
-    the loop we're currently blocking. We detect that case and fire-and-
-    forget instead. Callers that need to know when the bytes are on the wire
-    should use :meth:`write_async` from the loop thread.
-    """
-
-    def __init__(self, ws: Any, loop: asyncio.AbstractEventLoop) -> None:
-        self._ws = ws
-        self._loop = loop
-        self._closed = False
-
-    def write(self, obj: dict) -> bool:
-        if self._closed:
-            return False
-
-        line = json.dumps(obj, ensure_ascii=False)
-
-        try:
-            on_loop = asyncio.get_running_loop() is self._loop
-        except RuntimeError:
-            on_loop = False
-
-        if on_loop:
-            # Fire-and-forget — don't block the loop waiting on itself.
-            self._loop.create_task(self._safe_send(line))
-            return True
-
-        try:
-            fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop)
-            fut.result(timeout=_WS_WRITE_TIMEOUT_S)
-            return not self._closed
-        except Exception as exc:
-            self._closed = True
-            _log.debug("ws write failed: %s", exc)
-            return False
-
-    async def write_async(self, obj: dict) -> bool:
-        """Send from the owning event loop. Awaits until the frame is on the wire."""
-        if self._closed:
-            return False
-        await self._safe_send(json.dumps(obj, ensure_ascii=False))
-        return not self._closed
-
-    async def _safe_send(self, line: str) -> None:
-        try:
-            await self._ws.send_text(line)
-        except Exception as exc:
-            self._closed = True
-            _log.debug("ws send failed: %s", exc)
-
-    def close(self) -> None:
-        self._closed = True
-
-
-async def handle_ws(ws: Any) -> None:
-    """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``."""
-    await ws.accept()
-
-    transport = WSTransport(ws, asyncio.get_running_loop())
-
-    await transport.write_async(
-        {
-            "jsonrpc": "2.0",
-            "method": "event",
-            "params": {
-                "type": "gateway.ready",
-                "payload": {"skin": server.resolve_skin()},
-            },
-        }
-    )
-
-    try:
-        while True:
-            try:
-                raw = await ws.receive_text()
-            except _WebSocketDisconnect:
-                break
-
-            line = raw.strip()
-            if not line:
-                continue
-
-            try:
-                req = json.loads(line)
-            except json.JSONDecodeError:
-                ok = await transport.write_async(
-                    {
-                        "jsonrpc": "2.0",
-                        "error": {"code": -32700, "message": "parse error"},
-                        "id": None,
-                    }
-                )
-                if not ok:
-                    break
-                continue
-
-            # dispatch() may schedule long handlers on the pool; it returns
-            # None in that case and the worker writes the response itself via
-            # the transport we pass in (a separate thread, so transport.write
-            # is the safe path there). For inline handlers it returns the
-            # response dict, which we write here from the loop.
-            resp = await asyncio.to_thread(server.dispatch, req, transport)
-            if resp is not None and not await transport.write_async(resp):
-                break
-    finally:
-        transport.close()
-
-        # Detach the transport from any sessions it owned so later emits
-        # fall back to stdio instead of crashing into a closed socket.
-        for _, sess in list(server._sessions.items()):
-            if sess.get("transport") is transport:
-                sess["transport"] = server._stdio_transport
-
-        try:
-            await ws.close()
-        except Exception:
-            pass
@@ -152,91 +152,79 @@ describe('createGatewayEventHandler', () => {
    expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
  })

-  it('attaches inline_diff to the assistant completion body', () => {
+  it('anchors inline_diff as its own segment where the edit happened', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
+    const block = `\`\`\`diff\n${cleaned}\n\`\`\``

-    onEvent({
-      payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
-      type: 'tool.start'
-    } as any)
-    onEvent({
-      payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
+    // Narration → tool → tool-complete → more narration → message-complete.
+    // The diff MUST land between the two narration segments, not tacked
+    // onto the final one.
+    onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
+    onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any)
+    onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)

-    // Diff is buffered for message.complete and sanitized (ANSI stripped).
+    // Diff is already committed to segmentMessages as its own segment.
    expect(appended).toHaveLength(0)
-    expect(turnController.pendingInlineDiffs).toEqual([cleaned])
+    expect(turnController.segmentMessages).toEqual([
+      { role: 'assistant', text: 'Editing the file' },
+      { kind: 'diff', role: 'assistant', text: block }
+    ])

-    onEvent({
-      payload: { text: 'patch applied' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)

-    // Diff is rendered in the same assistant message body as the completion.
-    expect(appended).toHaveLength(1)
-    expect(appended[0]).toMatchObject({ role: 'assistant' })
-    expect(appended[0]?.text).toContain('patch applied')
-    expect(appended[0]?.text).toContain('```diff')
-    expect(appended[0]?.text).toContain(cleaned)
+    // Three transcript messages: pre-tool narration → diff (kind='diff',
+    // so MessageLine gives it blank-line breathing room) → post-tool
+    // narration. The final message does NOT contain a diff.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.text).toBe('Editing the file')
+    expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
+    expect(appended[2]?.text).toBe('patch applied')
+    expect(appended[2]?.text).not.toContain('```diff')
  })

-  it('does not append inline_diff twice when assistant text already contains it', () => {
+  it('drops the diff segment when the final assistant text narrates the same diff', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\``

-    onEvent({
-      payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

+    // Only the final message — diff-only segment dropped so we don't
+    // render two stacked copies of the same patch.
    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
    expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
  })

-  it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
+  it('strips the CLI "┊ review diff" header from inline diff segments', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const raw = '  \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'

-    onEvent({
-      payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
+    // diff segment first (kind='diff'), final narration second
+    expect(appended).toHaveLength(2)
+    expect(appended[0]?.kind).toBe('diff')
    expect(appended[0]?.text).not.toContain('┊ review diff')
    expect(appended[0]?.text).toContain('--- a/foo.ts')
+    expect(appended[1]?.text).toBe('done')
  })

-  it('suppresses inline_diff when assistant already wrote a diff fence', () => {
+  it('drops the diff segment when assistant writes its own ```diff fence', () => {
    const appended: Msg[] = []
    const onEvent = createGatewayEventHandler(buildCtx(appended))
    const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'

-    onEvent({
-      payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
-      type: 'tool.complete'
-    } as any)
-    onEvent({
-      payload: { text: assistantText },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

    expect(appended).toHaveLength(1)
    expect(appended[0]?.text).toBe(assistantText)
@@ -252,15 +240,18 @@ describe('createGatewayEventHandler', () => {
      payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
      type: 'tool.complete'
    } as any)
-    onEvent({
-      payload: { text: 'done' },
-      type: 'message.complete'
-    } as any)
+    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    expect(appended).toHaveLength(1)
-    expect(appended[0]?.tools?.[0]).toContain('Review Diff')
-    expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    // Two segments: the diff block (kind='diff', no tool row) and the final
+    // narration (tool row belongs here since pendingSegmentTools carries
+    // across the flushStreamingSegment call).
+    expect(appended).toHaveLength(2)
+    expect(appended[0]?.kind).toBe('diff')
    expect(appended[0]?.text).toContain('```diff')
+    expect(appended[0]?.tools ?? []).toEqual([])
+    expect(appended[1]?.text).toBe('done')
+    expect(appended[1]?.tools?.[0]).toContain('Review Diff')
+    expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
  })

  it('shows setup panel for missing provider startup error', () => {
@@ -385,10 +385,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          return
        }

-        // Keep inline diffs attached to the assistant completion body so
-        // they render in the same message flow, not as a standalone system
-        // artifact that can look out-of-place around tool rows.
-        turnController.queueInlineDiff(inlineDiffText)
+        // Anchor the diff to where the edit happened in the turn — between
+        // the narration that preceded the tool call and whatever the agent
+        // streams afterwards. Previously the diff was merged into the end of
+        // the final message even when the edit fired mid-turn, which read as
+        // if the agent had produced the patch after its closing remarks.
+        turnController.pushInlineDiffSegment(inlineDiffText)

        return
      }
@@ -19,6 +19,20 @@ const INTERRUPT_COOLDOWN_MS = 1500
 const ACTIVITY_LIMIT = 8
 const TRAIL_LIMIT = 8

+// Extracts the raw patch from a diff-only segment produced by
+// pushInlineDiffSegment. Used at message.complete to dedupe against final
+// assistant text that narrates the same patch. Returns null for anything
+// else so real assistant narration never gets touched.
+const diffSegmentBody = (msg: Msg): null | string => {
+  if (msg.kind !== 'diff') {
+    return null
+  }
+
+  const m = msg.text.match(/^```diff\n([\s\S]*?)\n```$/)
+
+  return m ? m[1]! : null
+}
+
 export interface InterruptDeps {
  appendMessage: (msg: Msg) => void
  gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@@ -40,7 +54,6 @@ class TurnController {
  bufRef = ''
  interrupted = false
  lastStatusNote = ''
-  pendingInlineDiffs: string[] = []
  persistedToolLabels = new Set<string>()
  persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
  protocolWarned = false
@@ -79,7 +92,6 @@ class TurnController {
    this.activeTools = []
    this.streamTimer = clear(this.streamTimer)
    this.bufRef = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []

@@ -186,18 +198,35 @@ class TurnController {
    }, REASONING_PULSE_MS)
  }

-  queueInlineDiff(diffText: string) {
+  pushInlineDiffSegment(diffText: string) {
    // Strip CLI chrome the gateway emits before the unified diff (e.g. a
    // leading "┊ review diff" header written by `_emit_inline_diff` for the
    // terminal printer). That header only makes sense as stdout dressing,
    // not inside a markdown ```diff block.
-    const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
+    const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()

-    if (!text || this.pendingInlineDiffs.includes(text)) {
+    if (!stripped) {
      return
    }

-    this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
+    // Flush any in-progress streaming text as its own segment first, so the
+    // diff lands BETWEEN the assistant narration that preceded the edit and
+    // whatever the agent streams afterwards — not glued onto the final
+    // message. This is the whole point of segment-anchored diffs: the diff
+    // renders where the edit actually happened.
+    this.flushStreamingSegment()
+
+    const block = `\`\`\`diff\n${stripped}\n\`\`\``
+
+    // Skip consecutive duplicates (same tool firing tool.complete twice, or
+    // two edits producing the same patch). This check is deliberately cheap;
+    // deeper dedupe against the final assistant text happens at message.complete.
+    if (this.segmentMessages.at(-1)?.text === block) {
+      return
+    }
+
+    this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }]
+    patchTurnState({ streamSegments: this.segmentMessages })
  }

  pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
@@ -234,7 +263,6 @@ class TurnController {
    this.idle()
    this.clearReasoning()
    this.clearStatusTimer()
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.segmentMessages = []
    this.turnTools = []
@@ -245,31 +273,31 @@ class TurnController {
    const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
    const split = splitReasoning(rawText)
    const finalText = split.text
-    // Skip appending if the assistant already narrated the diff inside a
-    // markdown fence of its own — otherwise we render two stacked diff
-    // blocks for the same edit.
-    const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
-
-    const remainingInlineDiffs = assistantAlreadyHasDiff
-      ? []
-      : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
-
-    const inlineDiffBlock = remainingInlineDiffs.length
-      ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
-      : ''
-
-    const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
    const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
    const savedToolTokens = this.toolTokenAcc
    const tools = this.pendingSegmentTools
-    const finalMessages = [...this.segmentMessages]

-    if (mergedText) {
+    // Drop diff-only segments the agent is about to narrate in the final
+    // reply. Without this, a closing "here's the diff …" message would
+    // render two stacked copies of the same patch. Only touches segments
+    // with `kind: 'diff'` emitted by pushInlineDiffSegment — real
+    // assistant narration stays put.
+    const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
+
+    const segments = this.segmentMessages.filter(msg => {
+      const body = diffSegmentBody(msg)
+
+      return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
+    })
+
+    const finalMessages = [...segments]
+
+    if (finalText) {
      finalMessages.push({
        role: 'assistant',
-        text: mergedText,
+        text: finalText,
        thinking: savedReasoning || undefined,
        thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
        toolTokens: savedToolTokens || undefined,
@@ -300,7 +328,7 @@ class TurnController {
    this.bufRef = ''
    patchTurnState({ activity: [], outcome: '' })

-    return { finalMessages, finalText: mergedText, wasInterrupted }
+    return { finalMessages, finalText, wasInterrupted }
  }

  recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
@@ -406,7 +434,6 @@ class TurnController {
    this.bufRef = ''
    this.interrupted = false
    this.lastStatusNote = ''
-    this.pendingInlineDiffs = []
    this.pendingSegmentTools = []
    this.protocolWarned = false
    this.segmentMessages = []
@@ -452,7 +479,6 @@ class TurnController {
    this.endReasoningPhase()
    this.clearReasoning()
    this.activeTools = []
-    this.pendingInlineDiffs = []
    this.turnTools = []
    this.toolTokenAcc = 0
    this.persistedToolLabels.clear()
@@ -81,11 +81,16 @@ export const MessageLine = memo(function MessageLine({
    return <Text {...(body ? { color: body } : {})}>{msg.text}</Text>
  })()

+  // Diff segments (emitted by pushInlineDiffSegment between narration
+  // segments) need a blank line on both sides so the patch doesn't butt up
+  // against the prose around it.
+  const isDiffSegment = msg.kind === 'diff'
+
  return (
    <Box
      flexDirection="column"
-      marginBottom={msg.role === 'user' ? 1 : 0}
-      marginTop={msg.role === 'user' || msg.kind === 'slash' ? 1 : 0}
+      marginBottom={msg.role === 'user' || isDiffSegment ? 1 : 0}
+      marginTop={msg.role === 'user' || msg.kind === 'slash' || isDiffSegment ? 1 : 0}
    >
      {showDetails && (
        <Box flexDirection="column" marginBottom={1}>
@@ -102,7 +102,7 @@ export interface ClarifyReq {

 export interface Msg {
  info?: SessionInfo
-  kind?: 'intro' | 'panel' | 'slash' | 'trail'
+  kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail'
  panelData?: PanelData
  role: Role
  text: string
@@ -11,22 +11,16 @@ Browser-based dashboard for managing Hermes Agent configuration, API keys, and m
 ## Development

 ```bash
-# Pin a shared dev token so Vite (5173) and FastAPI (9119) agree.
-# Without this, the SPA can't authenticate against the backend in dev mode.
-export HERMES_DASHBOARD_DEV_TOKEN="dev-$(openssl rand -hex 16)"
+# Start the backend API server
+cd ../
+python -m hermes_cli.main web --no-open

-# Terminal 1 — backend on :9119
-hermes dashboard --no-open
-
-# Terminal 2 — Vite dev server on :5173 with HMR + /api proxy
+# In another terminal, start the Vite dev server (with HMR + API proxy)
 cd web/
 npm run dev
-# then open http://localhost:5173
 ```

-The Vite dev server proxies `/api` and `/api/ws` (WebSocket) requests to `http://127.0.0.1:9119` (the FastAPI backend). The dev token is injected into the served `index.html` so the SPA's `window.__HERMES_SESSION_TOKEN__` matches what the backend expects.
-
-For a one-shot demo without HMR, skip the env var and just run `hermes dashboard` — it builds and serves the SPA directly on :9119 with a fresh random token injected.
+The Vite dev server proxies `/api` requests to `http://127.0.0.1:9119` (the FastAPI backend).

 ## Build

@@ -26,7 +26,6 @@ import { Cell, Grid, SelectionSwitcher, Typography } from "@nous-research/ui";
 import { cn } from "@/lib/utils";
 import { Backdrop } from "@/components/Backdrop";
 import StatusPage from "@/pages/StatusPage";
-import ChatPage from "@/pages/ChatPage";
 import ConfigPage from "@/pages/ConfigPage";
 import EnvPage from "@/pages/EnvPage";
 import SessionsPage from "@/pages/SessionsPage";
@@ -46,7 +45,6 @@ import { useTheme } from "@/themes";
 *  `path` in `BUILTIN_NAV` so `/path` lookups stay consistent. */
 const BUILTIN_ROUTES: Record<string, React.ComponentType> = {
  "/": StatusPage,
-  "/chat": ChatPage,
  "/sessions": SessionsPage,
  "/analytics": AnalyticsPage,
  "/logs": LogsPage,
@@ -58,7 +56,6 @@ const BUILTIN_ROUTES: Record<string, React.ComponentType> = {

 const BUILTIN_NAV: NavItem[] = [
  { path: "/", labelKey: "status", label: "Status", icon: Activity },
-  { path: "/chat", labelKey: "chat", label: "Chat", icon: Terminal },
  {
    path: "/sessions",
    labelKey: "sessions",
@@ -1,50 +1,22 @@
-import { useMemo, type ReactNode } from "react";
+import { useMemo } from "react";

 /**
 * Lightweight markdown renderer for LLM output.
 * Handles: code blocks, inline code, bold, italic, headers, links, lists, horizontal rules.
 * NOT a full CommonMark parser — optimized for typical assistant message patterns.
- *
- * `streaming` renders a blinking caret at the tail of the last block so it
- * appears to hug the final character instead of wrapping onto a new line
- * after a block element (paragraph/list/code/…).
 */
-export function Markdown({
-  content,
-  highlightTerms,
-  streaming,
-}: {
-  content: string;
-  highlightTerms?: string[];
-  streaming?: boolean;
-}) {
+export function Markdown({ content, highlightTerms }: { content: string; highlightTerms?: string[] }) {
  const blocks = useMemo(() => parseBlocks(content), [content]);
-  const caret = streaming ? <StreamingCaret /> : null;

  return (
    <div className="text-sm text-foreground leading-relaxed space-y-2">
      {blocks.map((block, i) => (
-        <Block
-          key={i}
-          block={block}
-          highlightTerms={highlightTerms}
-          caret={caret && i === blocks.length - 1 ? caret : null}
-        />
+        <Block key={i} block={block} highlightTerms={highlightTerms} />
      ))}
-      {blocks.length === 0 && caret}
    </div>
  );
 }

-function StreamingCaret() {
-  return (
-    <span
-      aria-hidden
-      className="inline-block w-[0.5em] h-[1em] ml-0.5 align-[-0.15em] bg-foreground/50 animate-pulse"
-    />
-  );
-}
-
 /* ------------------------------------------------------------------ */
 /*  Types                                                              */
 /* ------------------------------------------------------------------ */
@@ -86,11 +58,7 @@ function parseBlocks(text: string): BlockNode[] {
    // Heading
    const headingMatch = line.match(/^(#{1,4})\s+(.+)/);
    if (headingMatch) {
-      blocks.push({
-        type: "heading",
-        level: headingMatch[1].length,
-        content: headingMatch[2],
-      });
+      blocks.push({ type: "heading", level: headingMatch[1].length, content: headingMatch[2] });
      i++;
      continue;
    }
@@ -156,23 +124,12 @@ function parseBlocks(text: string): BlockNode[] {
 /*  Block renderer                                                     */
 /* ------------------------------------------------------------------ */

-function Block({
-  block,
-  highlightTerms,
-  caret,
-}: {
-  block: BlockNode;
-  highlightTerms?: string[];
-  caret?: ReactNode;
-}) {
+function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: string[] }) {
  switch (block.type) {
    case "code":
      return (
        <pre className="bg-secondary/60 border border-border px-3 py-2.5 text-xs font-mono leading-relaxed overflow-x-auto">
-          <code>
-            {block.content}
-            {caret}
-          </code>
+          <code>{block.content}</code>
        </pre>
      );

@@ -184,46 +141,25 @@ function Block({
        h3: "text-sm font-semibold",
        h4: "text-sm font-medium",
      };
-      return (
-        <Tag className={sizes[Tag]}>
-          <InlineContent text={block.content} highlightTerms={highlightTerms} />
-          {caret}
-        </Tag>
-      );
+      return <Tag className={sizes[Tag]}><InlineContent text={block.content} highlightTerms={highlightTerms} /></Tag>;
    }

    case "hr":
-      return (
-        <>
-          <hr className="border-border" />
-          {caret}
-        </>
-      );
+      return <hr className="border-border" />;

    case "list": {
      const Tag = block.ordered ? "ol" : "ul";
-      const last = block.items.length - 1;
      return (
-        <Tag
-          className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}
-        >
+        <Tag className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}>
          {block.items.map((item, i) => (
-            <li key={i}>
-              <InlineContent text={item} highlightTerms={highlightTerms} />
-              {i === last ? caret : null}
-            </li>
+            <li key={i}><InlineContent text={item} highlightTerms={highlightTerms} /></li>
          ))}
        </Tag>
      );
    }

    case "paragraph":
-      return (
-        <p>
-          <InlineContent text={block.content} highlightTerms={highlightTerms} />
-          {caret}
-        </p>
-      );
+      return <p><InlineContent text={block.content} highlightTerms={highlightTerms} /></p>;
  }
 }

@@ -242,8 +178,7 @@ type InlineNode =
 function parseInline(text: string): InlineNode[] {
  const nodes: InlineNode[] = [];
  // Pattern priority: code > link > bold > italic > bare URL > line break
-  const pattern =
-    /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
+  const pattern = /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
  let lastIndex = 0;
  let match: RegExpExecArray | null;

@@ -282,13 +217,7 @@ function parseInline(text: string): InlineNode[] {
  return nodes;
 }

-function InlineContent({
-  text,
-  highlightTerms,
-}: {
-  text: string;
-  highlightTerms?: string[];
-}) {
+function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?: string[] }) {
  const nodes = useMemo(() => parseInline(text), [text]);

  return (
@@ -296,34 +225,17 @@ function InlineContent({
      {nodes.map((node, i) => {
        switch (node.type) {
          case "text":
-            return (
-              <HighlightedText
-                key={i}
-                text={node.content}
-                terms={highlightTerms}
-              />
-            );
+            return <HighlightedText key={i} text={node.content} terms={highlightTerms} />;
          case "code":
            return (
-              <code
-                key={i}
-                className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90"
-              >
+              <code key={i} className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90">
                {node.content}
              </code>
            );
          case "bold":
-            return (
-              <strong key={i} className="font-semibold">
-                <HighlightedText text={node.content} terms={highlightTerms} />
-              </strong>
-            );
+            return <strong key={i} className="font-semibold"><HighlightedText text={node.content} terms={highlightTerms} /></strong>;
          case "italic":
-            return (
-              <em key={i}>
-                <HighlightedText text={node.content} terms={highlightTerms} />
-              </em>
-            );
+            return <em key={i}><HighlightedText text={node.content} terms={highlightTerms} /></em>;
          case "link":
            return (
              <a
@@ -357,12 +269,10 @@ function HighlightedText({ text, terms }: { text: string; terms?: string[] }) {
    <>
      {parts.map((part, i) =>
        regex.test(part) ? (
-          <mark key={i} className="bg-warning/30 text-warning px-0.5">
-            {part}
-          </mark>
+          <mark key={i} className="bg-warning/30 text-warning px-0.5">{part}</mark>
        ) : (
          <span key={i}>{part}</span>
-        ),
+        )
      )}
    </>
  );
@@ -1,392 +0,0 @@
-import { Button } from "@/components/ui/button";
-import { Input } from "@/components/ui/input";
-import type { GatewayClient } from "@/lib/gatewayClient";
-import { Check, Loader2, Search, X } from "lucide-react";
-import { useEffect, useMemo, useRef, useState } from "react";
-
-/**
- * Two-stage model picker modal.
- *
- * Mirrors ui-tui/src/components/modelPicker.tsx:
- *   Stage 1: pick provider (authenticated providers only)
- *   Stage 2: pick model within that provider
- *
- * On confirm, emits `/model <model> --provider <slug> [--global]` through
- * the parent callback so ChatPage can dispatch it via the existing slash
- * pipeline. That keeps persistence + actual switch logic in one place.
- */
-
-interface ModelOptionProvider {
-  name: string;
-  slug: string;
-  models?: string[];
-  total_models?: number;
-  is_current?: boolean;
-  warning?: string;
-}
-
-interface ModelOptionsResponse {
-  model?: string;
-  provider?: string;
-  providers?: ModelOptionProvider[];
-}
-
-interface Props {
-  gw: GatewayClient;
-  sessionId: string;
-  onClose(): void;
-  /** Parent runs the resulting slash command through slashExec. */
-  onSubmit(slashCommand: string): void;
-}
-
-export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) {
-  const [providers, setProviders] = useState<ModelOptionProvider[]>([]);
-  const [currentModel, setCurrentModel] = useState("");
-  const [currentProviderSlug, setCurrentProviderSlug] = useState("");
-  const [loading, setLoading] = useState(true);
-  const [error, setError] = useState<string | null>(null);
-  const [selectedSlug, setSelectedSlug] = useState("");
-  const [selectedModel, setSelectedModel] = useState("");
-  const [query, setQuery] = useState("");
-  const [persistGlobal, setPersistGlobal] = useState(false);
-  const closedRef = useRef(false);
-
-  // Load providers + models on open.
-  useEffect(() => {
-    closedRef.current = false;
-
-    gw.request<ModelOptionsResponse>(
-      "model.options",
-      sessionId ? { session_id: sessionId } : {},
-    )
-      .then((r) => {
-        if (closedRef.current) return;
-        const next = r?.providers ?? [];
-        setProviders(next);
-        setCurrentModel(String(r?.model ?? ""));
-        setCurrentProviderSlug(String(r?.provider ?? ""));
-        setSelectedSlug(
-          (next.find((p) => p.is_current) ?? next[0])?.slug ?? "",
-        );
-        setSelectedModel("");
-        setLoading(false);
-      })
-      .catch((e) => {
-        if (closedRef.current) return;
-        setError(e instanceof Error ? e.message : String(e));
-        setLoading(false);
-      });
-
-    return () => {
-      closedRef.current = true;
-    };
-  }, [gw, sessionId]);
-
-  // Esc closes.
-  useEffect(() => {
-    const onKey = (e: KeyboardEvent) => {
-      if (e.key === "Escape") {
-        e.preventDefault();
-        onClose();
-      }
-    };
-    window.addEventListener("keydown", onKey);
-    return () => window.removeEventListener("keydown", onKey);
-  }, [onClose]);
-
-  const selectedProvider = useMemo(
-    () => providers.find((p) => p.slug === selectedSlug) ?? null,
-    [providers, selectedSlug],
-  );
-
-  const models = useMemo(
-    () => selectedProvider?.models ?? [],
-    [selectedProvider],
-  );
-
-  const needle = query.trim().toLowerCase();
-
-  const filteredProviders = useMemo(
-    () =>
-      !needle
-        ? providers
-        : providers.filter(
-            (p) =>
-              p.name.toLowerCase().includes(needle) ||
-              p.slug.toLowerCase().includes(needle) ||
-              (p.models ?? []).some((m) => m.toLowerCase().includes(needle)),
-          ),
-    [providers, needle],
-  );
-
-  const filteredModels = useMemo(
-    () =>
-      !needle ? models : models.filter((m) => m.toLowerCase().includes(needle)),
-    [models, needle],
-  );
-
-  const canConfirm = !!selectedProvider && !!selectedModel;
-
-  const confirm = () => {
-    if (!canConfirm) return;
-    const global = persistGlobal ? " --global" : "";
-    onSubmit(
-      `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`,
-    );
-    onClose();
-  };
-
-  return (
-    <div
-      className="fixed inset-0 z-100 flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
-      onClick={(e) => e.target === e.currentTarget && onClose()}
-      role="dialog"
-      aria-modal="true"
-      aria-labelledby="model-picker-title"
-    >
-      <div className="relative w-full max-w-3xl max-h-[80vh] border border-border bg-card shadow-2xl flex flex-col">
-        <button
-          type="button"
-          onClick={onClose}
-          className="absolute right-3 top-3 text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
-          aria-label="Close"
-        >
-          <X className="h-5 w-5" />
-        </button>
-
-        <header className="p-5 pb-3 border-b border-border">
-          <h2
-            id="model-picker-title"
-            className="font-display text-base tracking-wider uppercase"
-          >
-            Switch Model
-          </h2>
-          <p className="text-xs text-muted-foreground mt-1 font-mono">
-            current: {currentModel || "(unknown)"}
-            {currentProviderSlug && ` · ${currentProviderSlug}`}
-          </p>
-        </header>
-
-        <div className="px-5 pt-3 pb-2 border-b border-border">
-          <div className="relative">
-            <Search className="absolute left-2 top-1/2 -translate-y-1/2 h-3.5 w-3.5 text-muted-foreground" />
-            <Input
-              autoFocus
-              placeholder="Filter providers and models…"
-              value={query}
-              onChange={(e) => setQuery(e.target.value)}
-              className="pl-7 h-8 text-sm"
-            />
-          </div>
-        </div>
-
-        <div className="flex-1 min-h-0 grid grid-cols-[200px_1fr] overflow-hidden">
-          <ProviderColumn
-            loading={loading}
-            error={error}
-            providers={filteredProviders}
-            total={providers.length}
-            selectedSlug={selectedSlug}
-            query={needle}
-            onSelect={(slug) => {
-              setSelectedSlug(slug);
-              setSelectedModel("");
-            }}
-          />
-
-          <ModelColumn
-            provider={selectedProvider}
-            models={filteredModels}
-            allModels={models}
-            selectedModel={selectedModel}
-            currentModel={currentModel}
-            currentProviderSlug={currentProviderSlug}
-            onSelect={setSelectedModel}
-            onConfirm={(m) => {
-              setSelectedModel(m);
-              // Confirm on next tick so state settles.
-              window.setTimeout(confirm, 0);
-            }}
-          />
-        </div>
-
-        <footer className="border-t border-border p-3 flex items-center justify-between gap-3 flex-wrap">
-          <label className="flex items-center gap-2 text-xs text-muted-foreground cursor-pointer select-none">
-            <input
-              type="checkbox"
-              checked={persistGlobal}
-              onChange={(e) => setPersistGlobal(e.target.checked)}
-              className="cursor-pointer"
-            />
-            Persist globally (otherwise this session only)
-          </label>
-
-          <div className="flex items-center gap-2 ml-auto">
-            <Button variant="ghost" size="sm" onClick={onClose}>
-              Cancel
-            </Button>
-            <Button size="sm" onClick={confirm} disabled={!canConfirm}>
-              Switch
-            </Button>
-          </div>
-        </footer>
-      </div>
-    </div>
-  );
-}
-
-/* ------------------------------------------------------------------ */
-/*  Provider column                                                    */
-/* ------------------------------------------------------------------ */
-
-function ProviderColumn({
-  loading,
-  error,
-  providers,
-  total,
-  selectedSlug,
-  query,
-  onSelect,
-}: {
-  loading: boolean;
-  error: string | null;
-  providers: ModelOptionProvider[];
-  total: number;
-  selectedSlug: string;
-  query: string;
-  onSelect(slug: string): void;
-}) {
-  return (
-    <div className="border-r border-border overflow-y-auto">
-      {loading && (
-        <div className="flex items-center gap-2 p-4 text-xs text-muted-foreground">
-          <Loader2 className="h-3 w-3 animate-spin" /> loading…
-        </div>
-      )}
-
-      {error && <div className="p-4 text-xs text-destructive">{error}</div>}
-
-      {!loading && !error && providers.length === 0 && (
-        <div className="p-4 text-xs text-muted-foreground italic">
-          {query
-            ? "no matches"
-            : total === 0
-              ? "no authenticated providers"
-              : "no matches"}
-        </div>
-      )}
-
-      {providers.map((p) => {
-        const active = p.slug === selectedSlug;
-        return (
-          <button
-            key={p.slug}
-            type="button"
-            onClick={() => onSelect(p.slug)}
-            className={`w-full text-left px-3 py-2 text-xs border-l-2 transition-colors cursor-pointer flex items-start gap-2 ${
-              active
-                ? "bg-primary/10 border-l-primary text-foreground"
-                : "border-l-transparent text-muted-foreground hover:text-foreground hover:bg-muted/40"
-            }`}
-          >
-            <div className="flex-1 min-w-0">
-              <div className="flex items-center gap-1.5">
-                <span className="font-medium truncate">{p.name}</span>
-                {p.is_current && <CurrentTag />}
-              </div>
-              <div className="text-[0.65rem] text-muted-foreground/80 font-mono truncate">
-                {p.slug} · {p.total_models ?? p.models?.length ?? 0} models
-              </div>
-            </div>
-          </button>
-        );
-      })}
-    </div>
-  );
-}
-
-/* ------------------------------------------------------------------ */
-/*  Model column                                                       */
-/* ------------------------------------------------------------------ */
-
-function ModelColumn({
-  provider,
-  models,
-  allModels,
-  selectedModel,
-  currentModel,
-  currentProviderSlug,
-  onSelect,
-  onConfirm,
-}: {
-  provider: ModelOptionProvider | null;
-  models: string[];
-  allModels: string[];
-  selectedModel: string;
-  currentModel: string;
-  currentProviderSlug: string;
-  onSelect(model: string): void;
-  onConfirm(model: string): void;
-}) {
-  if (!provider) {
-    return (
-      <div className="overflow-y-auto">
-        <div className="p-4 text-xs text-muted-foreground italic">
-          pick a provider →
-        </div>
-      </div>
-    );
-  }
-
-  return (
-    <div className="overflow-y-auto">
-      {provider.warning && (
-        <div className="p-3 text-xs text-destructive border-b border-border">
-          {provider.warning}
-        </div>
-      )}
-
-      {models.length === 0 ? (
-        <div className="p-4 text-xs text-muted-foreground italic">
-          {allModels.length
-            ? "no models match your filter"
-            : "no models listed for this provider"}
-        </div>
-      ) : (
-        models.map((m) => {
-          const active = m === selectedModel;
-          const isCurrent =
-            m === currentModel && provider.slug === currentProviderSlug;
-
-          return (
-            <button
-              key={m}
-              type="button"
-              onClick={() => onSelect(m)}
-              onDoubleClick={() => onConfirm(m)}
-              className={`w-full text-left px-3 py-1.5 text-xs font-mono transition-colors cursor-pointer flex items-center gap-2 ${
-                active
-                  ? "bg-primary/15 text-foreground"
-                  : "text-muted-foreground hover:text-foreground hover:bg-muted/40"
-              }`}
-            >
-              <Check
-                className={`h-3 w-3 shrink-0 ${active ? "text-primary" : "text-transparent"}`}
-              />
-              <span className="flex-1 truncate">{m}</span>
-              {isCurrent && <CurrentTag />}
-            </button>
-          );
-        })
-      )}
-    </div>
-  );
-}
-
-function CurrentTag() {
-  return (
-    <span className="text-[0.6rem] uppercase tracking-wider text-primary/80 shrink-0">
-      current
-    </span>
-  );
-}
@@ -1,174 +0,0 @@
-import type { GatewayClient } from "@/lib/gatewayClient";
-import { ChevronRight } from "lucide-react";
-import {
-  forwardRef,
-  useCallback,
-  useEffect,
-  useImperativeHandle,
-  useRef,
-  useState,
-} from "react";
-
-/**
- * Slash-command autocomplete popover, rendered above the composer in ChatPage.
- * Mirrors the completion UX of the Ink TUI — type `/`, see matching commands,
- * arrow keys or click to select, Tab to apply, Enter to submit.
- *
- * The parent owns all keyboard handling via `ref.handleKey`, which returns
- * true when the popover consumed the event, so the composer's Enter/arrow
- * logic stays in one place.
- */
-
-export interface CompletionItem {
-  display: string;
-  text: string;
-  meta?: string;
-}
-
-export interface SlashPopoverHandle {
-  /** Returns true if the key was consumed by the popover. */
-  handleKey(e: React.KeyboardEvent<HTMLTextAreaElement>): boolean;
-}
-
-interface Props {
-  input: string;
-  gw: GatewayClient | null;
-  onApply(nextInput: string): void;
-}
-
-interface CompletionResponse {
-  items?: CompletionItem[];
-  replace_from?: number;
-}
-
-const DEBOUNCE_MS = 60;
-
-export const SlashPopover = forwardRef<SlashPopoverHandle, Props>(
-  function SlashPopover({ input, gw, onApply }, ref) {
-    const [items, setItems] = useState<CompletionItem[]>([]);
-    const [selected, setSelected] = useState(0);
-    const [replaceFrom, setReplaceFrom] = useState(1);
-    const lastInputRef = useRef<string>("");
-
-    // Debounced completion fetch. We never clear `items` in the effect body
-    // (doing so would flag react-hooks/set-state-in-effect); instead the
-    // render guard below hides stale items once the input stops matching.
-    useEffect(() => {
-      const trimmed = input ?? "";
-
-      if (!gw || !trimmed.startsWith("/") || trimmed === lastInputRef.current) {
-        if (!trimmed.startsWith("/")) lastInputRef.current = "";
-        return;
-      }
-      lastInputRef.current = trimmed;
-
-      const timer = window.setTimeout(async () => {
-        if (lastInputRef.current !== trimmed) return;
-        try {
-          const r = await gw.request<CompletionResponse>("complete.slash", {
-            text: trimmed,
-          });
-          if (lastInputRef.current !== trimmed) return;
-          setItems(r?.items ?? []);
-          setReplaceFrom(r?.replace_from ?? 1);
-          setSelected(0);
-        } catch {
-          if (lastInputRef.current === trimmed) setItems([]);
-        }
-      }, DEBOUNCE_MS);
-
-      return () => window.clearTimeout(timer);
-    }, [input, gw]);
-
-    const apply = useCallback(
-      (item: CompletionItem) => {
-        onApply(input.slice(0, replaceFrom) + item.text);
-      },
-      [input, replaceFrom, onApply],
-    );
-
-    // Only consume keys when the popover is actually visible. Stale items from
-    // a previous slash prefix are ignored once the user deletes the "/".
-    const visible = items.length > 0 && input.startsWith("/");
-
-    useImperativeHandle(
-      ref,
-      () => ({
-        handleKey: (e) => {
-          if (!visible) return false;
-
-          switch (e.key) {
-            case "ArrowDown":
-              e.preventDefault();
-              setSelected((s) => (s + 1) % items.length);
-              return true;
-
-            case "ArrowUp":
-              e.preventDefault();
-              setSelected((s) => (s - 1 + items.length) % items.length);
-              return true;
-
-            case "Tab": {
-              e.preventDefault();
-              const item = items[selected];
-              if (item) apply(item);
-              return true;
-            }
-
-            case "Escape":
-              e.preventDefault();
-              setItems([]);
-              return true;
-
-            default:
-              return false;
-          }
-        },
-      }),
-      [visible, items, selected, apply],
-    );
-
-    if (!visible) return null;
-
-    return (
-      <div
-        className="absolute bottom-full left-0 right-0 mb-2 max-h-64 overflow-y-auto rounded-md border border-border bg-popover shadow-xl text-sm"
-        role="listbox"
-      >
-        {items.map((it, i) => {
-          const active = i === selected;
-
-          return (
-            <button
-              key={`${it.text}-${i}`}
-              type="button"
-              role="option"
-              aria-selected={active}
-              onMouseEnter={() => setSelected(i)}
-              onClick={() => apply(it)}
-              className={`w-full flex items-center gap-2 px-3 py-1.5 text-left cursor-pointer transition-colors ${
-                active
-                  ? "bg-primary/10 text-foreground"
-                  : "text-muted-foreground hover:bg-muted/60"
-              }`}
-            >
-              <ChevronRight
-                className={`h-3 w-3 shrink-0 ${active ? "text-primary" : "text-transparent"}`}
-              />
-
-              <span className="font-mono text-xs shrink-0 truncate">
-                {it.display}
-              </span>
-
-              {it.meta && (
-                <span className="text-[0.7rem] text-muted-foreground/70 truncate ml-auto">
-                  {it.meta}
-                </span>
-              )}
-            </button>
-          );
-        })}
-      </div>
-    );
-  },
-);
@@ -1,228 +0,0 @@
-import {
-  AlertCircle,
-  Check,
-  ChevronDown,
-  ChevronRight,
-  Zap,
-} from "lucide-react";
-import { useEffect, useState } from "react";
-
-/**
- * Expandable tool call row — the web equivalent of Ink's ToolTrail node.
- *
- * Renders one `tool.start` + `tool.complete` pair (plus any `tool.progress`
- * in between) as a single collapsible item in the transcript:
- *
- *   ▸ ● read_file(path=/foo)                         2.3s
- *
- * Click the header to reveal a preformatted body with context (args), the
- * streaming preview (while running), and the final summary or error. Error
- * rows auto-expand so failures aren't silently collapsed.
- */
-
-export interface ToolEntry {
-  kind: "tool";
-  id: string;
-  tool_id: string;
-  name: string;
-  context?: string;
-  preview?: string;
-  summary?: string;
-  error?: string;
-  inline_diff?: string;
-  status: "running" | "done" | "error";
-  startedAt: number;
-  completedAt?: number;
-}
-
-const STATUS_TONE: Record<ToolEntry["status"], string> = {
-  running: "border-primary/40 bg-primary/[0.04]",
-  done: "border-border bg-muted/20",
-  error: "border-destructive/50 bg-destructive/[0.04]",
-};
-
-const BULLET_TONE: Record<ToolEntry["status"], string> = {
-  running: "text-primary",
-  done: "text-primary/80",
-  error: "text-destructive",
-};
-
-const TICK_MS = 500;
-
-export function ToolCall({ tool }: { tool: ToolEntry }) {
-  // `open` is derived: errors default-expanded, everything else collapsed.
-  // `null` means "follow the default"; any explicit bool is the user's override.
-  // This lets a running tool flip to expanded automatically when it errors,
-  // without mirroring state in an effect.
-  const [userOverride, setUserOverride] = useState<boolean | null>(null);
-  const open = userOverride ?? tool.status === "error";
-
-  // Tick `now` while the tool is running so the elapsed label updates live.
-  const [now, setNow] = useState(() => Date.now());
-  useEffect(() => {
-    if (tool.status !== "running") return;
-    const id = window.setInterval(() => setNow(() => Date.now()), TICK_MS);
-    return () => window.clearInterval(id);
-  }, [tool.status]);
-
-  // Historical tools (hydrated from session.resume) signal missing timestamps
-  // with `startedAt === 0`; we hide the elapsed badge for those rather than
-  // rendering a misleading "0ms".
-  const hasTimestamps = tool.startedAt > 0;
-  const elapsed = hasTimestamps
-    ? fmtElapsed((tool.completedAt ?? now) - tool.startedAt)
-    : null;
-
-  const hasBody = !!(
-    tool.context ||
-    tool.preview ||
-    tool.summary ||
-    tool.error ||
-    tool.inline_diff
-  );
-
-  const Chevron = open ? ChevronDown : ChevronRight;
-
-  return (
-    <div
-      className={`rounded-md border overflow-hidden ${STATUS_TONE[tool.status]}`}
-    >
-      <button
-        type="button"
-        onClick={() => setUserOverride(!open)}
-        disabled={!hasBody}
-        aria-expanded={open}
-        className="w-full flex items-center gap-2 px-2.5 py-1.5 text-left text-xs hover:bg-foreground/2 disabled:cursor-default cursor-pointer transition-colors"
-      >
-        {hasBody ? (
-          <Chevron className="h-3 w-3 shrink-0 text-muted-foreground" />
-        ) : (
-          <span className="w-3 shrink-0" />
-        )}
-
-        <Zap className={`h-3 w-3 shrink-0 ${BULLET_TONE[tool.status]}`} />
-
-        <span className="font-mono font-medium shrink-0">{tool.name}</span>
-
-        <span className="font-mono text-muted-foreground/80 truncate min-w-0 flex-1">
-          {tool.context ?? ""}
-        </span>
-
-        {tool.status === "running" && (
-          <span
-            className="inline-block h-2 w-2 rounded-full bg-primary animate-pulse shrink-0"
-            title="running"
-          />
-        )}
-        {tool.status === "error" && (
-          <AlertCircle
-            className="h-3 w-3 shrink-0 text-destructive"
-            aria-label="error"
-          />
-        )}
-        {tool.status === "done" && (
-          <Check
-            className="h-3 w-3 shrink-0 text-primary/80"
-            aria-label="done"
-          />
-        )}
-
-        {elapsed && (
-          <span className="font-mono text-[0.65rem] text-muted-foreground tabular-nums shrink-0">
-            {elapsed}
-          </span>
-        )}
-      </button>
-
-      {open && hasBody && (
-        <div className="border-t border-border/60 px-3 py-2 space-y-2 text-xs font-mono">
-          {tool.context && <Section label="context">{tool.context}</Section>}
-
-          {tool.preview && tool.status === "running" && (
-            <Section label="streaming">
-              {tool.preview}
-              <span className="inline-block w-1.5 h-3 align-middle bg-foreground/40 ml-0.5 animate-pulse" />
-            </Section>
-          )}
-
-          {tool.inline_diff && (
-            <Section label="diff">
-              <pre className="whitespace-pre overflow-x-auto text-[0.7rem] leading-snug">
-                {colorizeDiff(tool.inline_diff)}
-              </pre>
-            </Section>
-          )}
-
-          {tool.summary && (
-            <Section label="result">
-              <span className="text-foreground/90 whitespace-pre-wrap">
-                {tool.summary}
-              </span>
-            </Section>
-          )}
-
-          {tool.error && (
-            <Section label="error" tone="error">
-              <span className="text-destructive whitespace-pre-wrap">
-                {tool.error}
-              </span>
-            </Section>
-          )}
-        </div>
-      )}
-    </div>
-  );
-}
-
-function Section({
-  label,
-  children,
-  tone,
-}: {
-  label: string;
-  children: React.ReactNode;
-  tone?: "error";
-}) {
-  return (
-    <div className="flex gap-3">
-      <span
-        className={`uppercase tracking-wider text-[0.6rem] shrink-0 w-14 pt-0.5 ${
-          tone === "error" ? "text-destructive/80" : "text-muted-foreground/60"
-        }`}
-      >
-        {label}
-      </span>
-
-      <div className="flex-1 min-w-0 text-muted-foreground">{children}</div>
-    </div>
-  );
-}
-
-function fmtElapsed(ms: number): string {
-  const sec = Math.max(0, ms) / 1000;
-  if (sec < 1) return `${Math.round(ms)}ms`;
-  if (sec < 10) return `${sec.toFixed(1)}s`;
-  if (sec < 60) return `${Math.round(sec)}s`;
-
-  const m = Math.floor(sec / 60);
-  const s = Math.round(sec % 60);
-  return s ? `${m}m ${s}s` : `${m}m`;
-}
-
-/** Colorize unified-diff lines for the inline diff section. */
-function colorizeDiff(diff: string): React.ReactNode {
-  return diff.split("\n").map((line, i) => (
-    <div key={i} className={diffLineClass(line)}>
-      {line || "\u00A0"}
-    </div>
-  ));
-}
-
-function diffLineClass(line: string): string {
-  if (line.startsWith("+") && !line.startsWith("+++"))
-    return "text-emerald-500 dark:text-emerald-400";
-  if (line.startsWith("-") && !line.startsWith("---"))
-    return "text-destructive";
-  if (line.startsWith("@@")) return "text-primary";
-  return "text-muted-foreground/80";
-}
@@ -1,232 +0,0 @@
-/**
- * Browser WebSocket client for the tui_gateway JSON-RPC protocol.
- *
- * Speaks the exact same newline-delimited JSON-RPC dialect that the Ink TUI
- * drives over stdio. The server-side transport abstraction
- * (tui_gateway/transport.py + ws.py) routes the same dispatcher's writes
- * onto either stdout or a WebSocket depending on how the client connected.
- *
- *   const gw = new GatewayClient()
- *   await gw.connect()
- *   const { session_id } = await gw.request<{ session_id: string }>("session.create")
- *   gw.on("message.delta", (ev) => console.log(ev.payload?.text))
- *   await gw.request("prompt.submit", { session_id, text: "hi" })
- */
-
-export type GatewayEventName =
-  | "gateway.ready"
-  | "session.info"
-  | "message.start"
-  | "message.delta"
-  | "message.complete"
-  | "thinking.delta"
-  | "reasoning.delta"
-  | "reasoning.available"
-  | "status.update"
-  | "tool.start"
-  | "tool.progress"
-  | "tool.complete"
-  | "tool.generating"
-  | "clarify.request"
-  | "approval.request"
-  | "sudo.request"
-  | "secret.request"
-  | "background.complete"
-  | "btw.complete"
-  | "error"
-  | "skin.changed"
-  | (string & {});
-
-export interface GatewayEvent<P = unknown> {
-  type: GatewayEventName;
-  session_id?: string;
-  payload?: P;
-}
-
-export type ConnectionState =
-  | "idle"
-  | "connecting"
-  | "open"
-  | "closed"
-  | "error";
-
-interface Pending {
-  resolve: (v: unknown) => void;
-  reject: (e: Error) => void;
-  timer: ReturnType<typeof setTimeout>;
-}
-
-const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;
-
-/** Wildcard listener key: subscribe to every event regardless of type. */
-const ANY = "*";
-
-export class GatewayClient {
-  private ws: WebSocket | null = null;
-  private reqId = 0;
-  private pending = new Map<string, Pending>();
-  private listeners = new Map<string, Set<(ev: GatewayEvent) => void>>();
-  private _state: ConnectionState = "idle";
-  private stateListeners = new Set<(s: ConnectionState) => void>();
-
-  get state(): ConnectionState {
-    return this._state;
-  }
-
-  private setState(s: ConnectionState) {
-    if (this._state === s) return;
-    this._state = s;
-    for (const cb of this.stateListeners) cb(s);
-  }
-
-  onState(cb: (s: ConnectionState) => void): () => void {
-    this.stateListeners.add(cb);
-    cb(this._state);
-    return () => this.stateListeners.delete(cb);
-  }
-
-  /** Subscribe to a specific event type. Returns an unsubscribe function. */
-  on<P = unknown>(
-    type: GatewayEventName,
-    cb: (ev: GatewayEvent<P>) => void,
-  ): () => void {
-    let set = this.listeners.get(type);
-    if (!set) {
-      set = new Set();
-      this.listeners.set(type, set);
-    }
-    set.add(cb as (ev: GatewayEvent) => void);
-    return () => set!.delete(cb as (ev: GatewayEvent) => void);
-  }
-
-  /** Subscribe to every event (fires after type-specific listeners). */
-  onAny(cb: (ev: GatewayEvent) => void): () => void {
-    return this.on(ANY as GatewayEventName, cb);
-  }
-
-  async connect(token?: string): Promise<void> {
-    if (this._state === "open" || this._state === "connecting") return;
-    this.setState("connecting");
-
-    const resolved = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
-    if (!resolved) {
-      this.setState("error");
-      throw new Error(
-        "Session token not available — page must be served by the Hermes dashboard",
-      );
-    }
-
-    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
-    const ws = new WebSocket(
-      `${scheme}//${location.host}/api/ws?token=${encodeURIComponent(resolved)}`,
-    );
-    this.ws = ws;
-
-    await new Promise<void>((resolve, reject) => {
-      const onOpen = () => {
-        ws.removeEventListener("error", onError);
-        this.setState("open");
-        resolve();
-      };
-      const onError = () => {
-        ws.removeEventListener("open", onOpen);
-        this.setState("error");
-        reject(new Error("WebSocket connection failed"));
-      };
-      ws.addEventListener("open", onOpen, { once: true });
-      ws.addEventListener("error", onError, { once: true });
-    });
-
-    ws.addEventListener("message", (ev) => {
-      try {
-        this.dispatch(JSON.parse(ev.data));
-      } catch {
-        /* malformed frame — ignore */
-      }
-    });
-
-    ws.addEventListener("close", () => {
-      this.setState("closed");
-      this.rejectAllPending(new Error("WebSocket closed"));
-    });
-  }
-
-  close() {
-    this.ws?.close();
-    this.ws = null;
-  }
-
-  private dispatch(msg: Record<string, unknown>) {
-    const id = msg.id as string | undefined;
-
-    if (id !== undefined && this.pending.has(id)) {
-      const p = this.pending.get(id)!;
-      this.pending.delete(id);
-      clearTimeout(p.timer);
-
-      const err = msg.error as { message?: string } | undefined;
-      if (err) p.reject(new Error(err.message ?? "request failed"));
-      else p.resolve(msg.result);
-      return;
-    }
-
-    if (msg.method !== "event") return;
-
-    const params = (msg.params ?? {}) as GatewayEvent;
-    if (typeof params.type !== "string") return;
-
-    for (const cb of this.listeners.get(params.type) ?? []) cb(params);
-    for (const cb of this.listeners.get(ANY) ?? []) cb(params);
-  }
-
-  private rejectAllPending(err: Error) {
-    for (const p of this.pending.values()) {
-      clearTimeout(p.timer);
-      p.reject(err);
-    }
-    this.pending.clear();
-  }
-
-  /** Send a JSON-RPC request. Rejects on error response or timeout. */
-  request<T = unknown>(
-    method: string,
-    params: Record<string, unknown> = {},
-    timeoutMs = DEFAULT_REQUEST_TIMEOUT_MS,
-  ): Promise<T> {
-    if (!this.ws || this._state !== "open") {
-      return Promise.reject(
-        new Error(`gateway not connected (state=${this._state})`),
-      );
-    }
-
-    const id = `w${++this.reqId}`;
-
-    return new Promise<T>((resolve, reject) => {
-      const timer = setTimeout(() => {
-        if (this.pending.delete(id)) {
-          reject(new Error(`request timed out: ${method}`));
-        }
-      }, timeoutMs);
-
-      this.pending.set(id, {
-        resolve: (v) => resolve(v as T),
-        reject,
-        timer,
-      });
-
-      try {
-        this.ws!.send(JSON.stringify({ jsonrpc: "2.0", id, method, params }));
-      } catch (e) {
-        clearTimeout(timer);
-        this.pending.delete(id);
-        reject(e instanceof Error ? e : new Error(String(e)));
-      }
-    });
-  }
-}
-
-declare global {
-  interface Window {
-    __HERMES_SESSION_TOKEN__?: string;
-  }
-}
@@ -1,163 +0,0 @@
-/**
- * Slash command execution pipeline for the web chat.
- *
- * Mirrors the Ink TUI's createSlashHandler.ts:
- *
- *   1. Parse the command into `name` + `arg`.
- *   2. Try `slash.exec` — covers every registry-backed command the terminal
- *      UI knows about (/help, /resume, /compact, /model, …). Output is
- *      rendered into the transcript.
- *   3. If `slash.exec` errors (command rejected, unknown, or needs client
- *      behaviour), fall back to `command.dispatch` which returns a typed
- *      directive: `exec` | `plugin` | `alias` | `skill` | `send`.
- *   4. Each directive is dispatched to the appropriate callback.
- *
- * Keeping the pipeline here (instead of inline in ChatPage) lets future
- * clients (SwiftUI, Android) implement the same logic by reading the same
- * contract.
- */
-
-import type { GatewayClient } from "@/lib/gatewayClient";
-
-export interface SlashExecResponse {
-  output?: string;
-  warning?: string;
-}
-
-export type CommandDispatchResponse =
-  | { type: "exec" | "plugin"; output?: string }
-  | { type: "alias"; target: string }
-  | { type: "skill"; name: string; message?: string }
-  | { type: "send"; message: string };
-
-export interface SlashExecCallbacks {
-  /** Render a transcript system message. */
-  sys(text: string): void;
-  /** Submit a user message to the agent (prompt.submit). */
-  send(message: string): Promise<void> | void;
-}
-
-export interface SlashExecOptions {
-  /** Raw command including the leading slash (e.g. "/model opus-4.6"). */
-  command: string;
-  /** Session id. If empty the call is still issued — some commands are session-less. */
-  sessionId: string;
-  gw: GatewayClient;
-  callbacks: SlashExecCallbacks;
-}
-
-export type SlashExecResult = "done" | "sent" | "error";
-
-/**
- * Run a slash command. Returns the terminal state so callers can decide
- * whether to clear the composer, queue retries, etc.
- */
-export async function executeSlash({
-  command,
-  sessionId,
-  gw,
-  callbacks: { sys, send },
-}: SlashExecOptions): Promise<SlashExecResult> {
-  const { name, arg } = parseSlash(command);
-
-  if (!name) {
-    sys("empty slash command");
-    return "error";
-  }
-
-  // Primary dispatcher.
-  try {
-    const r = await gw.request<SlashExecResponse>("slash.exec", {
-      command: command.replace(/^\/+/, ""),
-      session_id: sessionId,
-    });
-    const body = r?.output || `/${name}: no output`;
-    sys(r?.warning ? `warning: ${r.warning}\n${body}` : body);
-    return "done";
-  } catch {
-    /* fall through to command.dispatch */
-  }
-
-  try {
-    const d = parseCommandDispatch(
-      await gw.request<unknown>("command.dispatch", {
-        name,
-        arg,
-        session_id: sessionId,
-      }),
-    );
-
-    if (!d) {
-      sys("error: invalid response: command.dispatch");
-      return "error";
-    }
-
-    switch (d.type) {
-      case "exec":
-      case "plugin":
-        sys(d.output ?? "(no output)");
-        return "done";
-
-      case "alias":
-        return executeSlash({
-          command: `/${d.target}${arg ? ` ${arg}` : ""}`,
-          sessionId,
-          gw,
-          callbacks: { sys, send },
-        });
-
-      case "skill":
-      case "send": {
-        const msg = d.message?.trim() ?? "";
-        if (!msg) {
-          sys(
-            `/${name}: ${d.type === "skill" ? "skill payload missing message" : "empty message"}`,
-          );
-          return "error";
-        }
-        if (d.type === "skill") sys(`⚡ loading skill: ${d.name}`);
-        await send(msg);
-        return "sent";
-      }
-    }
-  } catch (err) {
-    sys(`error: ${err instanceof Error ? err.message : String(err)}`);
-    return "error";
-  }
-}
-
-export function parseSlash(command: string): { name: string; arg: string } {
-  const m = command.replace(/^\/+/, "").match(/^(\S+)\s*(.*)$/);
-  return m ? { name: m[1], arg: m[2].trim() } : { name: "", arg: "" };
-}
-
-function parseCommandDispatch(raw: unknown): CommandDispatchResponse | null {
-  if (!raw || typeof raw !== "object") return null;
-
-  const r = raw as Record<string, unknown>;
-  const str = (v: unknown) => (typeof v === "string" ? v : undefined);
-
-  switch (r.type) {
-    case "exec":
-    case "plugin":
-      return { type: r.type, output: str(r.output) };
-
-    case "alias":
-      return typeof r.target === "string"
-        ? { type: "alias", target: r.target }
-        : null;
-
-    case "skill":
-      return typeof r.name === "string"
-        ? { type: "skill", name: r.name, message: str(r.message) }
-        : null;
-
-    case "send":
-      return typeof r.message === "string"
-        ? { type: "send", message: r.message }
-        : null;
-
-    default:
-      return null;
-  }
-}
@@ -1,752 +0,0 @@
-import { Markdown } from "@/components/Markdown";
-import { ModelPickerDialog } from "@/components/ModelPickerDialog";
-import {
-  SlashPopover,
-  type SlashPopoverHandle,
-} from "@/components/SlashPopover";
-import { ToolCall, type ToolEntry } from "@/components/ToolCall";
-import { Badge } from "@/components/ui/badge";
-import { Button } from "@/components/ui/button";
-import { Card } from "@/components/ui/card";
-import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
-import { executeSlash } from "@/lib/slashExec";
-import {
-  AlertCircle,
-  ChevronDown,
-  Copy,
-  Heart,
-  RefreshCw,
-  Send,
-  Square,
-} from "lucide-react";
-import { useCallback, useEffect, useRef, useState } from "react";
-import { useSearchParams } from "react-router-dom";
-
-/*
- * Chat — the "Ink TUI in a browser" proof.
- *
- * Drives the exact same tui_gateway JSON-RPC surface Ink drives over stdio,
- * but over a WebSocket served by hermes_cli/web_server.py. Covers message
- * streaming, tool calls, interrupts, slash commands, and model switching.
- * Approvals / clarify / resume picker / attachments are still TODO; the
- * event listeners on GatewayClient give type-safe hooks for each.
- */
-
-type MessageRole = "user" | "assistant" | "system";
-
-interface TextMessage {
-  kind: "message";
-  id: string;
-  role: MessageRole;
-  text: string;
-  streaming?: boolean;
-  rendered?: string;
-  error?: boolean;
-}
-
-type ChatEntry = TextMessage | ToolEntry;
-
-/** Shape of messages returned by session.resume — see _history_to_messages in tui_gateway/server.py. */
-interface HydratedMessage {
-  role: "user" | "assistant" | "system" | "tool";
-  text?: string;
-  name?: string;
-  context?: string;
-}
-
-interface SessionResumeResponse {
-  session_id: string;
-  resumed: string;
-  message_count: number;
-  messages: HydratedMessage[];
-  info?: Record<string, unknown>;
-}
-
-interface SessionInfo {
-  model?: string;
-  provider?: string;
-  cwd?: string;
-  tools?: Record<string, unknown>;
-  skills?: Record<string, unknown>;
-  credential_warning?: string;
-}
-
-const STATE_LABEL: Record<ConnectionState, string> = {
-  idle: "idle",
-  connecting: "connecting",
-  open: "connected",
-  closed: "closed",
-  error: "error",
-};
-
-const STATE_TONE: Record<ConnectionState, string> = {
-  idle: "bg-muted text-muted-foreground",
-  connecting: "bg-primary/10 text-primary",
-  open: "bg-emerald-500/10 text-emerald-500 dark:text-emerald-400",
-  closed: "bg-muted text-muted-foreground",
-  error: "bg-destructive/10 text-destructive",
-};
-
-const randId = (prefix: string) =>
-  `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 6)}`;
-
-// Mirror ui-tui/src/app/useMainApp.ts — same regex, same palette, same beat.
-// Web parity with the Ink TUI's GoodVibesHeart easter egg: a thank-you pulses
-// a heart next to the connection badge.
-const GOOD_VIBES_RE = /\b(good bot|thanks|thank you|thx|ty|ily|love you)\b/i;
-const HEART_COLORS = ["#ff5fa2", "#ff4d6d", "#ffbd38"];
-
-export default function ChatPage() {
-  const gwRef = useRef<GatewayClient | null>(null);
-  const slashRef = useRef<SlashPopoverHandle | null>(null);
-  const transcriptEndRef = useRef<HTMLDivElement | null>(null);
-  const textareaRef = useRef<HTMLTextAreaElement | null>(null);
-
-  const [searchParams] = useSearchParams();
-  const resumeId = searchParams.get("resume") ?? "";
-
-  const [connState, setConnState] = useState<ConnectionState>("idle");
-  const [sessionId, setSessionId] = useState("");
-  const [sessionInfo, setSessionInfo] = useState<SessionInfo | null>(null);
-  const [entries, setEntries] = useState<ChatEntry[]>([]);
-  const [draft, setDraft] = useState("");
-  const [busy, setBusy] = useState(false);
-  const [connectError, setConnectError] = useState("");
-  const [runtimeError, setRuntimeError] = useState("");
-  const [modelPickerOpen, setModelPickerOpen] = useState(false);
-  const [goodVibesTick, setGoodVibesTick] = useState(0);
-
-  /* ---------------------------------------------------------------- */
-  /*  Entry helpers                                                    */
-  /* ---------------------------------------------------------------- */
-
-  /** Replace the most recent streaming assistant message, if any. */
-  const updateStreamingAssistant = useCallback(
-    (fn: (m: TextMessage) => TextMessage) => {
-      setEntries((list) => {
-        for (let i = list.length - 1; i >= 0; i--) {
-          const e = list[i];
-          if (e.kind === "message" && e.role === "assistant" && e.streaming) {
-            const next = list.slice();
-            next[i] = fn(e);
-            return next;
-          }
-        }
-        return list;
-      });
-    },
-    [],
-  );
-
-  const pushMessage = useCallback(
-    (role: MessageRole, text: string, extra: Partial<TextMessage> = {}) => {
-      setEntries((list) => [
-        ...list,
-        { kind: "message", id: randId(role[0]), role, text, ...extra },
-      ]);
-    },
-    [],
-  );
-
-  const pushSystem = useCallback(
-    (text: string) => pushMessage("system", text),
-    [pushMessage],
-  );
-
-  /* ---------------------------------------------------------------- */
-  /*  Bootstrap: connect, wire events, open or resume a session        */
-  /* ---------------------------------------------------------------- */
-
-  const bootstrap = useCallback(async () => {
-    setEntries([]);
-    setSessionId("");
-    setSessionInfo(null);
-    setBusy(false);
-    setConnectError("");
-    setRuntimeError("");
-
-    const gw = gwRef.current ?? new GatewayClient();
-    gwRef.current = gw;
-
-    gw.onState(setConnState);
-
-    gw.on<SessionInfo>("session.info", (ev) => {
-      if (ev.payload) setSessionInfo(ev.payload);
-    });
-
-    gw.on("message.start", () => {
-      pushMessage("assistant", "", { streaming: true });
-      setBusy(true);
-    });
-
-    gw.on<{ text?: string; rendered?: string }>("message.delta", (ev) => {
-      const d = ev.payload?.text ?? "";
-      if (!d) return;
-      updateStreamingAssistant((m) => ({ ...m, text: m.text + d }));
-    });
-
-    gw.on<{ text?: string; rendered?: string; reasoning?: string }>(
-      "message.complete",
-      (ev) => {
-        updateStreamingAssistant((m) => ({
-          ...m,
-          text: ev.payload?.text ?? m.text,
-          rendered: ev.payload?.rendered,
-          streaming: false,
-        }));
-        setBusy(false);
-      },
-    );
-
-    gw.on<{ tool_id: string; name?: string; context?: string }>(
-      "tool.start",
-      (ev) => {
-        if (!ev.payload) return;
-        const { tool_id, name, context } = ev.payload;
-
-        // Insert tool rows BEFORE the current streaming assistant bubble so
-        // the transcript reads "user → tools → final message" rather than
-        // "empty bubble → tool → bubble filling in". If there's no streaming
-        // assistant (tool fired before message.start, or no message at all),
-        // append to the end.
-        const row: ToolEntry = {
-          kind: "tool",
-          id: `t-${tool_id}`,
-          tool_id,
-          name: name ?? "tool",
-          context,
-          status: "running",
-          startedAt: Date.now(),
-        };
-
-        setEntries((list) => {
-          for (let i = list.length - 1; i >= 0; i--) {
-            const e = list[i];
-            if (e.kind === "message" && e.role === "assistant" && e.streaming) {
-              return [...list.slice(0, i), row, ...list.slice(i)];
-            }
-          }
-          return [...list, row];
-        });
-      },
-    );
-
-    gw.on<{ name?: string; preview?: string }>("tool.progress", (ev) => {
-      const name = ev.payload?.name ?? "";
-      const preview = ev.payload?.preview ?? "";
-      if (!name || !preview) return;
-
-      // Update the most recent running tool entry with this name.
-      setEntries((list) => {
-        for (let i = list.length - 1; i >= 0; i--) {
-          const e = list[i];
-          if (e.kind === "tool" && e.status === "running" && e.name === name) {
-            const next = list.slice();
-            next[i] = { ...e, preview };
-            return next;
-          }
-        }
-        return list;
-      });
-    });
-
-    gw.on<{
-      tool_id: string;
-      name?: string;
-      summary?: string;
-      error?: string;
-      inline_diff?: string;
-    }>("tool.complete", (ev) => {
-      if (!ev.payload) return;
-      const { tool_id, summary, error, inline_diff } = ev.payload;
-
-      setEntries((list) =>
-        list.map((e) =>
-          e.kind === "tool" && e.tool_id === tool_id
-            ? {
-                ...e,
-                status: error ? "error" : "done",
-                summary: summary ?? (error ? undefined : e.summary),
-                error: error ?? e.error,
-                inline_diff: inline_diff ?? e.inline_diff,
-                completedAt: Date.now(),
-              }
-            : e,
-        ),
-      );
-    });
-
-    gw.on<{ message?: string }>("error", (ev) => {
-      setRuntimeError(ev.payload?.message ?? "unknown error");
-      setBusy(false);
-    });
-
-    try {
-      await gw.connect();
-
-      if (resumeId) {
-        const resp = await gw.request<SessionResumeResponse>("session.resume", {
-          session_id: resumeId,
-          cols: 100,
-        });
-        setSessionId(resp.session_id);
-        setEntries(hydrateMessages(resp.messages ?? []));
-        pushSystem(
-          `resumed session ${resp.resumed} · ${resp.message_count ?? resp.messages?.length ?? 0} messages`,
-        );
-        // NOTE: intentionally NOT clearing the ?resume= param. Doing so
-        // flips `resumeId` back to "" which is a dep of the bootstrap
-        // effect, re-triggering cleanup + a fresh session.create and
-        // wiping the transcript we just hydrated.
-      } else {
-        const { session_id } = await gw.request<{ session_id: string }>(
-          "session.create",
-          { cols: 100 },
-        );
-        setSessionId(session_id);
-      }
-    } catch (err) {
-      setConnectError(err instanceof Error ? err.message : String(err));
-    }
-  }, [pushMessage, pushSystem, resumeId, updateStreamingAssistant]);
-
-  // Rebootstrap whenever the resume target changes. React Router keeps the
-  // component mounted when the search params flip, so navigating to
-  // /chat?resume=X from within the app must tear down the current WS
-  // connection and open a fresh session.
-  useEffect(() => {
-    bootstrap();
-    return () => {
-      gwRef.current?.close();
-      gwRef.current = null;
-    };
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [resumeId]);
-
-  useEffect(() => {
-    transcriptEndRef.current?.scrollIntoView({
-      behavior: "smooth",
-      block: "end",
-    });
-  }, [entries]);
-
-  /* ---------------------------------------------------------------- */
-  /*  Submission                                                       */
-  /* ---------------------------------------------------------------- */
-
-  const submitUserMessage = useCallback(
-    async (text: string) => {
-      const gw = gwRef.current;
-      const trimmed = text.trim();
-      if (!gw || !sessionId || !trimmed) return;
-
-      pushMessage("user", trimmed);
-      setRuntimeError("");
-
-      try {
-        await gw.request("prompt.submit", {
-          session_id: sessionId,
-          text: trimmed,
-        });
-      } catch (err) {
-        setRuntimeError(err instanceof Error ? err.message : String(err));
-        setBusy(false);
-        updateStreamingAssistant((m) => ({
-          ...m,
-          streaming: false,
-          error: true,
-        }));
-      }
-    },
-    [sessionId, pushMessage, updateStreamingAssistant],
-  );
-
-  const submitSlash = useCallback(
-    async (command: string) => {
-      const gw = gwRef.current;
-      if (!gw || !sessionId) return;
-
-      pushSystem(command);
-      await executeSlash({
-        command,
-        sessionId,
-        gw,
-        callbacks: { sys: pushSystem, send: submitUserMessage },
-      });
-    },
-    [sessionId, pushSystem, submitUserMessage],
-  );
-
-  const send = useCallback(async () => {
-    const text = draft.trim();
-    if (!text || busy || !sessionId) return;
-
-    setDraft("");
-    if (!text.startsWith("/") && GOOD_VIBES_RE.test(text)) {
-      setGoodVibesTick((v) => v + 1);
-    }
-    await (text.startsWith("/") ? submitSlash(text) : submitUserMessage(text));
-  }, [busy, draft, sessionId, submitSlash, submitUserMessage]);
-
-  const interrupt = useCallback(() => {
-    gwRef.current
-      ?.request("session.interrupt", { session_id: sessionId })
-      .catch(() => {
-        /* resync on next status event */
-      });
-  }, [sessionId]);
-
-  /* ---------------------------------------------------------------- */
-  /*  Render                                                           */
-  /* ---------------------------------------------------------------- */
-
-  const canSend =
-    connState === "open" && !!sessionId && !busy && draft.trim().length > 0;
-  const canPickModel = connState === "open" && !!sessionId;
-  const placeholder =
-    connState !== "open"
-      ? "waiting for gateway…"
-      : busy
-        ? "agent is running — press Interrupt to stop, or queue a follow-up"
-        : "message hermes… (Enter to send, Shift+Enter for newline, / for commands)";
-
-  return (
-    // Opt out of the App root's `font-mondwest uppercase` — the dashboard
-    // uses pixel-display caps for chrome, but chat prose needs readable
-    // mixed-case. `font-courier` matches the terminal aesthetic without
-    // fighting the rest of the app's typography.
-    <div className="flex flex-col gap-4 h-[calc(100vh-8rem)] font-courier normal-case">
-      <header className="flex flex-wrap items-center gap-2 justify-between">
-        <div className="flex items-center gap-2 flex-wrap">
-          <Badge className={STATE_TONE[connState]}>
-            <span className="mr-1 h-1.5 w-1.5 rounded-full bg-current inline-block" />
-            {STATE_LABEL[connState]}
-          </Badge>
-
-          <GoodVibesHeart tick={goodVibesTick} />
-
-          <ModelBadge
-            model={sessionInfo?.model}
-            enabled={canPickModel}
-            onClick={() => setModelPickerOpen(true)}
-          />
-
-          {sessionId && (
-            <button
-              onClick={() =>
-                navigator.clipboard?.writeText(sessionId).catch(() => {})
-              }
-              className="inline-flex items-center gap-1 font-mono text-[0.7rem] text-muted-foreground hover:text-foreground transition-colors cursor-pointer"
-              title="Copy session id"
-            >
-              <Copy className="h-3 w-3" />
-              {sessionId}
-            </button>
-          )}
-        </div>
-
-        <div className="flex items-center gap-2">
-          {busy && (
-            <Button onClick={interrupt} variant="outline" size="sm">
-              <Square className="h-3 w-3 mr-1" fill="currentColor" />
-              Interrupt
-            </Button>
-          )}
-
-          <Button onClick={bootstrap} variant="ghost" size="sm">
-            <RefreshCw className="h-3 w-3 mr-1" />
-            Reset session
-          </Button>
-        </div>
-      </header>
-
-      {connectError && (
-        <Card className="p-3 border-destructive/50 bg-destructive/5 text-sm flex items-start gap-2">
-          <AlertCircle className="h-4 w-4 mt-0.5 shrink-0 text-destructive" />
-          <div>
-            <div className="font-medium text-destructive">
-              Can't connect to gateway
-            </div>
-            <div className="text-muted-foreground text-xs mt-0.5">
-              {connectError}
-            </div>
-          </div>
-        </Card>
-      )}
-
-      <Card className="flex-1 min-h-0 overflow-hidden flex flex-col">
-        <div className="flex-1 overflow-y-auto p-4 sm:p-6 space-y-3">
-          {entries.length === 0 && !connectError && (
-            <EmptyState connState={connState} cwd={sessionInfo?.cwd} />
-          )}
-
-          {entries.map((entry) =>
-            entry.kind === "tool" ? (
-              <ToolCall key={entry.id} tool={entry} />
-            ) : (
-              <MessageRow key={entry.id} message={entry} />
-            ),
-          )}
-
-          {runtimeError && (
-            <div className="flex items-start gap-2 text-xs text-destructive">
-              <AlertCircle className="h-3.5 w-3.5 mt-0.5 shrink-0" />
-              <span>{runtimeError}</span>
-            </div>
-          )}
-
-          <div ref={transcriptEndRef} />
-        </div>
-
-        <div className="border-t border-border p-3 sm:p-4 relative">
-          <SlashPopover
-            ref={slashRef}
-            input={draft}
-            gw={gwRef.current}
-            onApply={(next) => {
-              setDraft(next);
-              textareaRef.current?.focus();
-            }}
-          />
-
-          <div className="flex items-stretch overflow-hidden rounded-md border border-border bg-background/40 transition-colors focus-within:border-foreground/30 focus-within:bg-background/60 focus-within:ring-1 focus-within:ring-foreground/20">
-            <textarea
-              ref={textareaRef}
-              value={draft}
-              onChange={(e) => setDraft(e.target.value)}
-              onKeyDown={(e) => {
-                if (slashRef.current?.handleKey(e)) return;
-                if (
-                  e.key === "Enter" &&
-                  !e.shiftKey &&
-                  !e.nativeEvent.isComposing
-                ) {
-                  e.preventDefault();
-                  send();
-                }
-              }}
-              placeholder={placeholder}
-              rows={1}
-              className="flex-1 resize-none bg-transparent px-3.5 py-2.5 text-sm leading-relaxed placeholder:text-muted-foreground/50 focus:outline-none min-h-[40px] max-h-[200px] disabled:opacity-50"
-              style={{ fieldSizing: "content" } as React.CSSProperties}
-              disabled={connState !== "open"}
-            />
-
-            <button
-              type="button"
-              onClick={send}
-              disabled={!canSend}
-              aria-label="Send message"
-              className="shrink-0 w-11 flex items-center justify-center border-l border-border bg-foreground/90 text-background transition-colors cursor-pointer hover:bg-foreground active:bg-foreground/80 disabled:opacity-30 disabled:cursor-not-allowed disabled:hover:bg-foreground/90"
-            >
-              <Send className="h-4 w-4" />
-            </button>
-          </div>
-        </div>
-      </Card>
-
-      {modelPickerOpen && gwRef.current && (
-        <ModelPickerDialog
-          gw={gwRef.current}
-          sessionId={sessionId}
-          onClose={() => setModelPickerOpen(false)}
-          onSubmit={submitSlash}
-        />
-      )}
-    </div>
-  );
-}
-
-/* ------------------------------------------------------------------ */
-/*  Subcomponents                                                      */
-/* ------------------------------------------------------------------ */
-
-/**
- * Port of ui-tui's GoodVibesHeart — a ♥ glows for 650ms in a random palette
- * colour every time the user says something kind. Same regex, same beat, just
- * rendered via a Lucide icon instead of an Ink Text node.
- */
-function GoodVibesHeart({ tick }: { tick: number }) {
-  const [active, setActive] = useState(false);
-  const [color, setColor] = useState(HEART_COLORS[0]);
-
-  useEffect(() => {
-    if (tick <= 0) return;
-    setColor(HEART_COLORS[Math.floor(Math.random() * HEART_COLORS.length)]);
-    setActive(true);
-    const id = setTimeout(() => setActive(false), 650);
-    return () => clearTimeout(id);
-  }, [tick]);
-
-  return (
-    <Heart
-      aria-hidden
-      className={`h-4 w-4 transition-all duration-300 ${
-        active ? "scale-125 opacity-100" : "scale-75 opacity-0"
-      }`}
-      fill={active ? color : "none"}
-      style={{ color }}
-    />
-  );
-}
-
-function ModelBadge({
-  model,
-  enabled,
-  onClick,
-}: {
-  model: string | undefined;
-  enabled: boolean;
-  onClick(): void;
-}) {
-  const hasModel = !!model;
-  const className = hasModel
-    ? "inline-flex items-center gap-1 rounded-md border border-border bg-muted/40 px-2 py-0.5 font-mono text-[0.7rem] hover:bg-muted hover:border-foreground/30 transition-colors cursor-pointer disabled:opacity-60 disabled:cursor-not-allowed"
-    : "inline-flex items-center gap-1 rounded-md border border-dashed border-border px-2 py-0.5 font-mono text-[0.7rem] text-muted-foreground hover:text-foreground hover:border-foreground/30 transition-colors cursor-pointer disabled:opacity-60 disabled:cursor-not-allowed";
-
-  return (
-    <button
-      type="button"
-      onClick={() => enabled && onClick()}
-      disabled={!enabled}
-      title="Click to switch model (same as /model)"
-      className={className}
-    >
-      {hasModel ? (
-        <>
-          <span>{model}</span>
-          <ChevronDown className="h-3 w-3 text-muted-foreground" />
-        </>
-      ) : (
-        <>
-          <ChevronDown className="h-3 w-3" />
-          pick model
-        </>
-      )}
-    </button>
-  );
-}
-
-function EmptyState({
-  connState,
-  cwd,
-}: {
-  connState: ConnectionState;
-  cwd: string | undefined;
-}) {
-  const ready = connState === "open";
-
-  return (
-    <div className="h-full flex items-center justify-center text-center px-4">
-      <div className="max-w-md space-y-4">
-        <div className="text-base text-foreground/80">
-          {ready ? (
-            <>
-              hermes is ready
-              <span className="ml-0.5 inline-block w-1.5 h-4 bg-foreground/60 align-middle animate-pulse" />
-            </>
-          ) : (
-            "connecting to gateway…"
-          )}
-        </div>
-
-        <div className="text-xs text-muted-foreground/70 leading-relaxed">
-          same agent, same tools — served over a socket.
-        </div>
-
-        <div className="flex flex-wrap justify-center items-center gap-1.5 text-[0.7rem] text-muted-foreground/60 pt-1">
-          <span>type</span>
-          <kbd className="rounded border border-border bg-muted/40 px-1.5 py-0.5 font-mono">
-            /
-          </kbd>
-          <span>for slash commands,</span>
-          <kbd className="rounded border border-border bg-muted/40 px-1.5 py-0.5 font-mono">
-            Enter
-          </kbd>
-          <span>to send</span>
-        </div>
-
-        {cwd && (
-          <div className="pt-2 font-mono text-[0.65rem] text-muted-foreground/40 truncate">
-            cwd · {cwd}
-          </div>
-        )}
-      </div>
-    </div>
-  );
-}
-
-function MessageRow({ message }: { message: TextMessage }) {
-  if (message.role === "user") {
-    return (
-      <div className="flex justify-end">
-        <div className="max-w-[80%] rounded-lg bg-primary text-primary-foreground px-3 py-2 whitespace-pre-wrap text-sm">
-          {message.text}
-        </div>
-      </div>
-    );
-  }
-
-  if (message.role === "system") {
-    return (
-      <div className="flex justify-center">
-        <div className="max-w-full rounded-md border border-dashed border-border bg-muted/20 px-3 py-1.5 text-xs text-muted-foreground font-mono whitespace-pre-wrap">
-          {message.text}
-        </div>
-      </div>
-    );
-  }
-
-  return (
-    <div className="flex justify-start">
-      <div
-        className={`max-w-[85%] rounded-lg border px-3.5 py-2.5 ${
-          message.error
-            ? "border-destructive/50 bg-destructive/5"
-            : "border-border bg-muted/30"
-        }`}
-      >
-        {message.text ? (
-          <Markdown content={message.text} streaming={message.streaming} />
-        ) : (
-          <span className="inline-flex items-center gap-1 text-muted-foreground text-sm italic">
-            thinking…
-            {message.streaming && (
-              <span
-                aria-hidden
-                className="inline-block w-[0.5em] h-[1em] align-[-0.15em] bg-foreground/50 animate-pulse"
-              />
-            )}
-          </span>
-        )}
-      </div>
-    </div>
-  );
-}
-
-/* ------------------------------------------------------------------ */
-/*  Hydration                                                          */
-/* ------------------------------------------------------------------ */
-
-function hydrateMessages(list: HydratedMessage[]): ChatEntry[] {
-  return list.map(
-    (m, i): ChatEntry =>
-      m.role === "tool"
-        ? {
-            kind: "tool",
-            id: `h-tool-${i}`,
-            tool_id: `h-tool-${i}`,
-            name: m.name ?? "tool",
-            context: m.context || undefined,
-            status: "done",
-            // Historical — no reliable timestamps in the hydrated payload.
-            startedAt: 0,
-          }
-        : {
-            kind: "message",
-            id: `h-msg-${i}`,
-            role: m.role,
-            text: m.text ?? "",
-          },
-  );
-}
@@ -1,11 +1,9 @@
 import { useEffect, useState, useCallback, useRef } from "react";
-import { useNavigate } from "react-router-dom";
 import {
  ChevronDown,
  ChevronLeft,
  ChevronRight,
  MessageSquare,
-  Play,
  Search,
  Trash2,
  Clock,
@@ -240,7 +238,6 @@ function SessionRow({
  isExpanded,
  onToggle,
  onDelete,
-  onOpen,
 }: {
  session: SessionInfo;
  snippet?: string;
@@ -248,7 +245,6 @@ function SessionRow({
  isExpanded: boolean;
  onToggle: () => void;
  onDelete: () => void;
-  onOpen: () => void;
 }) {
  const [messages, setMessages] = useState<SessionMessage[] | null>(null);
  const [loading, setLoading] = useState(false);
@@ -333,19 +329,6 @@ function SessionRow({
          <Badge variant="outline" className="text-[10px]">
            {session.source ?? "local"}
          </Badge>
-          <Button
-            variant="ghost"
-            size="icon"
-            className="h-7 w-7 text-muted-foreground hover:text-primary"
-            aria-label="Open in chat"
-            title="Open in chat"
-            onClick={(e) => {
-              e.stopPropagation();
-              onOpen();
-            }}
-          >
-            <Play className="h-3.5 w-3.5" />
-          </Button>
          <Button
            variant="ghost"
            size="icon"
@@ -363,12 +346,6 @@ function SessionRow({

      {isExpanded && (
        <div className="border-t border-border bg-background/50 p-4">
-          <div className="flex items-center justify-end pb-3">
-            <Button size="sm" variant="outline" onClick={onOpen}>
-              <Play className="h-3 w-3 mr-1.5" />
-              Open in chat
-            </Button>
-          </div>
          {loading && (
            <div className="flex items-center justify-center py-8">
              <div className="h-5 w-5 animate-spin rounded-full border-2 border-primary border-t-transparent" />
@@ -405,14 +382,6 @@ export default function SessionsPage() {
  const [searching, setSearching] = useState(false);
  const debounceRef = useRef<ReturnType<typeof setTimeout>>(null);
  const { t } = useI18n();
-  const navigate = useNavigate();
-
-  const handleOpen = useCallback(
-    (id: string) => {
-      navigate(`/chat?resume=${encodeURIComponent(id)}`);
-    },
-    [navigate],
-  );

  const loadSessions = useCallback((p: number) => {
    setLoading(true);
@@ -548,7 +517,6 @@ export default function SessionsPage() {
                  setExpandedId((prev) => (prev === s.id ? null : s.id))
                }
                onDelete={() => handleDelete(s.id)}
-                onOpen={() => handleOpen(s.id)}
              />
            ))}
          </div>
@@ -64,11 +64,7 @@ export default defineConfig({
  },
  server: {
    proxy: {
-      // REST endpoints + the /api/ws WebSocket (ws: true enables upgrade forwarding).
-      "/api": {
-        target: BACKEND,
-        ws: true,
-      },
+      "/api": BACKEND,
    },
  },
 });
Author	SHA1	Message	Date
Ari Lotter	2f230b5ad9	feat: add fast-path setup for nous account — adds a nous-account-specific fast flow & auto-launches into chat if the gateway isn't set up	2026-04-24 00:07:23 -04:00
Ari Lotter	bdc9b07c9d	change: always run setup on no-config run — there are instructions on how to exit & do it manually, so there is no point in asking	2026-04-24 00:06:48 -04:00
brooklyn!	6fdbf2f2d7	Merge pull request #14820 from NousResearch/bb/tui-at-fuzzy-match fix(tui): @<name> fuzzy-matches filenames across the repo	2026-04-23 19:40:43 -05:00
Brooklyn Nicholson	0a679cb7ad	fix(tui): restore voice/panic handlers + scope fuzzy paths to cwd Two fixes on top of the fuzzy-@ branch: (1) Rebase artefact: re-apply only the fuzzy additions on top of fresh `tui_gateway/server.py`. The earlier commit was cut from a base 58 commits behind main and clobbered ~170 lines of voice.toggle / voice.record handlers and the gateway crash hooks (`_panic_hook`, `_thread_panic_hook`). Reset server.py to origin/main and re-add only: - `_FUZZY_*` constants + `_list_repo_files` + `_fuzzy_basename_rank` - the new fuzzy branch in the `complete.path` handler (2) Path scoping (Copilot review): `git ls-files` returns repo-root- relative paths, but completions need to resolve under the gateway's cwd. When hermes is launched from a subdirectory, the previous code surfaced `@file:apps/web/src/foo.tsx` even though the agent would resolve that relative to `apps/web/` and miss. Fix: - `git -C root rev-parse --show-toplevel` to get repo top - `git -C top ls-files …` for the listing - `os.path.relpath(top + p, root)` per result, dropping anything starting with `../` so the picker stays scoped to cwd-and-below (matches Cmd-P workspace semantics) `apps/web/src/foo.tsx` ends up as `@file:src/foo.tsx` from inside `apps/web/`, and sibling subtrees + parent-of-cwd files don't leak. New test `test_fuzzy_paths_relative_to_cwd_inside_subdir` builds a 3-package mono-repo, runs from `apps/web/`, and verifies completion paths are subtree-relative + outside-of-cwd files don't appear. Copilot review threads addressed: #3134675504 (path scoping), #3134675532 (`voice.toggle` regression), #3134675541 (`voice.record` regression — both were stale-base artefacts, not behavioural changes).	2026-04-23 19:38:33 -05:00
Brooklyn Nicholson	41b4d69167	Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/tui-at-fuzzy-match	2026-04-23 19:35:18 -05:00
brooklyn!	3f343cf7cf	Merge pull request #14822 from NousResearch/bb/tui-inline-diff-segment-anchor fix(tui): anchor inline_diff to the segment where the edit happened	2026-04-23 19:32:21 -05:00
Brooklyn Nicholson	4ae5b58cb1	fix(tui): restore voice handlers + address copilot review Rebase-artefact cleanup on this branch: - Restore `voice.status` and `voice.transcript` cases in createGatewayEventHandler plus the `voice` / `submission` / `composer.setInput` ctx destructuring. They were added to main in the 58-commit gap that this branch was originally cut behind; dropping them was unintentional. - Rebase the test ctx shape to match main (voice.* fakes, submission.submitRef, composer.setInput) and apply the same segment-anchor test rewrites on top. - Drop the `#14XXX` placeholder from the tool.complete comment; replace with a plain-English rationale. - Rewrite the broken mid-word "pushInlineDiff- Segment" in turnController's dedupe comment to refer to pushInlineDiffSegment and `kind: 'diff'` plainly. - Collapse the filter predicate in recordMessageComplete from a 4-line if/return into one boolean expression — same semantics, reads left-to-right as a single predicate. Copilot review threads resolved: #3134668789, #3134668805, #3134668822.	2026-04-23 19:22:41 -05:00
Brooklyn Nicholson	2258a181f0	fix(tui): give inline_diff segments blank-line breathing room Visual polish on top of the segment-anchor change: diff blocks were butting up against the narration around them. Tag diff-only segments with `kind: 'diff'` (extended on Msg) and give them `marginTop={1}` + `marginBottom={1}` in MessageLine, matching the spacing we already use for user messages. Also swaps the regex-based `diffSegmentBody` check for an explicit `kind === 'diff'` guard so the dedupe path is clearer.	2026-04-23 19:11:59 -05:00
Brooklyn Nicholson	11b2942f16	fix(tui): anchor inline_diff to the segment where the edit happened Revisits #13729. That PR buffered each `tool.complete`'s inline_diff and merged them into the final assistant message body as a fenced ```diff block. The merge-at-end placement reads as "the agent wrote this after the summary", even when the edit fired mid-turn — which is both misleading and (per blitz feedback) feels like noise tacked onto the end of every task. Segment-anchored placement instead: - On tool.complete with inline_diff, `pushInlineDiffSegment` calls `flushStreamingSegment` first (so any in-progress narration lands as its own segment), then pushes the ```diff block as its own segment into segmentMessages. The diff is now anchored BETWEEN the narration that preceded the edit and whatever the agent streams afterwards, which is where the edit actually happened. - `recordMessageComplete` no longer merges buffered diffs. The only remaining dedupe is "drop diff-only segments whose body the final assistant text narrates verbatim (or whose diff fence the final text already contains)" — same tradeoff as before, kept so an agent that narrates its own diff doesn't render two stacked copies. - Drops `pendingInlineDiffs` and `queueInlineDiff` — buffer + end- merge machinery is gone; segmentMessages is now the only source of truth. Side benefit: Ctrl+C interrupt (`interruptTurn`) iterates segmentMessages, so diff segments are now preserved in the transcript when the user cancels after an edit. Previously the pending buffer was silently dropped on interrupt. Reported by Teknium during blitz usage: "no diffs are ever at the end because it didn't make this file edit after the final message".	2026-04-23 19:02:44 -05:00
Brooklyn Nicholson	b08cbc7a79	fix(tui): @<name> fuzzy-matches filenames across the repo Typing `@appChrome` in the composer should surface `ui-tui/src/components/appChrome.tsx` without requiring the user to first type the full directory path — matches the Cmd-P behaviour users expect from modern editors. The gateway's `complete.path` handler was doing a plain `os.listdir(".")` + `startswith` prefix match, so basenames only resolved inside the current working directory. This reworks it to: - enumerate repo files via `git ls-files -z --cached --others --exclude-standard` (fast, honours `.gitignore`); fall back to a bounded `os.walk` that skips common vendor / build dirs when the working dir isn't a git repo. Results cached per-root with a 5s TTL so rapid keystrokes don't respawn git processes. - rank basenames with a 5-tier scorer: exact → prefix → camelCase / word-boundary → substring → subsequence. Shorter basenames win ties; shorter rel paths break basename-length ties. - only take the fuzzy branch when the query is bare (no `/`), is a context reference (`@...`), and isn't `@folder:` — path-ish queries and folder tags fall through to the existing directory-listing path so explicit navigation intent is preserved. Completion rows now carry `display = basename`, `meta = directory`, so the picker renders `appChrome.tsx ui-tui/src/components` on one row (basename bold, directory dim) — the meta column was previously "dir" / "" and is a more useful signal for fuzzy hits. Reported by Ben Barclay during the TUI v2 blitz test.	2026-04-23 19:01:27 -05:00