feat(gui): make desktop setup flow real and testable

Add a GUI-first setup gate and runtime state API so desktop onboarding is safe, iterative, and works with isolated fresh-mode installs. Scaffold and wire the desktop shell/runtime pieces so this branch runs end-to-end without disturbing existing user installs.
Merge pull request #15766 from NousResearch/bb/tui-ssh-copy
2026-04-25 19:48:02 -05:00 · 2026-04-25 15:33:17 -05:00 · 2026-04-25 15:32:45 -05:00 · 2026-04-25 15:26:51 -05:00 · 2026-04-25 15:21:26 -05:00 · 2026-04-25 15:17:36 -05:00
146 changed files with 19589 additions and 3160 deletions
@@ -1680,9 +1680,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, flush_memories, etc.) may set these for
-    # older models; drop them here as a safety net so upstream 4.6 → 4.7
-    # migrations don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, etc.) may set these for older models;
+    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
+    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -390,7 +390,7 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Tools support for flush_memories and similar callers
+        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
            converted = []
@@ -1349,6 +1349,49 @@ def _is_auth_error(exc: Exception) -> bool:
    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()


+def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
+    """Detect provider 400s for an unsupported request parameter.
+
+    Different OpenAI-compatible endpoints phrase the same class of error a few
+    ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
+    ``param`` field, ``X is not supported``, ``unknown parameter: X``,
+    ``unrecognized request argument: X``.  We match on both the parameter
+    name and a generic "unsupported/unknown/unrecognized parameter" marker so
+    call sites can reactively retry without the offending key instead of
+    surfacing a noisy auxiliary failure.
+
+    Generalizes the temperature-specific detector that originally shipped
+    with PR #15621 so the same retry strategy can cover ``max_tokens``,
+    ``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
+    for the generalization pattern.
+    """
+    param_lower = (param or "").lower()
+    if not param_lower:
+        return False
+    err_lower = str(exc).lower()
+    if param_lower not in err_lower:
+        return False
+    return any(marker in err_lower for marker in (
+        "unsupported parameter",
+        "unsupported_parameter",
+        "not supported",
+        "does not support",
+        "unknown parameter",
+        "unrecognized request argument",
+        "unrecognized parameter",
+        "invalid parameter",
+    ))
+
+
+def _is_unsupported_temperature_error(exc: Exception) -> bool:
+    """Back-compat wrapper: detect API errors where the model rejects ``temperature``.
+
+    Delegates to :func:`_is_unsupported_parameter_error` with
+    ``param="temperature"``; kept as a separate module-level symbol because
+    existing tests and call sites import it by name.
+    """
+    return _is_unsupported_parameter_error(exc, "temperature")
+
+
 def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used."""
    normalized = _normalize_aux_provider(provider)
@@ -2760,8 +2803,8 @@ def _build_call_kwargs(
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
-    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
-    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0 on
+    # structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
        from agent.anthropic_adapter import _forbids_sampling_params
@@ -2849,7 +2892,7 @@ def call_llm(

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
-              "session_search", "skills_hub", "mcp", "flush_memories").
+              "session_search", "skills_hub", "mcp", "title_generation").
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
@@ -2952,13 +2995,45 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

-    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
+    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
+    # then payment fallback.
    try:
        return _validate_llm_response(
            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s: provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                # If retry still fails, fall through to the max_tokens /
+                # payment / auth chains below using the temperature-stripped
+                # kwargs.  Re-raise only if the retry hit something those
+                # chains won't handle.
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3221,8 +3296,35 @@ async def async_call_llm(
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s (async): provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -44,22 +44,31 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
 # Multimodal content helpers
 # ---------------------------------------------------------------------------

-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
    """Convert chat-style multimodal content to Responses API input parts.

    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
+
+    The ``role`` parameter controls the text content type:
+    - ``"user"`` (default) → ``"input_text"``
+    - ``"assistant"`` → ``"output_text"``
+
+    The Responses API rejects ``input_text`` inside assistant messages and
+    ``output_text`` inside user messages, so callers MUST pass the correct
+    role for the message being converted.

    Returns an empty list when ``content`` is not a list or contains no
    recognized parts — callers fall back to the string path.
    """
+    text_type = "output_text" if role == "assistant" else "input_text"
    if not isinstance(content, list):
        return []
    converted: List[Dict[str, Any]] = []
    for part in content:
        if isinstance(part, str):
            if part:
-                converted.append({"type": "input_text", "text": part})
+                converted.append({"type": text_type, "text": part})
            continue
        if not isinstance(part, dict):
            continue
@@ -67,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
        if ptype in {"text", "input_text", "output_text"}:
            text = part.get("text")
            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
+                converted.append({"type": text_type, "text": text})
            continue
        if ptype in {"image_url", "input_image"}:
            image_ref = part.get("image_url")
@@ -233,9 +242,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
        if role in {"user", "assistant"}:
            content = msg.get("content", "")
            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
+                content_parts = _chat_content_to_responses_parts(content, role=role)
+                text_type = "output_text" if role == "assistant" else "input_text"
                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                    p.get("text", "") for p in content_parts if p.get("type") == text_type
                )
            else:
                content_parts = []
@@ -429,13 +439,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                content = ""
            if isinstance(content, list):
                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
+                # is already in Responses format (``input_text`` / ``output_text``
+                # / ``input_image``).  Validate each part and pass through.
+                # Use the correct text type for the role — ``output_text`` for
+                # assistant messages, ``input_text`` for user messages.
+                text_type = "output_text" if role == "assistant" else "input_text"
                validated: List[Dict[str, Any]] = []
                for part_idx, part in enumerate(content):
                    if isinstance(part, str):
                        if part:
-                            validated.append({"type": "input_text", "text": part})
+                            validated.append({"type": text_type, "text": part})
                        continue
                    if not isinstance(part, dict):
                        raise ValueError(
@@ -446,7 +459,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                        text = part.get("text", "")
                        if not isinstance(text, str):
                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
+                        validated.append({"type": text_type, "text": text})
                    elif ptype in {"input_image", "image_url"}:
                        image_ref = part.get("image_url", "")
                        detail = part.get("detail")
@@ -318,6 +318,13 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Recalculate token budgets for the new context length so the
+        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )

    def __init__(
        self,
@@ -0,0 +1,58 @@
+# Hermes Apps
+
+Platform apps live here. The first app is a cross-platform GUI shell around the
+existing Hermes dashboard; it should not fork chat, config, logs, or session UI.
+
+## Shape
+
+```text
+apps/
+  gui/      # cross-platform app shell: dev Chrome shell now, Tauri native next
+  shared/   # runtime bundle notes/scripts used by Windows + macOS packaging
+```
+
+## Desktop Dev
+
+The backend-only GUI mode is:
+
+```bash
+hermes dashboard --gui
+```
+
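+The fast GUI shell is (the `\\wsl$` paths in these snippets are examples; substitute your own distro name and checkout path):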
+
+```powershell
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev
+```
+
+The native Tauri shell is:
+
+```powershell
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev:tauri
+```
+
+`--gui` implies the embedded TUI; do not pass `--tui` separately for GUI mode.
+
+## MVP Boundary
+
+Included:
+
+- bundled Python runtime
+- bundled Node/TUI runtime
+- CLI install to PATH
+- profile picker and first-run setup
+- dashboard health/reconnect state
+- tray controls
+- desktop notifications
+- Windows installer
+
+Deferred:
+
+- code signing
+- native self-updater
+- store distribution
+
+For MVP updates, the desktop UI should run the existing `hermes update` flow and
+surface progress/finish notifications.
@@ -0,0 +1,102 @@
+# Hermes GUI
+
+Cross-platform GUI shell for the Hermes dashboard.
+
+## Fast Dev Shell
+
+This gets a GUI window on Windows/WSL today by launching Chrome in app mode:
+
+```bash
+cd apps/gui
+npm run dev
+```
+
+It starts `hermes dashboard --gui --no-open` on port 9120 (or the next free
+port after it, unless `HERMES_GUI_PORT` is set), waits for `/api/health`, then
+opens a standalone app window at that address, e.g. `http://127.0.0.1:9120`.
+
+## Native Shell
+
+The native Tauri shell is still a scaffold:
+
+```bash
+cd apps/gui
+npm run dev:tauri
+```
+
+From Windows PowerShell on a `\\wsl$` path, use PowerShell `npm`, not
+`npm.cmd`:
+
+```powershell
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev:tauri
+```
+
+`npm.cmd` goes through `cmd.exe`, and `cmd.exe` cannot use UNC paths as the
+current directory.
+
+If `npm run` still falls through `cmd.exe`, bypass npm entirely:
+
+```powershell
+\\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1
+```
+
+The launcher builds into `%LOCALAPPDATA%\Hermes\cargo-target\gui` instead of
+`\\wsl$` because Windows Cargo incremental locks do not work reliably on UNC
+WSL filesystems.
+
+In dev, either start Hermes yourself:
+
+```bash
+hermes dashboard --gui --no-open --port 9120
+```
+
+or let the native shell start it. The tray menu owns:
+
+- Open Hermes
+- Open in Browser
+- Restart Hermes Runtime
+- Quit Hermes
+
+The native shell reuses a healthy GUI runtime when one is already running.
+Otherwise it picks the first free port from `9120..9139`, passes that port into
+the WSL/backend process, and navigates the Tauri window there. Set
+`HERMES_GUI_PORT` to force a starting port.
+
+## Fresh Install Emulation
+
+Use an isolated Hermes home without touching your real `~/.hermes`:
+
+```powershell
+powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh
+```
+
+Reset that disposable home and run again:
+
+```powershell
+powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh -ResetFresh
+```
+
+Fresh mode stores state in `%LOCALAPPDATA%\Hermes\fresh-install-home` and starts
+from port `9140` so it does not collide with your normal GUI dev session.
+
+Set `HERMES_GUI_MIN_SPLASH_MS` only when debugging the startup screen; default
+startup is instant once the backend is healthy.
+
+## Boundary
+
+GUI owns:
+
+- app shell/window
+- startup state
+- sidecar process lifecycle
+- future tray/notifications/installers
+
+Hermes owns:
+
+- dashboard UI
+- auth/session token
+- profiles/config/env
+- TUI/PTT chat bridge
+- tools/skills/gateway
+- update flow
@@ -0,0 +1,57 @@
+# Launches the Tauri CLI for the Hermes GUI from Windows PowerShell.
+#
+# Parameters:
+#   -Command     Tauri subcommand to run (defaults to "dev").
+#   -Fresh       Point HERMES_HOME at a disposable directory under
+#                %LOCALAPPDATA% and start from port 9140.
+#   -ResetFresh  Together with -Fresh, delete that disposable directory first.
+#
+# The script exits with the exit code of the Tauri CLI.
+param(
+  [string]$Command = "dev",
+  [switch]$Fresh,
+  [switch]$ResetFresh
+)
+
+$ErrorActionPreference = "Stop"
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
+
+# Directory containing this script; the Tauri CLI is run from here.
+$AppRoot = Split-Path -Parent $MyInvocation.MyCommand.Path
+
+if (-not (Get-Command node -ErrorAction SilentlyContinue)) {
+  throw "Windows Node.js was not found. Install it with: winget install OpenJS.NodeJS.LTS"
+}
+
+if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {
+  throw "Windows Rust was not found. Install it with: winget install Rustlang.Rustup"
+}
+
+# Either a standalone `tauri` binary or the `cargo tauri` subcommand works.
+$Tauri = Get-Command tauri -ErrorAction SilentlyContinue
+$CargoTauri = Get-Command cargo-tauri -ErrorAction SilentlyContinue
+
+if (-not $Tauri -and -not $CargoTauri) {
+  throw "Tauri CLI not found. Install it with: npm install -g @tauri-apps/cli (run from a normal Windows path, not \\wsl$)"
+}
+
+# Build into a local Windows directory with incremental compilation disabled.
+$env:CARGO_INCREMENTAL = "0"
+$env:CARGO_TARGET_DIR = Join-Path $env:LOCALAPPDATA "Hermes\cargo-target\gui"
+New-Item -ItemType Directory -Force -Path $env:CARGO_TARGET_DIR | Out-Null
+
+if ($Fresh) {
+  $FreshHome = Join-Path $env:LOCALAPPDATA "Hermes\fresh-install-home"
+  if ($ResetFresh -and (Test-Path $FreshHome)) {
+    Remove-Item -Recurse -Force $FreshHome
+  }
+  New-Item -ItemType Directory -Force -Path $FreshHome | Out-Null
+  $env:HERMES_HOME = $FreshHome
+  $env:HERMES_GUI_PORT = "9140"
+  $env:HERMES_GUI_FRESH = "1"
+  Write-Host "Fresh GUI mode"
+  Write-Host "  HERMES_HOME=$FreshHome"
+  Write-Host "  HERMES_GUI_PORT=$env:HERMES_GUI_PORT"
+}
+
+$ExitCode = 0
+Push-Location $AppRoot
+try {
+  if ($Tauri) {
+    & tauri $Command
+  }
+  else {
+    & cargo tauri $Command
+  }
+  # By default a non-zero exit code from a native command is not an error for
+  # $ErrorActionPreference, so capture it and hand it back to the caller.
+  $ExitCode = $LASTEXITCODE
+}
+finally {
+  Pop-Location
+}
+
+exit $ExitCode
@@ -0,0 +1,13 @@
+{
+  "name": "@hermes/gui",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "node scripts/dev-shell.mjs",
+    "dev:tauri": "node scripts/tauri.mjs dev",
+    "build": "node scripts/tauri.mjs build",
+    "dashboard": "node scripts/start-dashboard.mjs",
+    "tauri": "node scripts/tauri.mjs"
+  }
+}
@@ -0,0 +1,156 @@
+import { spawn, spawnSync } from "node:child_process";
+import { createServer } from "node:net";
+import { dirname, resolve } from "node:path";
+import { setTimeout as delay } from "node:timers/promises";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(here, "../../..");
+const python = process.env.HERMES_PYTHON || "python";
+let port = process.env.HERMES_GUI_PORT || "9120";
+let url = `http://127.0.0.1:${port}`;
+
+let dashboard = null;
+
+function stop() {
+  if (dashboard && !dashboard.killed) dashboard.kill();
+}
+
+// On Ctrl-C / termination, stop the dashboard child first and then exit with
+// the conventional 128 + signal-number status (SIGINT = 2, SIGTERM = 15).
+process.on("SIGINT", () => {
+  stop();
+  process.exit(130);
+});
+process.on("SIGTERM", () => {
+  stop();
+  process.exit(143);
+});
+// Also stop the child on any other exit path of this launcher.
+process.on("exit", stop);
+
+async function waitForHealth() {
+  for (let i = 0; i < 120; i += 1) {
+    if (await isHealthy()) return true;
+    await delay(500);
+  }
+  return false;
+}
+
+// Probe `${url}/api/health` once with a 1 s timeout.
+// Resolves to true only for an OK HTTP response whose JSON body has
+// `status === "ok"`; any network error, timeout, or unparsable body
+// resolves to false instead of throwing.
+async function isHealthy() {
+  try {
+    const res = await fetch(`${url}/api/health`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    const data = await res.json();
+    return res.ok && data.status === "ok";
+  } catch {
+    return false;
+  }
+}
+
+// Resolve to true when `candidate` can currently be bound on 127.0.0.1.
+// The probe server is closed again before resolving, so the port is only
+// known to have been free at the moment of the check.
+function canBind(candidate) {
+  return new Promise((resolveBind) => {
+    const server = createServer();
+    // Any listen error (e.g. the port is already in use) counts as "cannot bind".
+    server.once("error", () => resolveBind(false));
+    server.listen(Number(candidate), "127.0.0.1", () => {
+      server.close(() => resolveBind(true));
+    });
+  });
+}
+
+async function choosePort() {
+  if (process.env.HERMES_GUI_PORT) return;
+
+  let candidate = Number(port);
+  for (let i = 0; i < 20; i += 1) {
+    if (await canBind(candidate)) {
+      port = String(candidate);
+      url = `http://127.0.0.1:${port}`;
+      return;
+    }
+    candidate += 1;
+  }
+}
+
+function startDashboard() {
+  dashboard = spawn(
+    python,
+    [
+      "-m",
+      "hermes_cli.main",
+      "dashboard",
+      "--gui",
+      "--no-open",
+      "--host",
+      "127.0.0.1",
+      "--port",
+      port,
+    ],
+    {
+      cwd: repoRoot,
+      env: {
+        ...process.env,
+        HERMES_GUI: "1",
+      },
+      stdio: "inherit",
+    },
+  );
+
+  dashboard.on("exit", (code) => {
+    process.exit(code ?? 0);
+  });
+}
+
+function run(command, args) {
+  return (
+    spawnSync(command, args, {
+      shell: process.platform === "win32",
+      stdio: "ignore",
+    }).status === 0
+  );
+}
+
+function openGuiWindow() {
+  if (process.platform === "win32") {
+    return (
+      run("cmd.exe", ["/C", "start", "", "chrome", `--app=${url}`]) ||
+      run("cmd.exe", ["/C", "start", "", "msedge", `--app=${url}`]) ||
+      run("cmd.exe", ["/C", "start", "", url])
+    );
+  }
+
+  if (process.env.WSL_DISTRO_NAME) {
+    return (
+      run("cmd.exe", ["/C", "start", "", "chrome", `--app=${url}`]) ||
+      run("cmd.exe", ["/C", "start", "", "msedge", `--app=${url}`]) ||
+      run("cmd.exe", ["/C", "start", "", url])
+    );
+  }
+
+  if (process.platform === "darwin") {
+    return (
+      run("open", ["-na", "Google Chrome", "--args", `--app=${url}`]) ||
+      run("open", [url])
+    );
+  }
+
+  return (
+    run("google-chrome", [`--app=${url}`]) ||
+    run("chromium", [`--app=${url}`]) ||
+    run("xdg-open", [url])
+  );
+}
+
+if (await isHealthy()) {
+  console.log(`Hermes GUI already running -> ${url}`);
+  openGuiWindow();
+  process.exit(0);
+}
+
+await choosePort();
+startDashboard();
+
+if (await waitForHealth()) {
+  console.log(`Hermes GUI -> ${url}`);
+  openGuiWindow();
+} else {
+  console.error(`Hermes GUI did not become healthy at ${url}`);
+}
@@ -0,0 +1,95 @@
+import { spawn } from "node:child_process";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(here, "../../..");
+const python = process.env.HERMES_PYTHON || "python";
+const port = process.env.HERMES_GUI_PORT || "9120";
+const url = `http://127.0.0.1:${port}`;
+
+// Probe `${url}/api/health` once with a 1 s timeout.
+// Resolves to true only for an OK HTTP response whose JSON body has
+// `status === "ok"`; any network error, timeout, or unparsable body
+// resolves to false instead of throwing.
+async function isHealthy() {
+  try {
+    const res = await fetch(`${url}/api/health`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    const data = await res.json();
+    return res.ok && data.status === "ok";
+  } catch {
+    return false;
+  }
+}
+
+function wslRepoRoot() {
+  const normalized = repoRoot.replaceAll("\\", "/");
+  const parts = normalized.split("/");
+  const host = parts[2]?.toLowerCase();
+  if (process.platform !== "win32") return null;
+  if (host !== "wsl$" && host !== "wsl.localhost") return null;
+  const distro = parts[3];
+  const path = `/${parts.slice(4).join("/")}`;
+  return distro && path !== "/" ? { distro, path } : null;
+}
+
+function spawnDashboard() {
+  const wsl = wslRepoRoot();
+  if (wsl) {
+    return spawn(
+      "wsl.exe",
+      [
+        "-d",
+        wsl.distro,
+        "--cd",
+        wsl.path,
+        "env",
+        "HERMES_GUI=1",
+        process.env.HERMES_WSL_PYTHON || "python",
+        "-m",
+        "hermes_cli.main",
+        "dashboard",
+        "--gui",
+        "--no-open",
+        "--host",
+        "127.0.0.1",
+        "--port",
+        port,
+      ],
+      { stdio: "inherit" },
+    );
+  }
+
+  return spawn(
+    python,
+    [
+      "-m",
+      "hermes_cli.main",
+      "dashboard",
+      "--gui",
+      "--no-open",
+      "--host",
+      "127.0.0.1",
+      "--port",
+      port,
+    ],
+    {
+      cwd: repoRoot,
+      env: {
+        ...process.env,
+        HERMES_GUI: "1",
+      },
+      stdio: "inherit",
+    },
+  );
+}
+
+if (await isHealthy()) {
+  console.log(`Hermes GUI already running -> ${url}`);
+  process.exit(0);
+}
+
+const child = spawnDashboard();
+
+child.on("exit", (code, signal) => {
+  if (signal) process.kill(process.pid, signal);
+  process.exit(code ?? 0);
+});
@@ -0,0 +1,90 @@
+import { spawnSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const appRoot = resolve(here, "..");
+const bin = process.platform === "win32" ? "tauri.cmd" : "tauri";
+const localTauri = resolve(appRoot, "node_modules", ".bin", bin);
+const args = process.argv.slice(2);
+
+function isWsl() {
+  return process.platform === "linux" && !!process.env.WSL_DISTRO_NAME;
+}
+
+function quotePs(value) {
+  return `'${value.replaceAll("'", "''")}'`;
+}
+
+function dispatchToWindows() {
+  const pathResult = spawnSync("wslpath", ["-w", appRoot], {
+    encoding: "utf8",
+  });
+  const windowsPath = pathResult.stdout.trim();
+  if (!windowsPath) return false;
+
+  const command = [
+    "$ErrorActionPreference = 'Stop'",
+    "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
+    "if (-not (Get-Command npm -ErrorAction SilentlyContinue)) {",
+    '  Write-Error "Windows npm was not found. Install Windows Node.js first: winget install OpenJS.NodeJS.LTS"',
+    "}",
+    "if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {",
+    '  Write-Error "Windows Rust was not found. Install Rust first: winget install Rustlang.Rustup"',
+    "}",
+    `Set-Location -LiteralPath ${quotePs(windowsPath)}`,
+    "& npm run dev:tauri",
+  ].join("; ");
+  const result = spawnSync(
+    "powershell.exe",
+    ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
+    { stdio: "inherit" },
+  );
+  process.exit(result.status ?? 1);
+}
+
+// Run `command commandArgs...` from the app root with inherited stdio.
+//
+// With `exit: true` (the default) the process exits with the command's
+// status, so the call does not return in that case. With `exit: false` it
+// returns true when the command exited with status 0, false otherwise.
+// Returns false (without exiting) when the spawn itself fails with ENOENT.
+function run(command, commandArgs, { exit = true } = {}) {
+  if (process.platform === "win32") {
+    // On Windows the command is routed through PowerShell; here ENOENT
+    // refers to powershell.exe, not to `command`.
+    const psCommand = [
+      "$ErrorActionPreference = 'Stop'",
+      "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
+      `Set-Location -LiteralPath ${quotePs(appRoot)}`,
+      `& ${quotePs(command)} ${commandArgs.map(quotePs).join(" ")}`,
+    ].join("; ");
+    const result = spawnSync(
+      "powershell.exe",
+      ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", psCommand],
+      { stdio: "inherit" },
+    );
+    if (result.error && result.error.code === "ENOENT") return false;
+    if (exit) process.exit(result.status ?? 1);
+    return result.status === 0;
+  }
+
+  const result = spawnSync(command, commandArgs, {
+    cwd: appRoot,
+    env: process.env,
+    stdio: "inherit",
+  });
+
+  // ENOENT: the executable was not found, so the caller may try another one.
+  if (result.error && result.error.code === "ENOENT") return false;
+  if (exit) process.exit(result.status ?? 1);
+  return result.status === 0;
+}
+
+// Inside WSL, hand off to Windows unless HERMES_GUI_TAURI_WSL=1 opts out.
+// dispatchToWindows() only returns when the hand-off could not be made.
+if (isWsl() && process.env.HERMES_GUI_TAURI_WSL !== "1") {
+  console.log("Launching native Windows Tauri from WSL...");
+  dispatchToWindows();
+  console.error(
+    "Could not hand off to Windows PowerShell. Run this from Windows PowerShell instead:",
+  );
+  console.error("  cd \\\\wsl$\\Ubuntu\\home\\bb\\hermes-agent\\apps\\gui");
+  console.error("  npm run dev:tauri");
+  process.exit(1);
+}
+
+// Resolution order for the Tauri CLI: project-local binary, `tauri` on PATH,
+// `cargo tauri`, and finally an on-demand `npx` download.
+if (existsSync(localTauri)) run(localTauri, args);
+if (run("tauri", args, { exit: false })) process.exit(0);
+if (run("cargo", ["tauri", ...args], { exit: false })) process.exit(0);
+run("npx", ["--yes", "@tauri-apps/cli@latest", ...args]);
@@ -0,0 +1 @@
+/target/
@@ -0,0 +1,17 @@
+[package]
+name = "hermes-gui"
+version = "0.0.0"
+description = "Hermes GUI shell"
+edition = "2021"
+
+[lib]
+name = "hermes_gui_lib"
+crate-type = ["staticlib", "cdylib", "rlib"]
+
+[build-dependencies]
+tauri-build = { version = "2", features = [] }
+
+[dependencies]
+tauri = { version = "2", features = ["tray-icon"] }
+tauri-plugin-notification = "2"
+tauri-plugin-opener = "2"
@@ -0,0 +1,3 @@
+// Cargo build script: runs the `tauri-build` build step for this crate.
+fn main() {
+    tauri_build::build();
+}
@@ -0,0 +1,7 @@
+{
+  "$schema": "../gen/schemas/desktop-schema.json",
+  "identifier": "default",
+  "description": "Default Hermes GUI permissions",
+  "windows": ["main"],
+  "permissions": ["core:default", "notification:default", "opener:default"]
+}
@@ -0,0 +1 @@
+{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+  <rect width="100" height="100" rx="18" fill="#071313"/>
+  <text x="50" y="70" text-anchor="middle" font-size="68" fill="#f0e6d2">⚕</text>
+</svg>
@@ -0,0 +1 @@
+
@@ -0,0 +1,433 @@
+use std::{
+    io::{Read, Write},
+    net::{TcpListener, TcpStream},
+    process::{Child, Command, Stdio},
+    sync::Mutex,
+    time::{Duration, Instant},
+};
+
+use tauri::{
+    image::Image,
+    menu::{Menu, MenuItem, PredefinedMenuItem},
+    tray::{MouseButton, MouseButtonState, TrayIconBuilder, TrayIconEvent},
+    App, AppHandle, Manager, WebviewWindow,
+};
+
+const GUI_HOST: &str = "127.0.0.1";
+const DEFAULT_GUI_PORT: u16 = 9120;
+const MIN_SPLASH_MS: u64 = 0;
+const SPLASH_URL: &str = "data:text/html,%3C!doctype%20html%3E%3Cmeta%20charset%3Dutf-8%3E%3Cstyle%3Ebody%7Bmargin%3A0%3Bheight%3A100vh%3Bdisplay%3Agrid%3Bplace-items%3Acenter%3Bbackground%3A%23071313%3Bcolor%3A%23f0e6d2%3Bfont%3A14px%20monospace%3Bletter-spacing%3A.08em%3Btext-transform%3Auppercase%7D%3C%2Fstyle%3E%3Cbody%3EStarting%20Hermes%E2%80%A6%3C%2Fbody%3E";
+
+/// Shell-wide state registered with Tauri via `.manage(...)`.
+struct GuiState {
+    // Dashboard process spawned by this shell; set by `ensure_dashboard`,
+    // taken (and killed) by `stop_owned_dashboard`. `None` if nothing is owned.
+    child: Mutex<Option<Child>>,
+    // Port the shell currently targets; starts as `base_port()` and is
+    // overwritten by `ensure_dashboard` after port selection.
+    port: Mutex<u16>,
+}
+
+/// Return the HTTP origin (`http://<GUI_HOST>:<port>`) of the dashboard.
+fn gui_url(port: u16) -> String {
+    let mut url = String::from("http://");
+    url.push_str(GUI_HOST);
+    url.push(':');
+    url.push_str(&port.to_string());
+    url
+}
+
+/// Probe `GET /api/health` on `GUI_HOST:port` over a raw TCP connection.
+///
+/// Returns `true` only when the response text contains `200 OK` together with
+/// the `"status":"ok"` and `"mode":"gui"` markers. Connection and write
+/// failures yield `false`; connect and read are each bounded to one second.
+fn check_health(port: u16) -> bool {
+    let address: std::net::SocketAddr = format!("{GUI_HOST}:{port}").parse().unwrap();
+    let mut stream = match TcpStream::connect_timeout(&address, Duration::from_secs(1)) {
+        Ok(stream) => stream,
+        Err(_) => return false,
+    };
+
+    let _ = stream.set_read_timeout(Some(Duration::from_secs(1)));
+
+    let request =
+        format!("GET /api/health HTTP/1.1\r\nHost: {GUI_HOST}:{port}\r\nConnection: close\r\n\r\n");
+    if let Err(_) = stream.write_all(request.as_bytes()) {
+        return false;
+    }
+
+    // A read error (e.g. timeout) is ignored on purpose: whatever was received
+    // before the error is still inspected below.
+    let mut reply = String::new();
+    let _ = stream.read_to_string(&mut reply);
+
+    ["200 OK", "\"status\":\"ok\"", "\"mode\":\"gui\""]
+        .iter()
+        .all(|marker| reply.contains(*marker))
+}
+
+/// Report whether a listener can currently be bound to `GUI_HOST:port`.
+/// The probe listener is dropped immediately.
+fn can_bind(port: u16) -> bool {
+    match TcpListener::bind((GUI_HOST, port)) {
+        Ok(_probe) => true,
+        Err(_) => false,
+    }
+}
+
+/// Return the first port to try for the dashboard.
+///
+/// Reads `HERMES_GUI_PORT`; falls back to `DEFAULT_GUI_PORT` when the variable
+/// is unset, not a valid `u16`, or `0`. Port 0 is rejected because it asks the
+/// OS for an arbitrary port: it always looks bindable, yet the dashboard would
+/// end up on a port the shell never probes.
+fn base_port() -> u16 {
+    std::env::var("HERMES_GUI_PORT")
+        .ok()
+        .and_then(|raw| raw.trim().parse::<u16>().ok())
+        .filter(|port| *port != 0)
+        .unwrap_or(DEFAULT_GUI_PORT)
+}
+
+/// Pick the port the shell should use.
+///
+/// Scans up to 20 ports starting at `base_port()` and returns the first one
+/// that either already serves a healthy dashboard or is free to bind. If none
+/// qualifies, the base port is returned unchanged.
+fn select_port() -> u16 {
+    let first = base_port();
+    let end = first.saturating_add(20);
+    (first..end)
+        .find(|&candidate| check_health(candidate) || can_bind(candidate))
+        .unwrap_or(first)
+}
+
+/// Resolve the directory three levels above this crate's manifest directory
+/// (baked in at compile time), falling back to `.` if it cannot be
+/// canonicalized.
+fn repo_root() -> std::path::PathBuf {
+    use std::path::PathBuf;
+
+    let candidate = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../..");
+    match candidate.canonicalize() {
+        Ok(resolved) => resolved,
+        Err(_) => PathBuf::from("."),
+    }
+}
+
+/// Return the bundled runtime directory from `HERMES_GUI_RUNTIME_DIR`, or
+/// `None` when the variable is not set.
+fn runtime_dir() -> Option<std::path::PathBuf> {
+    let raw = std::env::var_os("HERMES_GUI_RUNTIME_DIR")?;
+    Some(std::path::PathBuf::from(raw))
+}
+
+/// Locate the Python interpreter inside the runtime's `venv` directory:
+/// `venv/Scripts/python.exe` on Windows, `venv/bin/python` elsewhere.
+fn runtime_python(runtime: &std::path::Path) -> std::path::PathBuf {
+    let (bin_dir, executable) = if cfg!(target_os = "windows") {
+        ("Scripts", "python.exe")
+    } else {
+        ("bin", "python")
+    };
+
+    let mut interpreter = runtime.join("venv");
+    interpreter.push(bin_dir);
+    interpreter.push(executable);
+    interpreter
+}
+
+/// Split a Windows UNC path into a WSL share into `(distro, linux_path)`.
+///
+/// Accepts both the plain form (`\\wsl$\<distro>\...` or
+/// `\\wsl.localhost\<distro>\...`) and the extended-length form
+/// (`\\?\UNC\wsl$\<distro>\...`) that `Path::canonicalize` produces on
+/// Windows. Returns `None` for anything else, including paths that merely
+/// contain a `wsl$` component further down, or that lack a distro name.
+fn wsl_path(root: &std::path::Path) -> Option<(String, String)> {
+    let raw = root.to_string_lossy().replace('\\', "/");
+
+    // Strip the UNC introducer; the verbatim prefix must be tried first
+    // because it also starts with "//".
+    let unc = match raw.strip_prefix("//?/UNC/") {
+        Some(rest) => rest,
+        None => raw.strip_prefix("//")?,
+    };
+
+    let mut segments = unc.split('/');
+    let host = segments.next()?.to_ascii_lowercase();
+    if host != "wsl$" && host != "wsl.localhost" {
+        return None;
+    }
+
+    let distro = segments.next()?.to_string();
+    if distro.is_empty() {
+        return None;
+    }
+
+    // Remaining segments form the absolute path inside the distro.
+    let path = format!("/{}", segments.collect::<Vec<_>>().join("/"));
+    Some((distro, path))
+}
+
+/// Spawn `python -m hermes_cli.main dashboard --gui ...` on `port`.
+///
+/// Three launch paths, tried in order:
+/// 1. `HERMES_GUI_RUNTIME_DIR` is set: use that runtime's venv interpreter and
+///    point `HERMES_WEB_DIST` / `HERMES_TUI_DIR` at its bundled assets.
+/// 2. The repo root is a WSL share: run `python` inside the distro through
+///    `wsl.exe`, passing the environment via `env KEY=VALUE ...`.
+/// 3. Otherwise: run `python` from the repo root.
+///
+/// All child stdio is discarded. Returns the spawn error unchanged on failure.
+fn start_dashboard(port: u16) -> std::io::Result<Child> {
+    // CLI arguments shared by every launch path; `--port <port>` is appended.
+    const DASHBOARD_ARGS: [&str; 7] = [
+        "-m",
+        "hermes_cli.main",
+        "dashboard",
+        "--gui",
+        "--no-open",
+        "--host",
+        GUI_HOST,
+    ];
+
+    // Environment entries forwarded explicitly to the child. `vars_os` is used
+    // because `std::env::vars` panics when any entry is not valid Unicode.
+    let forwarded_env = || {
+        std::env::vars_os().filter(|(key, _)| {
+            matches!(key.to_str(), Some("HERMES_HOME" | "HERMES_GUI_FRESH"))
+        })
+    };
+
+    let port = port.to_string();
+
+    if let Some(runtime) = runtime_dir() {
+        return Command::new(runtime_python(&runtime))
+            .args(DASHBOARD_ARGS)
+            .args(["--port", &port])
+            .env("HERMES_GUI", "1")
+            .env("HERMES_GUI_PORT", &port)
+            .env("HERMES_WEB_DIST", runtime.join("web_dist"))
+            .env("HERMES_TUI_DIR", runtime.join("ui-tui"))
+            .envs(forwarded_env())
+            .stdin(Stdio::null())
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .spawn();
+    }
+
+    let root = repo_root();
+
+    if let Some((distro, path)) = wsl_path(&root) {
+        // Variables are passed through `env` on the Linux side rather than
+        // through this process's Command environment.
+        let mut args = vec![
+            "-d".to_string(),
+            distro,
+            "--cd".to_string(),
+            path,
+            "env".to_string(),
+            "HERMES_GUI=1".to_string(),
+            format!("HERMES_GUI_PORT={port}"),
+        ];
+        for key in ["HERMES_HOME", "HERMES_GUI_FRESH"] {
+            if let Ok(value) = std::env::var(key) {
+                args.push(format!("{key}={value}"));
+            }
+        }
+        args.push("python".to_string());
+        args.extend(DASHBOARD_ARGS.iter().map(|arg| arg.to_string()));
+        args.push("--port".to_string());
+        args.push(port);
+        return Command::new("wsl.exe")
+            .args(args)
+            .stdin(Stdio::null())
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .spawn();
+    }
+
+    Command::new("python")
+        .args(DASHBOARD_ARGS)
+        .args(["--port", &port])
+        .current_dir(root)
+        .env("HERMES_GUI", "1")
+        .env("HERMES_GUI_PORT", &port)
+        .envs(forwarded_env())
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn()
+}
+
+/// Kill and reap the dashboard process owned by this shell, if there is one.
+/// Errors from `kill`/`wait` are ignored.
+fn stop_owned_dashboard(state: &GuiState) {
+    // Take the handle out first so the lock is not held while waiting.
+    let owned = state.child.lock().expect("gui child lock poisoned").take();
+    if let Some(mut process) = owned {
+        let _ = process.kill();
+        let _ = process.wait();
+    }
+}
+
+/// Read the port currently stored in the shared state.
+fn current_port(state: &GuiState) -> u16 {
+    let guard = state.port.lock().expect("gui port lock poisoned");
+    *guard
+}
+
+/// Make sure a dashboard is running (or being started) for this shell.
+///
+/// If the stored port is already healthy nothing happens. Otherwise a port is
+/// selected and stored; if that port is healthy it is reused, else a new
+/// dashboard process is spawned and recorded as owned. Returns a user-facing
+/// message when spawning fails.
+fn ensure_dashboard(state: &GuiState) -> Result<(), String> {
+    if check_health(current_port(state)) {
+        return Ok(());
+    }
+
+    let port = select_port();
+    {
+        let mut stored = state.port.lock().expect("gui port lock poisoned");
+        *stored = port;
+    }
+
+    // The selected port may already host a dashboard this shell does not own.
+    if check_health(port) {
+        return Ok(());
+    }
+
+    match start_dashboard(port) {
+        Ok(spawned) => {
+            *state.child.lock().expect("gui child lock poisoned") = Some(spawned);
+            Ok(())
+        }
+        Err(err) => Err(format!(
+            "Could not auto-start Hermes dashboard ({err}). Start it manually with: hermes dashboard --gui --no-open --port {port}"
+        )),
+    }
+}
+
+/// On a background thread, poll the dashboard every 500 ms for up to 60 s and
+/// navigate `window` to it once healthy.
+///
+/// The window is navigated no earlier than the minimum splash time
+/// (`HERMES_GUI_MIN_SPLASH_MS`, default `MIN_SPLASH_MS`) after polling began,
+/// then shown and focused. If the dashboard never becomes healthy the timeout
+/// is reported on stderr instead of ending silently.
+fn navigate_when_ready(window: WebviewWindow, port: u16) {
+    std::thread::spawn(move || {
+        let started = Instant::now();
+        let deadline = Duration::from_secs(60);
+
+        while started.elapsed() < deadline {
+            if !check_health(port) {
+                std::thread::sleep(Duration::from_millis(500));
+                continue;
+            }
+
+            let min_splash = std::env::var("HERMES_GUI_MIN_SPLASH_MS")
+                .ok()
+                .and_then(|raw| raw.parse::<u64>().ok())
+                .unwrap_or(MIN_SPLASH_MS);
+            // `checked_sub` yields `None` once the minimum has already elapsed.
+            if let Some(remaining) =
+                Duration::from_millis(min_splash).checked_sub(started.elapsed())
+            {
+                std::thread::sleep(remaining);
+            }
+
+            if let Ok(url) = tauri::Url::parse(&gui_url(port)) {
+                let _ = window.navigate(url);
+                let _ = window.show();
+                let _ = window.set_focus();
+            }
+            return;
+        }
+
+        eprintln!(
+            "Hermes dashboard on port {port} did not become healthy within 60s; giving up on navigation"
+        );
+    });
+}
+
+/// Show and focus the window labelled `main`; does nothing if it is missing.
+fn show_main_window(app: &AppHandle) {
+    let Some(main_window) = app.get_webview_window("main") else {
+        return;
+    };
+    let _ = main_window.show();
+    let _ = main_window.set_focus();
+}
+
+/// Open the dashboard URL with the platform's launcher command
+/// (`cmd /C start` on Windows, `open` on macOS, `xdg-open` on other Unix).
+/// Spawn failures are ignored.
+fn open_browser(port: u16) {
+    let target = gui_url(port);
+
+    #[cfg(target_os = "windows")]
+    {
+        let mut launcher = Command::new("cmd");
+        launcher
+            .args(["/C", "start", "", &target])
+            .stdin(Stdio::null())
+            .stdout(Stdio::null())
+            .stderr(Stdio::null());
+        let _ = launcher.spawn();
+    }
+
+    #[cfg(target_os = "macos")]
+    {
+        let _ = Command::new("open").arg(&target).spawn();
+    }
+
+    #[cfg(all(unix, not(target_os = "macos")))]
+    {
+        let _ = Command::new("xdg-open").arg(&target).spawn();
+    }
+}
+
+/// Generate the 32x32 RGBA tray icon in memory: a light glyph made of a
+/// vertical stem, a horizontal bar and two end caps on a dark background.
+fn tray_icon() -> Image<'static> {
+    const SIZE: usize = 32;
+    const FOREGROUND: [u8; 4] = [0xF0, 0xE6, 0xD2, 0xFF];
+    const BACKGROUND: [u8; 4] = [0x07, 0x13, 0x13, 0xFF];
+
+    // True when pixel (x, y) belongs to the glyph.
+    let is_mark = |x: usize, y: usize| {
+        let stem = (14..=17).contains(&x) && (5..=26).contains(&y);
+        let bar = (8..=23).contains(&x) && (13..=16).contains(&y);
+        let caps = (10..=21).contains(&x) && (y == 5 || y == 26);
+        stem || bar || caps
+    };
+
+    let mut pixels = Vec::with_capacity(SIZE * SIZE * 4);
+    for y in 0..SIZE {
+        for x in 0..SIZE {
+            let colour = if is_mark(x, y) { &FOREGROUND } else { &BACKGROUND };
+            pixels.extend_from_slice(colour);
+        }
+    }
+
+    Image::new_owned(pixels, SIZE as u32, SIZE as u32)
+}
+
+/// Stop the owned dashboard, bring one up again, and re-run the
+/// splash-then-navigate sequence on the `main` window if it exists.
+/// Propagates the error from `ensure_dashboard`.
+fn restart_runtime(app: &AppHandle) -> Result<(), String> {
+    let state = app.state::<GuiState>();
+    stop_owned_dashboard(&state);
+    ensure_dashboard(&state)?;
+
+    let Some(main_window) = app.get_webview_window("main") else {
+        return Ok(());
+    };
+
+    if let Ok(splash) = tauri::Url::parse(SPLASH_URL) {
+        let _ = main_window.navigate(splash);
+    }
+    navigate_when_ready(main_window, current_port(&state));
+
+    Ok(())
+}
+
+/// Create the tray icon with its menu and wire up the handlers.
+///
+/// Menu ids: `open` shows the main window, `browser` opens the dashboard URL
+/// externally, `restart` restarts the runtime (failures go to stderr), `quit`
+/// stops the owned dashboard and exits. `status` is a disabled label. A
+/// released left click on the tray icon also shows the main window.
+fn setup_tray(app: &App) -> tauri::Result<()> {
+    // Every custom entry is built the same way; only id, label and enabled
+    // state differ.
+    let entry = |id: &str, label: &str, enabled: bool| {
+        MenuItem::with_id(app, id, label, enabled, None::<&str>)
+    };
+
+    let open_entry = entry("open", "Open Hermes", true)?;
+    let browser_entry = entry("browser", "Open in Browser", true)?;
+    let restart_entry = entry("restart", "Restart Hermes Runtime", true)?;
+    let status_entry = entry("status", "Local runtime", false)?;
+    let upper_rule = PredefinedMenuItem::separator(app)?;
+    let lower_rule = PredefinedMenuItem::separator(app)?;
+    let quit_entry = entry("quit", "Quit Hermes", true)?;
+
+    let tray_menu = Menu::with_items(
+        app,
+        &[
+            &open_entry,
+            &browser_entry,
+            &restart_entry,
+            &upper_rule,
+            &status_entry,
+            &lower_rule,
+            &quit_entry,
+        ],
+    )?;
+
+    let _tray = TrayIconBuilder::new()
+        .icon(tray_icon())
+        .menu(&tray_menu)
+        .tooltip("Hermes")
+        .on_menu_event(|app, event| {
+            let id: &str = event.id.as_ref();
+            match id {
+                "open" => show_main_window(app),
+                "browser" => {
+                    let state = app.state::<GuiState>();
+                    open_browser(current_port(&state));
+                }
+                "restart" => {
+                    if let Err(err) = restart_runtime(app) {
+                        eprintln!("Failed to restart Hermes runtime: {err}");
+                    }
+                }
+                "quit" => {
+                    let state = app.state::<GuiState>();
+                    stop_owned_dashboard(&state);
+                    app.exit(0);
+                }
+                _ => {}
+            }
+        })
+        .on_tray_icon_event(|tray, event| {
+            let released_left_click = matches!(
+                event,
+                TrayIconEvent::Click {
+                    button: MouseButton::Left,
+                    button_state: MouseButtonState::Up,
+                    ..
+                }
+            );
+            if released_left_click {
+                show_main_window(tray.app_handle());
+            }
+        })
+        .build(app)?;
+
+    Ok(())
+}
+
+/// Tauri command: report whether the dashboard on the current port passes the
+/// health check.
+#[tauri::command]
+fn runtime_running(app: AppHandle) -> bool {
+    let port = current_port(&app.state::<GuiState>());
+    check_health(port)
+}
+
+/// Tauri command: restart the runtime, returning the failure message (if any)
+/// to the caller.
+#[tauri::command]
+fn restart_runtime_command(app: AppHandle) -> Result<(), String> {
+    let handle = &app;
+    restart_runtime(handle)
+}
+
+/// Build and run the Tauri application.
+///
+/// Registers the notification and opener plugins, installs the shared
+/// `GuiState`, exposes the two runtime commands, and on setup creates the
+/// tray, shows the splash page, starts (or attaches to) the dashboard and
+/// schedules navigation to it. Panics if the Tauri runtime fails to run.
+pub fn run() {
+    tauri::Builder::default()
+        .plugin(tauri_plugin_notification::init())
+        .plugin(tauri_plugin_opener::init())
+        .manage(GuiState {
+            child: Mutex::new(None),
+            port: Mutex::new(base_port()),
+        })
+        .invoke_handler(tauri::generate_handler![
+            runtime_running,
+            restart_runtime_command
+        ])
+        .setup(|app| {
+            setup_tray(app)?;
+
+            if let Some(window) = app.get_webview_window("main") {
+                // Show the inline splash page while the dashboard comes up.
+                if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
+                    let _ = window.navigate(url);
+                }
+
+                // A start failure is only logged; navigation is still
+                // scheduled and keeps polling the current port.
+                let state = app.state::<GuiState>();
+                if let Err(err) = ensure_dashboard(&state) {
+                    eprintln!("{err}");
+                }
+
+                let port = current_port(&state);
+                navigate_when_ready(window, port);
+            }
+            Ok(())
+        })
+        .on_window_event(|window, event| {
+            // Closing a window only hides it; the app exits via the tray's
+            // "quit" entry.
+            if let tauri::WindowEvent::CloseRequested { api, .. } = event {
+                api.prevent_close();
+                let _ = window.hide();
+            }
+        })
+        .run(tauri::generate_context!())
+        .expect("failed to run Hermes GUI");
+}
@@ -0,0 +1,5 @@
+#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
+
+// Binary entry point: all logic lives in the library crate's `run`.
+fn main() {
+    hermes_gui_lib::run();
+}
@@ -0,0 +1,38 @@
+{
+  "$schema": "https://schema.tauri.app/config/2",
+  "productName": "Hermes",
+  "version": "0.0.0",
+  "identifier": "ai.nous.hermes.gui",
+  "build": {
+    "beforeDevCommand": "",
+    "beforeBuildCommand": "",
+    "devUrl": "http://127.0.0.1:9120",
+    "frontendDist": "../dist"
+  },
+  "app": {
+    "withGlobalTauri": true,
+    "windows": [
+      {
+        "label": "main",
+        "title": "Hermes",
+        "width": 1400,
+        "height": 900,
+        "minWidth": 900,
+        "minHeight": 600,
+        "resizable": true,
+        "center": true
+      }
+    ],
+    "security": {
+      "csp": "default-src 'self' http://127.0.0.1:* http://localhost:*; connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*; img-src 'self' data: blob: http://127.0.0.1:* http://localhost:*; style-src 'self' 'unsafe-inline' http://127.0.0.1:* http://localhost:*; script-src 'self' 'unsafe-inline' 'unsafe-eval' http://127.0.0.1:* http://localhost:*"
+    }
+  },
+  "bundle": {
+    "active": true,
+    "icon": ["icons/32x32.png", "icons/icon.ico", "icons/icon.svg"],
+    "targets": ["nsis", "dmg", "app"],
+    "resources": {
+      "sidecars": "sidecars/"
+    }
+  }
+}
@@ -0,0 +1,5 @@
+// Browser-side GUI bridge entry.
+//
+// The dashboard remains in `web/`; this file is reserved for future shell-only
+// glue if we need pre-navigation scripts or native event wiring.
+export {};
@@ -0,0 +1,44 @@
+param(
+  [string]$Out = "$PSScriptRoot\..\gui\src-tauri\sidecars\hermes-runtime",
+  [string]$Python = "python"
+)
+
+# Bundles the dashboard build, the TUI build and a Python venv into $Out.
+# Any failing step aborts the script so a partial bundle is never reported
+# as ready.
+$ErrorActionPreference = "Stop"
+
+# Native commands (npm, python) signal failure through $LASTEXITCODE rather
+# than PowerShell errors, so each one is checked explicitly.
+function Assert-NativeSuccess {
+  param([string]$Step)
+  if ($LASTEXITCODE -ne 0) {
+    throw "$Step failed with exit code $LASTEXITCODE"
+  }
+}
+
+$Root = Resolve-Path "$PSScriptRoot\..\.."
+
+Write-Host "Bundling Hermes GUI runtime"
+Write-Host "repo: $Root"
+Write-Host "out:  $Out"
+
+# Start from an empty output directory.
+if (Test-Path $Out) {
+  Remove-Item -Recurse -Force $Out
+}
+New-Item -ItemType Directory -Force -Path $Out | Out-Null
+
+Write-Host "-> Building dashboard"
+npm --prefix "$Root\web" ci
+Assert-NativeSuccess "npm ci (web)"
+npm --prefix "$Root\web" run build
+Assert-NativeSuccess "npm run build (web)"
+Copy-Item -Recurse "$Root\web\dist" "$Out\web_dist"
+
+Write-Host "-> Building TUI"
+npm --prefix "$Root\ui-tui" ci
+Assert-NativeSuccess "npm ci (ui-tui)"
+npm --prefix "$Root\ui-tui" run build
+Assert-NativeSuccess "npm run build (ui-tui)"
+New-Item -ItemType Directory -Force -Path "$Out\ui-tui" | Out-Null
+Copy-Item -Recurse "$Root\ui-tui\dist" "$Out\ui-tui\dist"
+Copy-Item "$Root\ui-tui\package.json" "$Out\ui-tui\package.json"
+Copy-Item "$Root\ui-tui\package-lock.json" "$Out\ui-tui\package-lock.json"
+Copy-Item -Recurse "$Root\ui-tui\node_modules" "$Out\ui-tui\node_modules"
+
+Write-Host "-> Creating Python runtime"
+& $Python -m venv "$Out\venv"
+Assert-NativeSuccess "python -m venv"
+& "$Out\venv\Scripts\python.exe" -m pip install --upgrade pip
+Assert-NativeSuccess "pip upgrade"
+& "$Out\venv\Scripts\python.exe" -m pip install -e "$Root[web,pty]"
+Assert-NativeSuccess "pip install hermes"
+
+@"
+# Hermes GUI Runtime
+
+Generated by apps/shared/bundle-runtime.ps1.
+
+Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
+"@ | Set-Content "$Out\README.md"
+
+Write-Host "Runtime bundle ready: $Out"
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+OUT="${1:-"$ROOT/apps/gui/src-tauri/sidecars/hermes-runtime"}"
+PYTHON="${PYTHON:-python}"
+
+echo "Bundling Hermes GUI runtime"
+echo "repo: $ROOT"
+echo "out:  $OUT"
+
+rm -rf "$OUT"
+mkdir -p "$OUT"
+
+echo "→ Building dashboard"
+npm --prefix "$ROOT/web" ci
+npm --prefix "$ROOT/web" run build
+cp -a "$ROOT/web/dist" "$OUT/web_dist"
+
+echo "→ Building TUI"
+npm --prefix "$ROOT/ui-tui" ci
+npm --prefix "$ROOT/ui-tui" run build
+mkdir -p "$OUT/ui-tui"
+cp -a "$ROOT/ui-tui/dist" "$OUT/ui-tui/dist"
+cp -a "$ROOT/ui-tui/package.json" "$ROOT/ui-tui/package-lock.json" "$OUT/ui-tui/"
+cp -a "$ROOT/ui-tui/node_modules" "$OUT/ui-tui/node_modules"
+
+echo "→ Creating Python runtime"
+"$PYTHON" -m venv "$OUT/venv"
+"$OUT/venv/bin/python" -m pip install --upgrade pip
+"$OUT/venv/bin/python" -m pip install -e "$ROOT[web,pty]"
+
+cat > "$OUT/README.md" <<EOF
+# Hermes GUI Runtime
+
+Generated by apps/shared/bundle-runtime.sh.
+
+Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
+EOF
+
+echo "✓ Runtime bundle ready: $OUT"
@@ -0,0 +1,33 @@
+# GUI Runtime Contract
+
+The GUI shell starts Hermes with a small, explicit environment.
+
+## Environment
+
+```text
+HERMES_GUI=1
+HERMES_GUI_PORT=<selected port>
+HERMES_WEB_DIST=<bundled web dist>
+HERMES_TUI_DIR=<bundled ui-tui dir>
+```
+
+The native shell uses `127.0.0.1:9120` as its initial GUI port during dev.
+Bundled builds should keep the port private to the local machine and expose it
+through `/api/health` and `/api/runtime`.
+
+The shell should also pass the selected profile through the normal Hermes CLI
+profile mechanism once the profile picker is wired.
+
+## Ports
+
+Use `127.0.0.1` only. Start with the GUI default port, then fall back to a
+free port if occupied. Show the chosen port in the tray menu.
+
+## User Data
+
+The installer owns app files. Hermes owns user state under `HERMES_HOME`.
+Uninstallers must not delete user state unless the user explicitly asks.
+
+## Update Model
+
+The MVP does not use Tauri's native updater. The GUI runs `hermes update`, tails the
+action log, notifies the user on completion, then offers to restart the runtime.
@@ -790,9 +790,16 @@ code_execution:
 # Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
+                                              # WARNING: values above 10 multiply API cost linearly.
+  # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
+                                              # Raise to 2 to allow workers to spawn their own subagents.
+                                              # Requires role="orchestrator" on intermediate agents.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
+                                              # or auto-approve "once" (true) instead of blocking on stdin.
+                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
+                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
@@ -3176,7 +3176,14 @@ class HermesCLI:
        # the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
        runtime_model = runtime.get("model")
        if runtime_model and isinstance(runtime_model, str):
-            self.model = runtime_model
+            # Only use runtime model if: model is unset, or model equals provider name
+            should_use_runtime_model = (
+                not self.model or  # No model configured yet
+                self.model == self.provider or  # Model is the provider slug
+                self.model == runtime.get("name")  # Model matches provider display name
+            )
+            if should_use_runtime_model:
+                self.model = runtime_model

        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
        # without `hermes model`), fall back to the provider's first catalog
@@ -4661,10 +4668,6 @@ class HermesCLI:
    def new_session(self, silent=False):
        """Start a fresh session with a new session ID and cleared agent state."""
        if self.agent and self.conversation_history:
-            try:
-                self.agent.flush_memories(self.conversation_history)
-            except (Exception, KeyboardInterrupt):
-                pass
            # Trigger memory extraction on the old session before session_id rotates.
            self.agent.commit_memory_session(self.conversation_history)
            self._notify_session_boundary("on_session_finalize")
@@ -10781,12 +10784,6 @@ class HermesCLI:
                    self.agent.interrupt()
                except Exception:
                    pass
-            # Flush memories before exit (only for substantial conversations)
-            if self.agent and self.conversation_history:
-                try:
-                    self.agent.flush_memories(self.conversation_history)
-                except (Exception, KeyboardInterrupt):
-                    pass
            # Shut down voice recorder (release persistent audio stream)
            if hasattr(self, '_voice_recorder') and self._voice_recorder:
                try:
@@ -16,7 +16,7 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any
+from typing import Optional, Dict, List, Any, Union

 logger = logging.getLogger(__name__)

@@ -417,6 +417,7 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
@@ -438,6 +439,9 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        context_from: Optional job ID (or list of job IDs) whose most recent output
+                      is injected into the prompt as context before each run.
+                      Useful for chaining cron jobs: job A finds data, job B processes it.
        enabled_toolsets: Optional list of toolset names to restrict the agent to.
                          When set, only tools from these toolsets are loaded, reducing
                          token overhead. When omitted, all default tools are loaded.
@@ -481,6 +485,14 @@ def create_job(
    normalized_toolsets = normalized_toolsets or None
    normalized_workdir = _normalize_workdir(workdir)

+    # Normalize context_from: accept str or list of str, store as list or None
+    if isinstance(context_from, str):
+        context_from = [context_from.strip()] if context_from.strip() else None
+    elif isinstance(context_from, list):
+        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
+    else:
+        context_from = None
+
    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
        "id": job_id,
@@ -492,6 +504,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -671,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

+    # Inject output from referenced cron jobs as context.
+    context_from = job.get("context_from")
+    if context_from:
+        from cron.jobs import OUTPUT_DIR
+        if isinstance(context_from, str):
+            context_from = [context_from]
+        for source_job_id in context_from:
+            # Guard against path traversal — valid job IDs are 12-char hex strings
+            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
+                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
+                continue
+            try:
+                job_output_dir = OUTPUT_DIR / source_job_id
+                if not job_output_dir.exists():
+                    continue  # silent skip — no output yet
+                output_files = sorted(
+                    job_output_dir.glob("*.md"),
+                    key=lambda f: f.stat().st_mtime,
+                    reverse=True,
+                )
+                if not output_files:
+                    continue  # silent skip — no output yet
+                latest_output = output_files[0].read_text(encoding="utf-8").strip()
+                # Truncate to 8K characters to avoid prompt bloat
+                _MAX_CONTEXT_CHARS = 8000
+                if len(latest_output) > _MAX_CONTEXT_CHARS:
+                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
+                if latest_output:
+                    prompt = (
+                        f"## Output from job '{source_job_id}'\n"
+                        "The following is the most recent output from a preceding "
+                        "cron job. Use it as context for your analysis.\n\n"
+                        f"```\n{latest_output}\n```\n\n"
+                        f"{prompt}"
+                    )
+                else:
+                    continue  # silent skip — empty output
+            except (OSError, PermissionError) as e:
+                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
+                # silent skip — do not pollute the prompt with error messages
+
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
@@ -2543,6 +2543,9 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
+        guild_id: Optional[str] = None,
+        parent_chat_id: Optional[str] = None,
+        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2560,6 +2563,9 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
+            guild_id=str(guild_id) if guild_id else None,
+            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
+            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -3261,6 +3261,7 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
+                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3320,6 +3321,9 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
+            guild_id=str(message.guild.id) if message.guild else None,
+            parent_chat_id=parent_channel_id,
+            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                await crypto_store.open()

+                # Bind the store to the runtime device_id before any
+                # put_account() runs. PgCryptoStore defaults _device_id
+                # to "" and its crypto_account UPSERT never updates the
+                # device_id column on conflict — so once put_account
+                # writes blank, it stays blank forever. That breaks
+                # every downstream device-scoped olm operation: peer
+                # to-device ciphertext can't find our identity key and
+                # no megolm sessions ever land. Setting _device_id here
+                # (in-memory; the on-disk row may not exist yet) makes
+                # the first put_account write the correct value.
+                # DeviceID is a NewType(str) so plain str works at runtime.
+                if client.device_id:
+                    await crypto_store.put_device_id(client.device_id)
+
                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
                olm = OlmMachine(client, crypto_store, crypto_state)

@@ -524,7 +524,7 @@ def _load_gateway_config() -> dict:
 def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from config.yaml — single source of truth.

-    Without this, temporary AIAgent instances (memory flush, /compress) fall
+    Without this, temporary AIAgent instances (e.g. /compress) fall
    back to the hardcoded default which fails when the active provider is
    openai-codex.
    """
@@ -915,129 +915,6 @@ class GatewayRunner:
                e,
            )

-    # -----------------------------------------------------------------
-
-    def _flush_memories_for_session(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Prompt the agent to save memories/skills before context is lost.
-
-        Synchronous worker — meant to be called via run_in_executor from
-        an async context so it doesn't block the event loop.
-        """
-        # Skip cron sessions — they run headless with no meaningful user
-        # conversation to extract memories from.
-        if old_session_id and old_session_id.startswith("cron_"):
-            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
-            return
-
-        try:
-            history = self.session_store.load_transcript(old_session_id)
-            if not history or len(history) < 4:
-                return
-
-            from run_agent import AIAgent
-            model, runtime_kwargs = self._resolve_session_agent_runtime(
-                session_key=session_key,
-            )
-            if not runtime_kwargs.get("api_key"):
-                return
-
-            tmp_agent = AIAgent(
-                **runtime_kwargs,
-                model=model,
-                max_iterations=8,
-                quiet_mode=True,
-                skip_memory=True,  # Flush agent — no memory provider
-                enabled_toolsets=["memory", "skills"],
-                session_id=old_session_id,
-            )
-            try:
-                # Fully silence the flush agent — quiet_mode only suppresses init
-                # messages; tool call output still leaks to the terminal through
-                # _safe_print → _print_fn.  Set a no-op to prevent that.
-                tmp_agent._print_fn = lambda *a, **kw: None
-
-                # Build conversation history from transcript
-                msgs = [
-                    {"role": m.get("role"), "content": m.get("content")}
-                    for m in history
-                    if m.get("role") in ("user", "assistant") and m.get("content")
-                ]
-
-                # Read live memory state from disk so the flush agent can see
-                # what's already saved and avoid overwriting newer entries.
-                _current_memory = ""
-                try:
-                    from tools.memory_tool import get_memory_dir
-                    _mem_dir = get_memory_dir()
-                    for fname, label in [
-                        ("MEMORY.md", "MEMORY (your personal notes)"),
-                        ("USER.md", "USER PROFILE (who the user is)"),
-                    ]:
-                        fpath = _mem_dir / fname
-                        if fpath.exists():
-                            content = fpath.read_text(encoding="utf-8").strip()
-                            if content:
-                                _current_memory += f"\n\n## Current {label}:\n{content}"
-                except Exception:
-                    pass  # Non-fatal — flush still works, just without the guard
-
-                # Give the agent a real turn to think about what to save
-                flush_prompt = (
-                    "[System: This session is about to be automatically reset due to "
-                    "inactivity or a scheduled daily reset. The conversation context "
-                    "will be cleared after this turn.\n\n"
-                    "Review the conversation above and:\n"
-                    "1. Save any important facts, preferences, or decisions to memory "
-                    "(user profile or your notes) that would be useful in future sessions.\n"
-                    "2. If you discovered a reusable workflow or solved a non-trivial "
-                    "problem, consider saving it as a skill.\n"
-                    "3. If nothing is worth saving, that's fine — just skip.\n\n"
-                )
-
-                if _current_memory:
-                    flush_prompt += (
-                        "IMPORTANT — here is the current live state of memory. Other "
-                        "sessions, cron jobs, or the user may have updated it since this "
-                        "conversation ended. Do NOT overwrite or remove entries unless "
-                        "the conversation above reveals something that genuinely "
-                        "supersedes them. Only add new information that is not already "
-                        "captured below."
-                        f"{_current_memory}\n\n"
-                    )
-
-                flush_prompt += (
-                    "Do NOT respond to the user. Just use the memory and skill_manage "
-                    "tools if needed, then stop.]"
-                )
-
-                tmp_agent.run_conversation(
-                    user_message=flush_prompt,
-                    conversation_history=msgs,
-                )
-            finally:
-                self._cleanup_agent_resources(tmp_agent)
-            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
-        except Exception as e:
-            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
-
-    async def _async_flush_memories(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Run the sync memory flush in a thread pool so it won't block the event loop."""
-        loop = asyncio.get_running_loop()
-        await loop.run_in_executor(
-            None,
-            self._flush_memories_for_session,
-            old_session_id,
-            session_key,
-        )
-
    @property
    def should_exit_cleanly(self) -> bool:
        return self._exit_cleanly
@@ -1103,7 +980,7 @@ class GatewayRunner:
            if override_runtime.get("api_key"):
                logger.debug(
                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
-                    (resolved_session_key or "")[:30], model, override_model,
+                    resolved_session_key or "", model, override_model,
                    override_runtime.get("provider"),
                )
                return override_model, override_runtime
@@ -1111,12 +988,12 @@ class GatewayRunner:
            # resolution and apply model/provider from the override on top.
            logger.debug(
                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
-                (resolved_session_key or "")[:30], model, override_model,
+                resolved_session_key or "", model, override_model,
            )
        else:
            logger.debug(
                "No session model override: session=%s config_model=%s override_keys=%s",
-                (resolved_session_key or "")[:30], model,
+                resolved_session_key or "", model,
                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
            )

@@ -1687,7 +1564,7 @@ class GatewayRunner:
                continue
            try:
                agent.interrupt(reason)
-                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
+                logger.debug("Interrupted running agent for session %s during shutdown", session_key)
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)

@@ -1859,7 +1736,7 @@ class GatewayRunner:
                    logger.warning(
                        "Auto-suspended stuck session %s (active across %d "
                        "consecutive restarts — likely a stuck loop)",
-                        session_key[:30], counts[session_key],
+                        session_key, counts[session_key],
                    )
            except Exception:
                pass
@@ -2272,7 +2149,7 @@ class GatewayRunner:
        except Exception as e:
            logger.error("Recovered watcher setup error: %s", e)

-        # Start background session expiry watcher for proactive memory flushing
+        # Start background session expiry watcher to finalize expired sessions
        asyncio.create_task(self._session_expiry_watcher())

        # Start background reconnection watcher for platforms that failed at startup
@@ -2289,25 +2166,24 @@ class GatewayRunner:
        return True
    
    async def _session_expiry_watcher(self, interval: int = 300):
-        """Background task that proactively flushes memories for expired sessions.
-        
-        Runs every `interval` seconds (default 5 min).  For each session that
-        has expired according to its reset policy, flushes memories in a thread
-        pool and marks the session so it won't be flushed again.
+        """Background task that finalizes expired sessions.

-        This means memories are already saved by the time the user sends their
-        next message, so there's no blocking delay.
+        Runs every ``interval`` seconds (default 5 min).  For each session
+        whose reset policy has expired, invokes ``on_session_finalize``
+        hooks, cleans up the cached AIAgent's tool resources, evicts the
+        cache entry so it can be garbage-collected, and marks the session
+        so it won't be finalized again.
        """
        await asyncio.sleep(60)  # initial delay — let the gateway fully start
-        _flush_failures: dict[str, int] = {}  # session_id -> consecutive failure count
-        _MAX_FLUSH_RETRIES = 3
+        _finalize_failures: dict[str, int] = {}  # session_id -> consecutive failure count
+        _MAX_FINALIZE_RETRIES = 3
        while self._running:
            try:
                self.session_store._ensure_loaded()
                # Collect expired sessions first, then log a single summary.
                _expired_entries = []
                for key, entry in list(self.session_store._entries.items()):
-                    if entry.memory_flushed:
+                    if entry.expiry_finalized:
                        continue
                    if not self.session_store._is_session_expired(entry):
                        continue
@@ -2325,13 +2201,12 @@ class GatewayRunner:
                        f"{p}:{c}" for p, c in sorted(_platforms.items())
                    )
                    logger.info(
-                        "Session expiry: %d sessions to flush (%s)",
+                        "Session expiry: %d sessions to finalize (%s)",
                        len(_expired_entries), _plat_summary,
                    )

                for key, entry in _expired_entries:
                    try:
-                        await self._async_flush_memories(entry.session_id, key)
                        try:
                            from hermes_cli.plugins import invoke_hook as _invoke_hook
                            _parts = key.split(":")
@@ -2363,48 +2238,48 @@ class GatewayRunner:
                        # be garbage-collected.  Otherwise the cache grows
                        # unbounded across the gateway's lifetime.
                        self._evict_cached_agent(key)
-                        # Mark as flushed and persist to disk so the flag
+                        # Mark as finalized and persist to disk so the flag
                        # survives gateway restarts.
                        with self.session_store._lock:
-                            entry.memory_flushed = True
+                            entry.expiry_finalized = True
                            self.session_store._save()
                        logger.debug(
-                            "Memory flush completed for session %s",
+                            "Session expiry finalized for %s",
                            entry.session_id,
                        )
-                        _flush_failures.pop(entry.session_id, None)
+                        _finalize_failures.pop(entry.session_id, None)
                    except Exception as e:
-                        failures = _flush_failures.get(entry.session_id, 0) + 1
-                        _flush_failures[entry.session_id] = failures
-                        if failures >= _MAX_FLUSH_RETRIES:
+                        failures = _finalize_failures.get(entry.session_id, 0) + 1
+                        _finalize_failures[entry.session_id] = failures
+                        if failures >= _MAX_FINALIZE_RETRIES:
                            logger.warning(
-                                "Memory flush gave up after %d attempts for %s: %s. "
-                                "Marking as flushed to prevent infinite retry loop.",
+                                "Session finalize gave up after %d attempts for %s: %s. "
+                                "Marking as finalized to prevent infinite retry loop.",
                                failures, entry.session_id, e,
                            )
                            with self.session_store._lock:
-                                entry.memory_flushed = True
+                                entry.expiry_finalized = True
                                self.session_store._save()
-                            _flush_failures.pop(entry.session_id, None)
+                            _finalize_failures.pop(entry.session_id, None)
                        else:
                            logger.debug(
-                                "Memory flush failed (%d/%d) for %s: %s",
-                                failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
+                                "Session finalize failed (%d/%d) for %s: %s",
+                                failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
                            )

                if _expired_entries:
-                    _flushed = sum(
-                        1 for _, e in _expired_entries if e.memory_flushed
+                    _done = sum(
+                        1 for _, e in _expired_entries if e.expiry_finalized
                    )
-                    _failed = len(_expired_entries) - _flushed
+                    _failed = len(_expired_entries) - _done
                    if _failed:
                        logger.info(
-                            "Session expiry done: %d flushed, %d pending retry",
-                            _flushed, _failed,
+                            "Session expiry done: %d finalized, %d pending retry",
+                            _done, _failed,
                        )
                    else:
                        logger.info(
-                            "Session expiry done: %d flushed", _flushed,
+                            "Session expiry done: %d finalized", _done,
                        )

                # Sweep agents that have been idle beyond the TTL regardless
@@ -2681,7 +2556,7 @@ class GatewayRunner:
                    except Exception as _e:
                        logger.debug(
                            "mark_resume_pending failed for %s: %s",
-                            _sk[:20], _e,
+                            _sk, _e,
                        )
                self._interrupt_running_agents(
                    _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
@@ -3347,7 +3222,7 @@ class GatewayRunner:
                logger.warning(
                    "Evicting stale _running_agents entry for %s "
                    "(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
-                    _quick_key[:30], _stale_age, _stale_idle,
+                    _quick_key, _stale_age, _stale_idle,
                    _raw_stale_timeout, _stale_detail,
                )
                self._invalidate_session_run_generation(
@@ -3383,7 +3258,7 @@ class GatewayRunner:
                    interrupt_reason=_INTERRUPT_REASON_STOP,
                    invalidation_reason="stop_command",
                )
-                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
+                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
                return "⚡ Stopped. You can continue this session."

            # /reset and /new must bypass the running-agent guard so they
@@ -3449,7 +3324,7 @@ class GatewayRunner:
                    try:
                        accepted = running_agent.steer(steer_text)
                    except Exception as exc:
-                        logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+                        logger.warning("Steer failed for session %s: %s", _quick_key, exc)
                        return f"⚠️ Steer failed: {exc}"
                    if accepted:
                        preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
@@ -3532,7 +3407,7 @@ class GatewayRunner:
                )

            if event.message_type == MessageType.PHOTO:
-                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
+                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(adapter._pending_messages, _quick_key, event)
@@ -3552,7 +3427,7 @@ class GatewayRunner:
                logger.debug(
                    "Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
                    time.time() - _started_at,
-                    _quick_key[:20],
+                    _quick_key,
                )
                adapter = self.adapters.get(source.platform)
                if adapter:
@@ -3570,7 +3445,7 @@ class GatewayRunner:
                if event.get_command() == "stop":
                    # Force-clean the sentinel so the session is unlocked.
                    self._release_running_agent_state(_quick_key)
-                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
+                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
                # Queue the message so it will be picked up after the
                # agent starts.
@@ -3592,10 +3467,10 @@ class GatewayRunner:
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
            if self._busy_input_mode == "queue":
-                logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
+                logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
                self._queue_or_replace_pending_event(_quick_key, event)
                return None
-            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
+            logger.debug("PRIORITY interrupt for session %s", _quick_key)
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
@@ -4593,7 +4468,7 @@ class GatewayRunner:
            if not self._is_session_run_current(_quick_key, run_generation):
                logger.info(
                    "Discarding stale agent result for %s — generation %d is no longer current",
-                    _quick_key[:20] if _quick_key else "?",
+                    _quick_key or "?",
                    run_generation,
                )
                _stale_adapter = self.adapters.get(source.platform)
@@ -4644,7 +4519,7 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug(
                        "clear_resume_pending failed for %s: %s",
-                        session_key[:20], _e,
+                        session_key, _e,
                    )

            # Surface error details when the agent failed silently (final_response=None)
@@ -5021,19 +4896,11 @@ class GatewayRunner:
        # Get existing session key
        session_key = self._session_key_for_source(source)
        self._invalidate_session_run_generation(session_key, reason="session_reset")
-        
-        # Flush memories in the background (fire-and-forget) so the user
-        # gets the "Session reset!" response immediately.
-        try:
-            old_entry = self.session_store._entries.get(session_key)
-            if old_entry:
-                _flush_task = asyncio.create_task(
-                    self._async_flush_memories(old_entry.session_id, session_key)
-                )
-                self._background_tasks.add(_flush_task)
-                _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Gateway memory flush on reset failed: %s", e)
+
+        # Snapshot the old entry so on_session_finalize can report the
+        # expiring session id before reset_session() rotates it.
+        old_entry = self.session_store._entries.get(session_key)
+
        # Close tool resources on the old agent (terminal sandboxes, browser
        # daemons, background processes) before evicting from cache.
        # Guard with getattr because test fixtures may skip __init__.
@@ -5291,7 +5158,7 @@ class GatewayRunner:
                interrupt_reason=_INTERRUPT_REASON_STOP,
                invalidation_reason="stop_command_pending",
            )
-            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
+            logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
        if agent:
            # Force-clean the session lock so a truly hung agent doesn't
@@ -7252,16 +7119,6 @@ class GatewayRunner:
        if current_entry.session_id == target_id:
            return f"📌 Already on session **{name}**."

-        # Flush memories for current session before switching
-        try:
-            _flush_task = asyncio.create_task(
-                self._async_flush_memories(current_entry.session_id, session_key)
-            )
-            self._background_tasks.add(_flush_task)
-            _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Memory flush on resume failed: %s", e)
-
        # Clear any running agent for this session key
        self._release_running_agent_state(session_key)

@@ -8798,7 +8655,7 @@ class GatewayRunner:
        if reason:
            logger.info(
                "Invalidated run generation for %s → %d (%s)",
-                session_key[:20],
+                session_key,
                generation,
                reason,
            )
@@ -9205,7 +9062,7 @@ class GatewayRunner:
                        if not _run_still_current():
                            logger.info(
                                "Discarding stale proxy stream for %s — generation %d is no longer current",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                                run_generation or 0,
                            )
                            return {
@@ -9269,7 +9126,7 @@ class GatewayRunner:
        if not _run_still_current():
            logger.info(
                "Discarding stale proxy result for %s — generation %d is no longer current",
-                session_key[:20] if session_key else "?",
+                session_key or "?",
                run_generation or 0,
            )
            return {
@@ -9711,7 +9568,7 @@ class GatewayRunner:
                )
                logger.debug(
                    "run_agent resolved: model=%s provider=%s session=%s",
-                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                    model, runtime_kwargs.get("provider"), session_key or "",
                )
            except Exception as exc:
                return {
@@ -10322,7 +10179,7 @@ class GatewayRunner:
            ):
                logger.info(
                    "Skipping stale agent promotion for %s — generation %s is no longer current",
-                    (session_key or "")[:20],
+                    session_key or "",
                    run_generation,
                )
                return
@@ -10469,7 +10326,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10529,7 +10386,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10631,7 +10488,7 @@ class GatewayRunner:
                    if _is_control_interrupt_message(interrupt_message):
                        logger.info(
                            "Ignoring control interrupt message for session %s: %s",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                            interrupt_message,
                        )
                    else:
@@ -10675,7 +10532,7 @@ class GatewayRunner:
            if self._draining and (pending_event or pending):
                logger.info(
                    "Discarding pending follow-up for session %s during gateway %s",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    self._status_action_label(),
                )
                pending_event = None
@@ -10732,7 +10589,7 @@ class GatewayRunner:
                        try:
                            logger.info(
                                "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                            )
                            await adapter.send(
                                source.chat_id,
@@ -10744,7 +10601,7 @@ class GatewayRunner:
                    elif first_response:
                        logger.info(
                            "Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                        )
                    # Release deferred bg-review notifications now that the
                    # first response has been delivered.  Pop from the
@@ -10879,7 +10736,7 @@ class GatewayRunner:
            if not _is_empty_sentinel and (_streamed or _previewed):
                logger.info(
                    "Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    _streamed,
                    _previewed,
                )
@@ -87,6 +87,9 @@ class SessionSource:
    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
+    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
+    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
+    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -124,8 +127,14 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
+        if self.guild_id:
+            d["guild_id"] = self.guild_id
+        if self.parent_chat_id:
+            d["parent_chat_id"] = self.parent_chat_id
+        if self.message_id:
+            d["message_id"] = self.message_id
        return d
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -139,6 +148,9 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
+            guild_id=data.get("guild_id"),
+            parent_chat_id=data.get("parent_chat_id"),
+            message_id=data.get("message_id"),
        )
    

@@ -190,6 +202,31 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


+def _discord_tools_loaded() -> bool:
+    """Report whether Discord tools will really be available this session.
+
+    Both of the following must be true:
+      1. `DISCORD_BOT_TOKEN` holds a non-blank value — the tool's
+         `check_fn` gates on it at registry time, so enabling the
+         toolset in config is not sufficient without the token.
+      2. The `discord` or `discord_admin` toolset is enabled for the
+         Discord platform via `hermes tools` (opt-in, default OFF).
+
+    Any error while reading the config yields False (the safe default,
+    keeping the stale-API disclaimer) so tools are never over-promised.
+    """
+    token = os.environ.get("DISCORD_BOT_TOKEN") or ""
+    if not token.strip():
+        return False
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.tools_config import _get_platform_tools
+        toolsets = _get_platform_tools(load_config(), "discord", include_default_mcp_servers=False)
+        return any(name in toolsets for name in ("discord", "discord_admin"))
+    except Exception:
+        return False
+
+
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -277,14 +314,33 @@ def build_session_context_prompt(
            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
-        lines.append("")
-        lines.append(
-            "**Platform notes:** You are running inside Discord. "
-            "You do NOT have access to Discord-specific APIs — you cannot search "
-            "channel history, pin messages, manage roles, or list server members. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
-        )
+        # Inject the Discord IDs block only when the agent actually has
+        # Discord tools loaded this session — i.e. the user opted into
+        # `discord` / `discord_admin` via `hermes tools` AND the bot
+        # token is configured.  Otherwise keep the stale-API disclaimer
+        # honest so we never promise tools the agent lacks.
+        if _discord_tools_loaded():
+            src = context.source
+            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
+            if src.guild_id:
+                id_lines.append(f"  - Guild: `{src.guild_id}`")
+            if src.thread_id and src.parent_chat_id:
+                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
+                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
+            else:
+                id_lines.append(f"  - Channel: `{src.chat_id}`")
+            if src.message_id:
+                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+            lines.extend(id_lines)
+        else:
+            lines.append("")
+            lines.append(
+                "**Platform notes:** You are running inside Discord. "
+                "You do NOT have access to Discord-specific APIs — you cannot search "
+                "channel history, pin messages, manage roles, or list server members. "
+                "Do not promise to perform these actions. If the user asks, explain "
+                "that you can only read messages sent directly to you and respond."
+            )
    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
@@ -383,11 +439,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it successfully flushes
-    # memories for this session.  Persisted to sessions.json so the flag
-    # survives gateway restarts (the old in-memory _pre_flushed_sessions
-    # set was lost on restart, causing redundant re-flushes).
-    memory_flushed: bool = False
+    # Set by the background expiry watcher after it finalizes an expired
+    # session (invoking on_session_finalize hooks and evicting the cached
+    # agent).  Persisted to sessions.json so the flag survives gateway
+    # restarts — prevents redundant finalization runs.
+    expiry_finalized: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -423,7 +479,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "memory_flushed": self.memory_flushed,
+            "expiry_finalized": self.expiry_finalized,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -475,7 +531,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            memory_flushed=data.get("memory_flushed", False),
+            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -103,7 +103,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
+    CommandDef("model", "Switch model for this session", "Configuration",
+               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
               cli_only=True),

@@ -612,14 +612,6 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
-        "flush_memories": {
-            "provider": "auto",
-            "model": "",
-            "base_url": "",
-            "api_key": "",
-            "timeout": 30,
-            "extra_body": {},
-        },
        "title_generation": {
            "provider": "auto",
            "model": "",
@@ -783,6 +775,15 @@ DEFAULT_CONFIG = {
        # warning log if out of range.
        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
+        # When a subagent hits a dangerous-command approval prompt, the parent's
+        # prompt_toolkit TUI owns stdin — a thread-local input() call from the
+        # subagent worker would deadlock the parent UI. To avoid the deadlock,
+        # subagent threads ALWAYS resolve approvals non-interactively:
+        #   false (default) → auto-deny with a logger.warning audit line (safe)
+        #   true             → auto-approve "once" with a logger.warning audit line
+        # Flip to true only if you trust delegated work to run dangerous cmds
+        # without human review (cron pipelines, batch automation, etc.).
+        "subagent_auto_approve": False,
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -839,7 +840,7 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
-        # discord_server tool: restrict which actions the agent may call.
+        # discord / discord_admin tools: restrict which actions the agent may call.
        # Default (empty) = all actions allowed (subject to bot privileged intents).
        # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
        # or YAML list. Unknown names are dropped with a warning at load time.
@@ -51,6 +51,7 @@ import sys
 from pathlib import Path
 from typing import Optional

+
 def _add_accept_hooks_flag(parser) -> None:
    """Attach the ``--accept-hooks`` flag.  Shared across every agent
    subparser so the flag works regardless of CLI position."""
@@ -174,6 +175,7 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
 try:
    if "HERMES_REDACT_SECRETS" not in os.environ:
        import yaml as _yaml_early
+
        _cfg_path = get_hermes_home() / "config.yaml"
        if _cfg_path.exists():
            with open(_cfg_path, encoding="utf-8") as _f:
@@ -839,6 +841,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:


 def _tui_build_needed(tui_dir: Path) -> bool:
+    if _hermes_ink_bundle_stale(tui_dir):
+        return True
    entry = tui_dir / "dist" / "entry.js"
    if not entry.exists():
        return True
@@ -1026,7 +1030,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
    return [node, str(root / "dist" / "entry.js")], root


-def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
+def _launch_tui(
+    resume_session_id: Optional[str] = None,
+    tui_dev: bool = False,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+):
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"

@@ -1036,6 +1045,12 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
+    if model:
+        env["HERMES_MODEL"] = model
+        env["HERMES_INFERENCE_MODEL"] = model
+    if provider:
+        env["HERMES_TUI_PROVIDER"] = provider
+        env["HERMES_INFERENCE_PROVIDER"] = provider
    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
    # large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1174,6 +1189,8 @@ def cmd_chat(args):
        _launch_tui(
            getattr(args, "resume", None),
            tui_dev=getattr(args, "tui_dev", False),
+            model=getattr(args, "model", None),
+            provider=getattr(args, "provider", None),
        )

    # Import and run the CLI
@@ -1325,7 +1342,9 @@ def cmd_whatsapp(args):
        return

    if not (bridge_dir / "node_modules").exists():
-        print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
+        print(
+            "\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)..."
+        )
        npm = shutil.which("npm")
        if not npm:
            print("  ✗ npm not found on PATH — install Node.js first")
@@ -1701,15 +1720,14 @@ def _clear_stale_openai_base_url():

 # (task_key, display_name, short_description)
 _AUX_TASKS: list[tuple[str, str, str]] = [
-    ("vision",           "Vision",           "image/screenshot analysis"),
-    ("compression",      "Compression",      "context summarization"),
-    ("web_extract",      "Web extract",      "web page summarization"),
-    ("session_search",   "Session search",   "past-conversation recall"),
-    ("approval",         "Approval",         "smart command approval"),
-    ("mcp",              "MCP",              "MCP tool reasoning"),
-    ("flush_memories",   "Flush memories",   "memory consolidation"),
+    ("vision", "Vision", "image/screenshot analysis"),
+    ("compression", "Compression", "context summarization"),
+    ("web_extract", "Web extract", "web page summarization"),
+    ("session_search", "Session search", "past-conversation recall"),
+    ("approval", "Approval", "smart command approval"),
+    ("mcp", "MCP", "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
-    ("skills_hub",       "Skills hub",       "skills search/install"),
+    ("skills_hub", "Skills hub", "skills search/install"),
 ]


@@ -1808,7 +1826,7 @@ def _aux_config_menu() -> None:
        print("  Auxiliary models — side-task routing")
        print()
        print("  Side tasks (vision, compression, web extraction, etc.) default")
-        print("  to your main chat model.  \"auto\" means \"use my main model\" —")
+        print('  to your main chat model.  "auto" means "use my main model" —')
        print("  Hermes only falls back to a lightweight backend (OpenRouter,")
        print("  Nous Portal) if the main model is unavailable.  Override a")
        print("  task below if you want it pinned to a specific provider/model.")
@@ -1819,15 +1837,20 @@ def _aux_config_menu() -> None:
        desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
        entries: list[tuple[str, str]] = []
        for task_key, name, desc in _AUX_TASKS:
-            task_cfg = aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
+            task_cfg = (
+                aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
+            )
            current = _format_aux_current(task_cfg)
-            label = f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
+            label = (
+                f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
+            )
            entries.append((task_key, label))
        entries.append(("__reset__", "Reset all to auto"))
-        entries.append(("__back__",  "Back"))
+        entries.append(("__back__", "Back"))

        idx = _prompt_provider_choice(
-            [label for _, label in entries], default=0,
+            [label for _, label in entries],
+            default=0,
        )
        if idx is None:
            return
@@ -1875,7 +1898,9 @@ def _aux_select_for_task(task: str) -> None:

    entries: list[tuple[str, str, list[str]]] = []  # (slug, label, models)
    # "auto" always first
-    auto_marker = "  ← current" if current_provider == "auto" and not current_base_url else ""
+    auto_marker = (
+        "  ← current" if current_provider == "auto" and not current_base_url else ""
+    )
    entries.append(("__auto__", f"auto (recommended){auto_marker}", []))

    for p in providers:
@@ -1884,7 +1909,9 @@ def _aux_select_for_task(task: str) -> None:
        total = p.get("total_models", 0)
        models = p.get("models") or []
        model_hint = f" — {total} models" if total else ""
-        marker = "  ← current" if slug == current_provider and not current_base_url else ""
+        marker = (
+            "  ← current" if slug == current_provider and not current_base_url else ""
+        )
        entries.append((slug, f"{name}{model_hint}{marker}", list(models)))

    # Custom endpoint (raw base_url)
@@ -1952,14 +1979,17 @@ def _aux_flow_provider_model(
        selected = val or ""
    else:
        selected = _prompt_model_selection(
-            model_list, current_model=current_model, pricing=pricing,
+            model_list,
+            current_model=current_model,
+            pricing=pricing,
        )
        if selected is None:
            print("No change.")
            return

-    _save_aux_choice(task, provider=provider_slug, model=selected or "",
-                     base_url="", api_key="")
+    _save_aux_choice(
+        task, provider=provider_slug, model=selected or "", base_url="", api_key=""
+    )
    if selected:
        print(f"{display_name}: {provider_slug} · {selected}")
    else:
@@ -1979,7 +2009,9 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    print("  Provide an OpenAI-compatible base URL (e.g. http://localhost:11434/v1)")
    print()
    try:
-        url_prompt = f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
+        url_prompt = (
+            f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
+        )
        url = input(url_prompt).strip()
    except (KeyboardInterrupt, EOFError):
        print()
@@ -1989,20 +2021,30 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
        print("No URL provided. No change.")
        return
    try:
-        model_prompt = f"Model slug (optional) [{current_model}]: " if current_model else "Model slug (optional): "
+        model_prompt = (
+            f"Model slug (optional) [{current_model}]: "
+            if current_model
+            else "Model slug (optional): "
+        )
        model = input(model_prompt).strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    model = model or current_model
    try:
-        api_key = getpass.getpass("API key (optional, blank = use OPENAI_API_KEY): ").strip()
+        api_key = getpass.getpass(
+            "API key (optional, blank = use OPENAI_API_KEY): "
+        ).strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return

    _save_aux_choice(
-        task, provider="custom", model=model, base_url=url, api_key=api_key,
+        task,
+        provider="custom",
+        model=model,
+        base_url=url,
+        api_key=api_key,
    )
    short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
    print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))
@@ -2118,7 +2160,9 @@ def _model_flow_ai_gateway(config, current_model=""):
    api_key = get_env_value("AI_GATEWAY_API_KEY")
    if not api_key:
        print("No Vercel AI Gateway API key configured.")
-        print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
+        print(
+            "Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway"
+        )
        print("Add a payment method to get $5 in free credits.")
        print()
        try:
@@ -2918,7 +2962,9 @@ def _model_flow_named_custom(config, provider_info):

    print("Fetching available models...")
    models = fetch_api_models(
-        api_key, base_url, timeout=8.0,
+        api_key,
+        base_url,
+        timeout=8.0,
        api_mode=api_mode or None,
    )

@@ -3589,7 +3635,12 @@ def _model_flow_stepfun(config, current_model=""):
        _save_model_choice,
        deactivate_provider,
    )
-    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.config import (
+        get_env_value,
+        save_env_value,
+        load_config,
+        save_config,
+    )
    from hermes_cli.models import fetch_api_models

    provider_id = "stepfun"
@@ -3608,6 +3659,7 @@ def _model_flow_stepfun(config, current_model=""):
        if key_env:
            try:
                import getpass
+
                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
@@ -3633,7 +3685,10 @@ def _model_flow_stepfun(config, current_model=""):
    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)

    region_choices = [
-        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
+        (
+            "international",
+            f"International ({_stepfun_base_url_for_region('international')})",
+        ),
        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
    ]
    ordered_regions = []
@@ -4476,6 +4531,7 @@ def cmd_webhook(args):
 def cmd_hooks(args):
    """Shell-hook inspection and management."""
    from hermes_cli.hooks import hooks_command
+
    hooks_command(args)


@@ -6046,6 +6102,86 @@ def _cmd_update_impl(args, gateway_mode: bool):
            )
            import signal as _signal

+            def _wait_for_service_active(
+                scope_cmd_: list,
+                svc_name_: str,
+                timeout: float = 10.0,
+            ) -> bool:
+                """Wait until ``systemctl is-active`` says the unit is active.
+
+                A single check right after a restart can land in the gap
+                between systemd's Stopped and Started states and wrongly
+                report the unit as down, so the probe is repeated every
+                0.5s.  The wait lasts ``timeout`` seconds (at least 0.5s)
+                and the unit is always probed at least once.
+
+                Returns True as soon as a probe prints ``active``, False
+                once the deadline has passed without one.
+                """
+                _give_up_at = _time.monotonic() + max(timeout, 0.5)
+                _probe_cmd = scope_cmd_ + ["is-active", svc_name_]
+                while True:
+                    _probe = None
+                    try:
+                        _probe = subprocess.run(
+                            _probe_cmd,
+                            capture_output=True,
+                            text=True,
+                            timeout=5,
+                        )
+                    except (FileNotFoundError, subprocess.TimeoutExpired):
+                        # A missing binary or a hung probe counts as "not
+                        # active yet"; keep polling until the deadline.
+                        pass
+                    if _probe is not None and _probe.stdout.strip() == "active":
+                        return True
+                    if _time.monotonic() >= _give_up_at:
+                        return False
+                    _time.sleep(0.5)
+
+            def _service_restart_sec(
+                scope_cmd_: list,
+                svc_name_: str,
+                default: float = 0.0,
+            ) -> float:
+                """Read the unit's ``RestartUSec`` (RestartSec) in seconds.
+
+                After a graceful exit-75, systemd waits ``RestartSec`` before
+                respawning the unit.  Callers that poll for ``is-active``
+                must use a timeout >= ``RestartSec`` + transition slack, or
+                they'll give up *during* the cooldown window and wrongly
+                conclude the unit didn't relaunch.
+
+                Args:
+                    scope_cmd_: Command prefix the ``show`` query is appended
+                        to (presumably ``systemctl`` plus scope flags).
+                    svc_name_: Name of the unit to query.
+                    default: Value returned when the property cannot be read
+                        or parsed.
+
+                Returns:
+                    The restart delay in seconds, or ``default`` when the
+                    command is missing, times out, prints nothing, prints
+                    ``infinity``, or prints a token that is not a timespan.
+                """
+                try:
+                    _show = subprocess.run(
+                        scope_cmd_
+                        + [
+                            "show",
+                            svc_name_,
+                            "--property=RestartUSec",
+                            "--value",
+                        ],
+                        capture_output=True,
+                        text=True,
+                        timeout=5,
+                    )
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    return default
+                raw = (_show.stdout or "").strip()
+                if not raw or raw == "infinity":
+                    return default
+                # systemd prints timespans as space-separated "<number><unit>"
+                # tokens ("30s", "100ms", "1min 30s", "1h 30min").  Every unit
+                # it can print is listed here: silently skipping one (e.g.
+                # "1h") would under-report the delay and make callers stop
+                # polling while systemd is still inside its cooldown.
+                _unit_seconds = {
+                    "us": 0.000001,
+                    "ms": 0.001,
+                    "s": 1.0,
+                    "min": 60.0,
+                    "h": 3600.0,
+                    "d": 86400.0,
+                    "w": 604800.0,
+                    "month": 2629800.0,
+                    "y": 31557600.0,
+                }
+                total = 0.0
+                for part in raw.split():
+                    # Split "<number><unit>" by peeling the trailing letters;
+                    # an exact unit lookup avoids suffix clashes such as
+                    # "month" ending in "h" or "ms" ending in "s".
+                    _number = part.rstrip("abcdefghijklmnopqrstuvwxyz")
+                    _mult = _unit_seconds.get(part[len(_number):])
+                    if _mult is None:
+                        return default
+                    try:
+                        total += float(_number) * _mult
+                    except ValueError:
+                        return default
+                return total
+
            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
            # for up to ``agent.restart_drain_timeout`` (default 60s) before
            # exiting with code 75; we wait slightly longer so the drain
@@ -6061,12 +6197,17 @@ def _cmd_update_impl(args, gateway_mode: bool):
            _cfg_drain = None
            try:
                from hermes_cli.config import load_config
-                _cfg_agent = (load_config().get("agent") or {})
+
+                _cfg_agent = load_config().get("agent") or {}
                _cfg_drain = _cfg_agent.get("restart_drain_timeout")
            except Exception:
                pass
            try:
-                _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
+                _drain_budget = (
+                    float(_cfg_drain)
+                    if _cfg_drain is not None
+                    else float(_DEFAULT_DRAIN)
+                )
            except (TypeError, ValueError):
                _drain_budget = float(_DEFAULT_DRAIN)
            # Add a 15s margin so the drain loop + final exit finish before
@@ -6131,14 +6272,23 @@ def _cmd_update_impl(args, gateway_mode: bool):
                            _main_pid = 0
                            try:
                                _show = subprocess.run(
-                                    scope_cmd + [
-                                        "show", svc_name,
-                                        "--property=MainPID", "--value",
+                                    scope_cmd
+                                    + [
+                                        "show",
+                                        svc_name,
+                                        "--property=MainPID",
+                                        "--value",
                                    ],
-                                    capture_output=True, text=True, timeout=5,
+                                    capture_output=True,
+                                    text=True,
+                                    timeout=5,
                                )
                                _main_pid = int((_show.stdout or "").strip() or 0)
-                            except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
+                            except (
+                                ValueError,
+                                subprocess.TimeoutExpired,
+                                FileNotFoundError,
+                            ):
                                _main_pid = 0

                            _graceful_ok = False
@@ -6147,19 +6297,33 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    f"  → {svc_name}: draining (up to {int(_drain_budget)}s)..."
                                )
                                _graceful_ok = _graceful_restart_via_sigusr1(
-                                    _main_pid, drain_timeout=_drain_budget,
+                                    _main_pid,
+                                    drain_timeout=_drain_budget,
                                )

                            if _graceful_ok:
                                # Gateway exited 75; systemd should relaunch
-                                # via Restart=on-failure.  Verify the new
-                                # process came up.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True, text=True, timeout=5,
+                                # via Restart=on-failure.  The unit's
+                                # RestartSec (default 30s on ours) gates the
+                                # respawn — poll past that + slack so we
+                                # don't give up mid-cooldown and falsely
+                                # print "drained but didn't relaunch".  For
+                                # units without RestartSec set we fall back
+                                # to the original 10s budget.
+                                _restart_sec = _service_restart_sec(
+                                    scope_cmd,
+                                    svc_name,
+                                    default=0.0,
                                )
-                                if verify.stdout.strip() == "active":
+                                _post_drain_timeout = max(
+                                    10.0,
+                                    _restart_sec + 10.0,
+                                )
+                                if _wait_for_service_active(
+                                    scope_cmd,
+                                    svc_name,
+                                    timeout=_post_drain_timeout,
+                                ):
                                    restarted_services.append(svc_name)
                                    continue
                                # Process exited but wasn't respawned (older
@@ -6185,14 +6349,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                # Verify the service actually survived the
                                # restart.  systemctl restart returns 0 even
                                # if the new process crashes immediately.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True,
-                                    text=True,
-                                    timeout=5,
-                                )
-                                if verify.stdout.strip() == "active":
+                                if _wait_for_service_active(
+                                    scope_cmd,
+                                    svc_name,
+                                    timeout=10.0,
+                                ):
                                    restarted_services.append(svc_name)
                                else:
                                    # Retry once — transient startup failures
@@ -6207,14 +6368,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                        text=True,
                                        timeout=15,
                                    )
-                                    _time.sleep(3)
-                                    verify2 = subprocess.run(
-                                        scope_cmd + ["is-active", svc_name],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=5,
-                                    )
-                                    if verify2.stdout.strip() == "active":
+                                    if _wait_for_service_active(
+                                        scope_cmd,
+                                        svc_name,
+                                        timeout=10.0,
+                                    ):
                                        restarted_services.append(svc_name)
                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
@@ -6732,13 +6890,17 @@ def cmd_dashboard(args):

    from hermes_cli.web_server import start_server

-    embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
+    gui_mode = getattr(args, "gui", False)
+    embedded_chat = (
+        gui_mode or args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
+    )
    start_server(
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
        allow_public=getattr(args, "insecure", False),
        embedded_chat=embedded_chat,
+        gui_mode=gui_mode,
    )


@@ -6821,6 +6983,40 @@ For more help on a command:
    parser.add_argument(
        "--version", "-V", action="store_true", help="Show version and exit"
    )
+    parser.add_argument(
+        "-z",
+        "--oneshot",
+        metavar="PROMPT",
+        default=None,
+        help=(
+            "One-shot mode: send a single prompt and print ONLY the final "
+            "response text to stdout. No banner, no spinner, no tool "
+            "previews, no session_id line. Tools, memory, rules, and "
+            "AGENTS.md in the CWD are loaded as normal; approvals are "
+            "auto-bypassed. Intended for scripts / pipes."
+        ),
+    )
+    # --model / --provider are accepted at the top level so they can pair
+    # with -z without needing the `chat` subcommand.  If neither -z nor a
+    # subcommand consumes them, they fall through harmlessly as None.
+    # Mirrors `hermes chat --model ... --provider ...` semantics.
+    parser.add_argument(
+        "-m",
+        "--model",
+        default=None,
+        help=(
+            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
+        ),
+    )
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help=(
+            "Provider override for this invocation (e.g. openrouter, anthropic). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+        ),
+    )
    parser.add_argument(
        "--resume",
        "-r",
@@ -7398,17 +7594,39 @@ For more help on a command:
        "reset", help="Clear exhaustion status for all credentials for a provider"
    )
    auth_reset.add_argument("provider", help="Provider id")
-    auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
+    auth_status = auth_subparsers.add_parser(
+        "status", help="Show auth status for a provider"
+    )
    auth_status.add_argument("provider", help="Provider id")
-    auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
+    auth_logout = auth_subparsers.add_parser(
+        "logout", help="Log out a provider and clear stored auth state"
+    )
    auth_logout.add_argument("provider", help="Provider id")
-    auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
-    auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
-    auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
-    auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
+    auth_spotify = auth_subparsers.add_parser(
+        "spotify", help="Authenticate Hermes with Spotify via PKCE"
+    )
+    auth_spotify.add_argument(
+        "spotify_action",
+        nargs="?",
+        choices=["login", "status", "logout"],
+        default="login",
+    )
+    auth_spotify.add_argument(
+        "--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)"
+    )
+    auth_spotify.add_argument(
+        "--redirect-uri",
+        help="Allow-listed localhost redirect URI for your Spotify app",
+    )
    auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
-    auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
-    auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
+    auth_spotify.add_argument(
+        "--no-browser",
+        action="store_true",
+        help="Do not attempt to open the browser automatically",
+    )
+    auth_spotify.add_argument(
+        "--timeout", type=float, help="Callback/token exchange timeout in seconds"
+    )
    auth_parser.set_defaults(func=cmd_auth)

    # =========================================================================
@@ -7618,7 +7836,8 @@ For more help on a command:
    hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")

    hooks_subparsers.add_parser(
-        "list", aliases=["ls"],
+        "list",
+        aliases=["ls"],
        help="List configured hooks with matcher, timeout, and consent status",
    )

@@ -7631,14 +7850,18 @@ For more help on a command:
        help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
    )
    _hk_test.add_argument(
-        "--for-tool", dest="for_tool", default=None,
+        "--for-tool",
+        dest="for_tool",
+        default=None,
        help=(
            "Only fire hooks whose matcher matches this tool name "
            "(used for pre_tool_call / post_tool_call)"
        ),
    )
    _hk_test.add_argument(
-        "--payload-file", dest="payload_file", default=None,
+        "--payload-file",
+        dest="payload_file",
+        default=None,
        help=(
            "Path to a JSON file whose contents are merged into the "
            "synthetic payload before execution"
@@ -7646,7 +7869,8 @@ For more help on a command:
    )

    _hk_revoke = hooks_subparsers.add_parser(
-        "revoke", aliases=["remove", "rm"],
+        "revoke",
+        aliases=["remove", "rm"],
        help="Remove a command's allowlist entries (takes effect on next restart)",
    )
    _hk_revoke.add_argument(
@@ -8932,6 +9156,11 @@ Examples:
            "Alternatively set HERMES_DASHBOARD_TUI=1."
        ),
    )
+    dashboard_parser.add_argument(
+        "--gui",
+        action="store_true",
+        help="Run dashboard in GUI-shell mode; implies --tui",
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -9074,26 +9303,28 @@ Examples:
    # the nested subcommand (dest varies by parser).
    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
    _AGENT_SUBCOMMANDS = {
-        "cron":    ("cron_command",    {"run", "tick"}),
+        "cron": ("cron_command", {"run", "tick"}),
        "gateway": ("gateway_command", {"run"}),
-        "mcp":     ("mcp_action",      {"serve"}),
+        "mcp": ("mcp_action", {"serve"}),
    }
    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
-    if (
-        args.command in _AGENT_COMMANDS
-        or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
+    if args.command in _AGENT_COMMANDS or (
+        _sub_attr and getattr(args, _sub_attr, None) in _sub_set
    ):
        _accept_hooks = bool(getattr(args, "accept_hooks", False))
        try:
            from hermes_cli.plugins import discover_plugins
+
            discover_plugins()
        except Exception:
            logger.debug(
-                "plugin discovery failed at CLI startup", exc_info=True,
+                "plugin discovery failed at CLI startup",
+                exc_info=True,
            )
        try:
            from hermes_cli.config import load_config
            from agent.shell_hooks import register_from_config
+
            register_from_config(load_config(), accept_hooks=_accept_hooks)
        except Exception:
            logger.debug(
@@ -9101,6 +9332,19 @@ Examples:
                exc_info=True,
            )

+    # Handle top-level --oneshot / -z: single-shot mode, stdout = final
+    # response only, nothing else. Bypasses cli.py entirely.
+    if getattr(args, "oneshot", None):
+        from hermes_cli.oneshot import run_oneshot
+
+        sys.exit(
+            run_oneshot(
+                args.oneshot,
+                model=getattr(args, "model", None),
+                provider=getattr(args, "provider", None),
+            )
+        )
+
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -1379,27 +1379,93 @@ def curated_models_for_provider(
    return [(m, "") for m in models]


-def detect_provider_for_model(
+def _provider_keys(provider: str) -> set[str]:
+    """Return the non-empty lookup keys for ``provider``.
+
+    The set holds the stripped, lower-cased spelling of ``provider`` plus
+    whatever ``normalize_provider`` returns for it.  Falsy candidates are
+    dropped, so the result may be empty.
+    """
+    raw_key = (provider or "").strip().lower()
+    candidates = (raw_key, normalize_provider(provider))
+    return {candidate for candidate in candidates if candidate}
+
+
+def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
+    """Report whether any of ``providers`` lists ``name_lower`` in its catalog.
+
+    Catalog entries are lower-cased before the comparison; ``name_lower`` is
+    compared exactly as given.  Providers without a catalog entry in
+    ``_PROVIDER_MODELS`` contribute nothing.
+    """
+    for provider_id in providers:
+        for catalog_model in _PROVIDER_MODELS.get(provider_id, []):
+            if catalog_model.lower() == name_lower:
+                return True
+    return False
+
+
+_AGGREGATOR_PROVIDERS = frozenset(
+    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
+)
+
+
+def _resolve_static_model_alias(
+    name_lower: str,
+    current_keys: set[str],
+) -> Optional[tuple[str, str]]:
+    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only.
+
+    Args:
+        name_lower: Lower-cased alias to look up in ``MODEL_ALIASES``.
+        current_keys: Keys identifying the currently selected provider.
+
+    Returns:
+        ``(provider_id, model_name)`` for the first catalog entry whose name
+        starts with the alias prefix, or ``None`` when the alias table cannot
+        be imported, the alias is unknown, or no eligible catalog matches.
+        The current provider is tried first (aggregator or not); after that
+        only non-aggregator providers are considered.
+    """
+    try:
+        from hermes_cli.model_switch import MODEL_ALIASES
+    except Exception:
+        # Best effort: without the alias table there is nothing to resolve.
+        return None
+
+    identity = MODEL_ALIASES.get(name_lower)
+    if identity is None:
+        return None
+
+    vendor = identity.vendor
+    family = identity.family
+
+    def _match(provider: str) -> Optional[str]:
+        """Return the first catalog model of ``provider`` matching the alias."""
+        models = _PROVIDER_MODELS.get(provider, [])
+        if not models:
+            return None
+        # Aggregator catalogs are matched against "vendor/family"; every
+        # other catalog is matched against the bare family name.
+        prefix = (
+            f"{vendor}/{family}"
+            if provider in _AGGREGATOR_PROVIDERS
+            else family
+        ).lower()
+        for model in models:
+            if model.lower().startswith(prefix):
+                return model
+        return None
+
+    # Prefer the current provider.  The keys are sorted so the outcome does
+    # not depend on set iteration order when more than one key matches.
+    for provider in sorted(current_keys):
+        if matched := _match(provider):
+            return provider, matched
+
+    for provider in _PROVIDER_MODELS:
+        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
+            continue
+        if matched := _match(provider):
+            return provider, matched
+
+    # No separate aggregator pass is needed: an aggregator is only eligible
+    # when it is the current provider, and every current key was tried above.
+    return None
+
+
+def detect_static_provider_for_model(
    model_name: str,
    current_provider: str,
 ) -> Optional[tuple[str, str]]:
-    """Auto-detect the best provider for a model name.
+    """Auto-detect a provider from static catalogs only.

-    Returns ``(provider_id, model_name)`` — the model name may be remapped
-    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``(provider_id, model_name)``. The model name may be remapped
+    when a static alias or bare provider name resolves to a catalog default.
    Returns ``None`` when no confident match is found.
-
-    Priority:
-    0. Bare provider name → switch to that provider's default model
-    1. Direct provider with credentials (highest)
-    2. Direct provider without credentials → remap to OpenRouter slug
-    3. OpenRouter catalog match
    """
    name = (model_name or "").strip()
    if not name:
        return None

    name_lower = name.lower()
+    current_keys = _provider_keys(current_provider)
+
+    alias_match = _resolve_static_model_alias(name_lower, current_keys)
+    if alias_match:
+        return alias_match

    # --- Step 0: bare provider name typed as model ---
    # If someone types `/model nous` or `/model anthropic`, treat it as a
@@ -1412,64 +1478,49 @@ def detect_provider_for_model(
        if (
            resolved_provider in _PROVIDER_LABELS
            and default_models
-            and resolved_provider != normalize_provider(current_provider)
+            and resolved_provider not in current_keys
        ):
            return (resolved_provider, default_models[0])

    # Aggregators list other providers' models — never auto-switch TO them
-    _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
-
    # If the model belongs to the current provider's catalog, don't suggest switching
-    current_models = _PROVIDER_MODELS.get(current_provider, [])
-    if any(name_lower == m.lower() for m in current_models):
+    if _model_in_provider_catalog(name_lower, current_keys):
        return None

    # --- Step 1: check static provider catalogs for a direct match ---
-    direct_match: Optional[str] = None
    for pid, models in _PROVIDER_MODELS.items():
-        if pid == current_provider or pid in _AGGREGATORS:
+        if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
            continue
        if any(name_lower == m.lower() for m in models):
-            direct_match = pid
-            break
+            return (pid, name)

-    if direct_match:
-        # Check if we have credentials for this provider — env vars,
-        # credential pool, or auth store entries.
-        has_creds = False
-        try:
-            from hermes_cli.auth import PROVIDER_REGISTRY
-            pconfig = PROVIDER_REGISTRY.get(direct_match)
-            if pconfig:
-                for env_var in pconfig.api_key_env_vars:
-                    if os.getenv(env_var, "").strip():
-                        has_creds = True
-                        break
-        except Exception:
-            pass
-        # Also check credential pool and auth store — covers OAuth,
-        # Claude Code tokens, and other non-env-var credentials (#10300).
-        if not has_creds:
-            try:
-                from agent.credential_pool import load_pool
-                pool = load_pool(direct_match)
-                if pool.has_credentials():
-                    has_creds = True
-            except Exception:
-                pass
-        if not has_creds:
-            try:
-                from hermes_cli.auth import _load_auth_store
-                store = _load_auth_store()
-                if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
-                    has_creds = True
-            except Exception:
-                pass
+    return None

-        # Always return the direct provider match.  If credentials are
-        # missing, the client init will give a clear error rather than
-        # silently routing through the wrong provider (#10300).
-        return (direct_match, name)
+
+def detect_provider_for_model(
+    model_name: str,
+    current_provider: str,
+) -> Optional[tuple[str, str]]:
+    """Auto-detect the best provider for a model name.
+
+    Returns ``(provider_id, model_name)`` — the model name may be remapped
+    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``None`` when no confident match is found.
+
+    Priority:
+    0. Bare provider name → switch to that provider's default model
+    1. Direct provider static catalog match
+    2. OpenRouter catalog match
+    """
+    name = (model_name or "").strip()
+    if not name:
+        return None
+
+    static_match = detect_static_provider_for_model(name, current_provider)
+    if static_match:
+        return static_match
+    if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
+        return None

    # --- Step 2: check OpenRouter catalog ---
    # First try exact match (handles provider/model format)
@@ -0,0 +1,202 @@
+"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
+
+Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
+no stderr chatter.  Just the agent's final text to stdout.
+
+Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
+Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
+Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
+
+Model / provider selection mirrors `hermes chat`:
+    - Both optional. If omitted, use the user's configured default.
+    - If both given, pair them exactly as given.
+    - If only --model given, auto-detect the provider that serves it.
+    - If only --provider given, error out (ambiguous — caller must pick a model).
+
+Env var fallbacks (used when the corresponding arg is not passed):
+    - HERMES_INFERENCE_MODEL
+    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from contextlib import redirect_stderr, redirect_stdout
+from typing import Optional
+
+
+def run_oneshot(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> int:
+    """Execute a single prompt and print only the final content block.
+
+    Args:
+        prompt: The user message to send.
+        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
+            env var, then config.yaml's model.default / model.model.
+        provider: Optional provider override. Falls back to
+            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
+            then "auto".
+
+    Returns:
+        The exit code: 2 when ``provider`` is given without a model (argument
+        or HERMES_INFERENCE_MODEL), otherwise 0.  Caller should sys.exit()
+        with the return.
+    """
+    # Silence every stdlib logger.  logging.disable() is process-wide and
+    # overrides per-logger levels: records of severity CRITICAL and below are
+    # dropped before they reach ANY handler, file handlers included.  It is
+    # not re-enabled here.
+    logging.disable(logging.CRITICAL)
+
+    # --provider without --model is ambiguous: carrying the user's configured
+    # model across to a different provider is usually wrong (that provider may
+    # not host it), and silently picking the provider's catalog default hides
+    # the mismatch.  Require the caller to be explicit.  Validate BEFORE the
+    # stderr redirect so the message actually reaches the terminal.
+    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    if provider and not ((model or "").strip() or env_model_early):
+        sys.stderr.write(
+            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
+            "Pass both explicitly, or neither to use your configured defaults.\n"
+        )
+        return 2
+
+    # Auto-approve any shell / tool approvals.  Non-interactive by
+    # definition — a prompt would hang forever.
+    os.environ["HERMES_YOLO_MODE"] = "1"
+    os.environ["HERMES_ACCEPT_HOOKS"] = "1"
+
+    # Redirect stderr AND stdout to devnull for the entire call tree.
+    # We'll print the final response to the real stdout at the end.
+    real_stdout = sys.stdout
+
+    # The sink is opened with an explicit encoding and errors="replace" so
+    # that a print() of characters the locale encoding cannot represent
+    # never raises UnicodeEncodeError inside the agent.  The context manager
+    # closes the sink on both the success and the exception path.
+    with open(os.devnull, "w", encoding="utf-8", errors="replace") as devnull:
+        with redirect_stdout(devnull), redirect_stderr(devnull):
+            response = _run_agent(prompt, model=model, provider=provider)
+
+    if response:
+        real_stdout.write(response)
+        if not response.endswith("\n"):
+            real_stdout.write("\n")
+        real_stdout.flush()
+    return 0
+
+
+def _run_agent(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> str:
+    """Run one conversation turn through a freshly built AIAgent.
+
+    The model is taken from ``model``, then HERMES_INFERENCE_MODEL, then the
+    ``model`` section of the loaded config.  When no provider is passed but a
+    model was requested explicitly (argument or env var), the provider is
+    looked up with ``detect_provider_for_model``, which may also remap the
+    model name.  Returns the agent's reply, or ``""`` when it is falsy.
+    """
+    # Project imports stay local so other hermes commands don't pay for them
+    # at startup.
+    from hermes_cli.config import load_config
+    from hermes_cli.models import detect_provider_for_model
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+    from hermes_cli.tools_config import _get_platform_tools
+    from run_agent import AIAgent
+
+    cfg = load_config()
+
+    # The config's "model" entry is either a bare string or a mapping.
+    model_section = cfg.get("model") or {}
+    if isinstance(model_section, str):
+        configured_model = model_section
+    else:
+        configured_model = (
+            model_section.get("default") or model_section.get("model") or ""
+        )
+
+    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    # "Requested" means the caller asked for the model via argument or env
+    # var; a model that only comes from config does not count.
+    requested_model = (model or "").strip() or env_model
+    effective_model = requested_model or configured_model
+    effective_provider = (provider or "").strip() or None
+
+    # Auto-detect the provider only for an explicitly requested model.  When
+    # the model comes from config, the provider is left for
+    # resolve_runtime_provider() to pick.
+    if effective_provider is None and requested_model:
+        configured_provider = ""
+        if isinstance(model_section, dict):
+            configured_provider = (
+                str(model_section.get("provider") or "").strip().lower()
+            )
+        current_provider = (
+            configured_provider
+            or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+            or "auto"
+        )
+        detected = detect_provider_for_model(requested_model, current_provider)
+        if detected:
+            effective_provider, effective_model = detected
+
+    runtime = resolve_runtime_provider(
+        requested=effective_provider,
+        target_model=effective_model or None,
+    )
+
+    # Toolsets enabled for the "cli" platform, sorted for a stable order.
+    cli_toolsets = sorted(_get_platform_tools(cfg, "cli"))
+
+    agent = AIAgent(
+        api_key=runtime.get("api_key"),
+        base_url=runtime.get("base_url"),
+        provider=runtime.get("provider"),
+        api_mode=runtime.get("api_mode"),
+        model=effective_model,
+        enabled_toolsets=cli_toolsets,
+        quiet_mode=True,
+        platform="cli",
+        credential_pool=runtime.get("credential_pool"),
+        # The clarify callback is the only interactive hook wired up: with
+        # nobody at the terminal it answers with a "pick a default"
+        # instruction so the agent keeps going.  Approvals are covered by
+        # the HERMES_YOLO_MODE / HERMES_ACCEPT_HOOKS env vars that
+        # run_oneshot() sets before calling in here.
+        clarify_callback=_oneshot_clarify_callback,
+    )
+
+    # Belt-and-braces: switch off status output and streaming callbacks so
+    # nothing is emitted outside the captured stdout/stderr.
+    agent.suppress_status_output = True
+    agent.stream_delta_callback = None
+    agent.tool_gen_callback = None
+
+    return agent.chat(prompt) or ""
+
+
+def _oneshot_clarify_callback(question: str, choices=None) -> str:
+    """Answer a clarify request without a user.
+
+    Returns a bracketed instruction telling the agent to proceed on its own:
+    pick from ``choices`` when any are given, otherwise make a reasonable
+    assumption.  ``question`` is not used.
+    """
+    if not choices:
+        return (
+            "[oneshot mode: no user available. Make the most reasonable "
+            "assumption you can and continue.]"
+        )
+    return (
+        f"[oneshot mode: no user available. Pick the best option from "
+        f"{choices} using your own judgment and continue.]"
+    )
@@ -68,25 +68,58 @@ CONFIGURABLE_TOOLSETS = [
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
    ("spotify",          "🎵 Spotify",                  "playback, search, playlists, library"),
+    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
+    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
+
+# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
+# these platforms, and only resolve/save for these platforms.  A toolset
+# absent from this map is available on every platform (current behaviour).
+#
+# Use this for tools whose APIs only make sense on one platform (Discord
+# server admin, Slack workspace admin, etc.).  Keeps every other platform's
+# checklist from filling up with irrelevant toggles.
+_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
+    "discord": {"discord"},
+    "discord_admin": {"discord"},
+}
+
+
+def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
+    """Tell whether toolset ``ts_key`` may be configured on ``platform``.
+
+    A toolset with no entry in ``_TOOLSET_PLATFORM_RESTRICTIONS`` is allowed
+    on every platform; a restricted one is allowed only on the platforms
+    listed for it.
+    """
+    restricted_to = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
+    if restricted_to is None:
+        return True
+    return platform in restricted_to


 def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
-    built-in toolsets in the TUI checklist.
+    built-in toolsets in the TUI checklist. A plugin whose toolset key
+    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
+    plugins (e.g. ``plugins/spotify``) share their toolset key with the
+    built-in entry, and we want the built-in label/description to win.
+    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
+    list the same toolset twice.
    """
    result = list(CONFIGURABLE_TOOLSETS)
+    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
-        result.extend(get_plugin_toolsets())
+        for entry in get_plugin_toolsets():
+            if entry[0] in seen:
+                continue
+            seen.add(entry[0])
+            result.append(entry)
    except Exception:
        pass
    return result
@@ -591,7 +624,7 @@ def _get_platform_tools(
    include_default_mcp_servers: bool = True,
 ) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset
+    from toolsets import resolve_toolset, TOOLSETS

    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)
@@ -605,6 +638,8 @@ def _get_platform_tools(
    toolset_names = [str(ts) for ts in toolset_names]

    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    plugin_ts_keys = _get_plugin_toolset_keys()
+    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # If the saved list contains any configurable keys directly, the user
    # has explicitly configured this platform — use direct membership.
@@ -614,7 +649,10 @@ def _get_platform_tools(
    has_explicit_config = any(ts in configurable_keys for ts in toolset_names)

    if has_explicit_config:
-        enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
+        enabled_toolsets = {
+            ts for ts in toolset_names
+            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
+        }
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -624,14 +662,52 @@ def _get_platform_tools(

        enabled_toolsets = set()
        for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
+            if not _toolset_allowed_for_platform(ts_key, platform):
+                continue
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+
        default_off = set(_DEFAULT_OFF_TOOLSETS)
-        if platform in default_off:
+        # Legacy safety: if the platform's own name matches a default-off
+        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
+        # keep that toolset enabled on first install.  Skip this dodge for
+        # platform-restricted toolsets — those are always opt-in even on
+        # their own platform (e.g. `discord` + `discord` should stay OFF).
+        if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
            default_off.remove(platform)
        enabled_toolsets -= default_off

+    # Recover non-configurable platform toolsets (e.g. feishu_doc,
+    # feishu_drive).  These are part of the platform's default composite but
+    # absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
+    # checklist or in a user-saved config.  Must run in BOTH branches —
+    # otherwise saving via `hermes tools` (which flips has_explicit_config
+    # to True) silently drops them.
+    platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
+    configurable_tool_universe = set()
+    for ck in configurable_keys:
+        configurable_tool_universe.update(resolve_toolset(ck))
+    claimed = set()
+    for ts_key in enabled_toolsets:
+        claimed.update(resolve_toolset(ts_key))
+    skip = configurable_keys | plugin_ts_keys | platform_default_keys
+    skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
+    skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
+    for ts_key, ts_def in TOOLSETS.items():
+        if ts_key in skip:
+            continue
+        if ts_def.get("includes"):
+            continue
+        ts_tools = set(resolve_toolset(ts_key))
+        if not ts_tools or not ts_tools.issubset(platform_tool_universe):
+            continue
+        if ts_tools.issubset(configurable_tool_universe):
+            continue
+        if not ts_tools.issubset(claimed):
+            enabled_toolsets.add(ts_key)
+            claimed.update(ts_tools)
+
    # Plugin toolsets: enabled by default unless explicitly disabled, or
    # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
    # shipped as a bundled plugin but user must opt in via `hermes tools`
@@ -639,7 +715,6 @@ def _get_platform_tools(
    # A plugin toolset is "known" for a platform once `hermes tools`
    # has been saved for that platform (tracked via known_plugin_toolsets).
    # Unknown plugins default to enabled; known-but-absent = disabled.
-    plugin_ts_keys = _get_plugin_toolset_keys()
    if plugin_ts_keys:
        known_map = config.get("known_plugin_toolsets", {})
        known_for_platform = set(known_map.get(platform, []))
@@ -657,7 +732,6 @@ def _get_platform_tools(

    # Preserve any explicit non-configurable toolset entries (for example,
    # custom toolsets or MCP server names saved in platform_toolsets).
-    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
    explicit_passthrough = {
        ts
        for ts in toolset_names
@@ -703,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    """
    config.setdefault("platform_toolsets", {})

+    # Drop platform-scoped toolsets that don't apply here.  Prevents the
+    # "Configure all platforms" checklist (or a hand-edited config.yaml)
+    # from turning on, say, the `discord` toolset for Telegram.
+    enabled_toolset_keys = {
+        ts for ts in enabled_toolset_keys
+        if _toolset_allowed_for_platform(ts, platform)
+    }
+
    # Get the set of all configurable toolset keys (built-in + plugin)
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    plugin_keys = _get_plugin_toolset_keys()
@@ -717,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
+    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -724,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
+    # Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
+    # saving from the picker as consent to clear the "no_mcp" sentinel. The
+    # picker has no checkbox for no_mcp, so without this users who once set it
+    # by hand could never re-enable MCP servers through the UI.
+    preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -831,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    return _tool_token_cache


-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
+def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
    from hermes_cli.curses_ui import curses_checklist
    from toolsets import resolve_toolset
@@ -839,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
    # Pre-compute per-tool token counts (cached after first call).
    tool_tokens = _estimate_tool_tokens()

-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    # Drop platform-scoped toolsets that don't apply to this platform.
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]

    labels = []
    for ts_key, ts_label, ts_desc in effective:
@@ -1753,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS

            # Show checklist
-            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)

            added = new_enabled - current_enabled
            removed = current_enabled - new_enabled
@@ -2109,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]

 def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]
    builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    print(f"Built-in toolsets ({platform}):")
@@ -2175,6 +2272,20 @@ def tools_disable_enable_command(args):
            _print_error(f"Unknown toolset '{name}'")
        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]

+    # Reject platform-scoped toolsets on platforms that don't allow them.
+    restricted_targets = [
+        t for t in toolset_targets
+        if not _toolset_allowed_for_platform(t, platform)
+    ]
+    if restricted_targets:
+        for name in restricted_targets:
+            allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
+            _print_error(
+                f"Toolset '{name}' is not available on platform '{platform}' "
+                f"(only: {', '.join(allowed)})"
+            )
+        toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
+
    if toolset_targets:
        _apply_toolset_change(config, platform, toolset_targets, action)

@@ -288,30 +288,34 @@ def get_tool_definitions(
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

-    # Rebuild discord_server schema based on the bot's privileged intents
-    # (detected from GET /applications/@me) and the user's action allowlist
-    # in config.  Hides actions the bot's intents don't support so the
-    # model never attempts them, and annotates fetch_messages when the
+    # Rebuild discord / discord_admin schemas based on the bot's privileged
+    # intents (detected from GET /applications/@me) and the user's action
+    # allowlist in config.  Hides actions the bot's intents don't support so
+    # the model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
-    if "discord_server" in available_tool_names:
-        try:
-            from tools.discord_tool import get_dynamic_schema
-            dynamic = get_dynamic_schema()
-        except Exception:  # pragma: no cover — defensive, fall back to static
-            dynamic = None
-        if dynamic is None:
-            # Tool filtered out entirely (empty allowlist or detection disabled
-            # the only remaining actions).  Drop it from the schema list.
-            filtered_tools = [
-                t for t in filtered_tools
-                if t.get("function", {}).get("name") != "discord_server"
-            ]
-            available_tool_names.discard("discord_server")
-        else:
-            for i, td in enumerate(filtered_tools):
-                if td.get("function", {}).get("name") == "discord_server":
-                    filtered_tools[i] = {"type": "function", "function": dynamic}
-                    break
+    _discord_schema_fns = {
+        "discord": "get_dynamic_schema_core",
+        "discord_admin": "get_dynamic_schema_admin",
+    }
+    for discord_tool_name in _discord_schema_fns:
+        if discord_tool_name in available_tool_names:
+            try:
+                from tools import discord_tool as _dt
+                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
+                dynamic = schema_fn()
+            except Exception:
+                dynamic = None
+            if dynamic is None:
+                filtered_tools = [
+                    t for t in filtered_tools
+                    if t.get("function", {}).get("name") != discord_tool_name
+                ]
+                available_tool_names.discard(discord_tool_name)
+            else:
+                for i, td in enumerate(filtered_tools):
+                    if td.get("function", {}).get("name") == discord_tool_name:
+                        filtered_tools[i] = {"type": "function", "function": dynamic}
+                        break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
@@ -91,4 +91,29 @@

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
+
+  // ─────────────────────────────────────────────────────────────────────
+  // Page-scoped slot demo: inject a small banner at the top of /sessions.
+  //
+  // Built-in pages expose named slots (<page>:top, <page>:bottom) that
+  // plugins can populate without overriding the whole route. The
+  // manifest lists the slots we use in its `slots` array so the shell
+  // knows to render <PluginSlot name="sessions:top" /> there.
+  // ─────────────────────────────────────────────────────────────────────
+  function SessionsTopBanner() {
+    return React.createElement(Card, {
+      className: "border-dashed",
+    },
+      React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
+        React.createElement(Badge, { variant: "outline" }, "Example"),
+        React.createElement("span", {
+          className: "text-xs text-muted-foreground",
+        }, "This banner was injected into the Sessions page by the example plugin via the ",
+          React.createElement("code", { className: "font-courier" }, "sessions:top"),
+          " slot."),
+      ),
+    );
+  }
+
+  window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
 })();
@@ -8,6 +8,7 @@
    "path": "/example",
    "position": "after:skills"
  },
+  "slots": ["sessions:top"],
  "entry": "dist/index.js",
  "api": "plugin_api.py"
 }
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
 # even if shutdown_memory_provider is never called (e.g. gateway crash,
-# SIGKILL, or exception in _async_flush_memories preventing shutdown).
+# SIGKILL, or exception in the session expiry watcher preventing shutdown).
 # ---------------------------------------------------------------------------
 _last_active_provider: Optional["OpenVikingMemoryProvider"] = None

@@ -1578,7 +1578,6 @@ class AIAgent:
        self._memory_enabled = False
        self._user_profile_enabled = False
        self._memory_nudge_interval = 10
-        self._memory_flush_min_turns = 6
        self._turns_since_memory = 0
        self._iters_since_skill = 0
        if not skip_memory:
@@ -1587,7 +1586,6 @@ class AIAgent:
                self._memory_enabled = mem_config.get("memory_enabled", False)
                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
-                self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
                if self._memory_enabled or self._user_profile_enabled:
                    from tools.memory_tool import MemoryStore
                    self._memory_store = MemoryStore(
@@ -2399,6 +2397,7 @@ class AIAgent:
                base_url=aux_base_url,
                api_key=aux_api_key,
                config_context_length=getattr(self, "_aux_compression_context_length_config", None),
+                provider=getattr(self, "provider", ""),
            )

            # Hard floor: the auxiliary compression model must have at least
@@ -2425,6 +2424,11 @@ class AIAgent:
                # compression actually works this session.  The hard floor
                # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
                # so the new threshold is always >= 64K.
+                #
+                # The compression summariser sends a single user-role
+                # prompt (no system prompt, no tools) to the aux model, so
+                # new_threshold == aux_context is safe: the request is
+                # the raw messages plus a small summarisation instruction.
                old_threshold = threshold
                new_threshold = aux_context
                self.context_compressor.threshold_tokens = new_threshold
@@ -5137,6 +5141,8 @@ class AIAgent:
        # response.incomplete instead of response.completed).
        self._codex_streamed_text_parts: list = []
        for attempt in range(max_stream_retries + 1):
+            if self._interrupt_requested:
+                raise InterruptedError("Agent interrupted before Codex stream retry")
            collected_output_items: list = []
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
@@ -6306,6 +6312,14 @@ class AIAgent:

            try:
                for _stream_attempt in range(_max_stream_retries + 1):
+                    # Check for interrupt before each retry attempt.  Without
+                    # this, /stop closes the HTTP connection (outer poll loop),
+                    # but the retry loop opens a FRESH connection — negating the
+                    # interrupt entirely.  On slow providers (ollama-cloud) each
+                    # retry can block for the full stream-read timeout (120s+),
+                    # causing multi-minute delays between /stop and response.
+                    if self._interrupt_requested:
+                        raise InterruptedError("Agent interrupted before stream retry")
                    try:
                        if self.api_mode == "anthropic_messages":
                            self._try_refresh_anthropic_client_credentials()
@@ -7910,251 +7924,6 @@ class AIAgent:
        """
        return self.api_mode != "codex_responses"

-    def flush_memories(self, messages: list = None, min_turns: int = None):
-        """Give the model one turn to persist memories before context is lost.
-
-        Called before compression, session reset, or CLI exit. Injects a flush
-        message, makes one API call, executes any memory tool calls, then
-        strips all flush artifacts from the message list.
-
-        Args:
-            messages: The current conversation messages. If None, uses
-                      self._session_messages (last run_conversation state).
-            min_turns: Minimum user turns required to trigger the flush.
-                       None = use config value (flush_min_turns).
-                       0 = always flush (used for compression).
-        """
-        if self._memory_flush_min_turns == 0 and min_turns is None:
-            return
-        if "memory" not in self.valid_tool_names or not self._memory_store:
-            return
-        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
-        if self._user_turn_count < effective_min:
-            return
-
-        if messages is None:
-            messages = getattr(self, '_session_messages', None)
-        if not messages or len(messages) < 3:
-            return
-
-        flush_content = (
-            "[System: The session is being compressed. "
-            "Save anything worth remembering — prioritize user preferences, "
-            "corrections, and recurring patterns over task-specific details.]"
-        )
-        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
-        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
-        messages.append(flush_msg)
-
-        try:
-            # Build API messages for the flush call
-            _needs_sanitize = self._should_sanitize_tool_calls()
-            api_messages = []
-            for msg in messages:
-                api_msg = msg.copy()
-                self._copy_reasoning_content_for_api(msg, api_msg)
-                api_msg.pop("reasoning", None)
-                api_msg.pop("finish_reason", None)
-                api_msg.pop("_flush_sentinel", None)
-                api_msg.pop("_thinking_prefill", None)
-                if _needs_sanitize:
-                    self._sanitize_tool_calls_for_strict_api(api_msg)
-                api_messages.append(api_msg)
-
-            if self._cached_system_prompt:
-                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
-
-            # Make one API call with only the memory tool available
-            memory_tool_def = None
-            for t in (self.tools or []):
-                if t.get("function", {}).get("name") == "memory":
-                    memory_tool_def = t
-                    break
-
-            if not memory_tool_def:
-                messages.pop()  # remove flush msg
-                return
-
-            # Use auxiliary client for the flush call when available --
-            # it's cheaper and avoids Codex Responses API incompatibility.
-            from agent.auxiliary_client import (
-                call_llm as _call_llm,
-                _fixed_temperature_for_model,
-                OMIT_TEMPERATURE,
-            )
-            _aux_available = True
-            # Kimi models manage temperature server-side — omit it entirely.
-            # Other models with a fixed contract get that value; everyone else
-            # gets the historical 0.3 default.
-            _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
-            _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
-            if _omit_temperature:
-                _flush_temperature = None
-            elif _fixed_temp is not None:
-                _flush_temperature = _fixed_temp
-            else:
-                _flush_temperature = 0.3
-            aux_error = None
-            try:
-                response = _call_llm(
-                    task="flush_memories",
-                    messages=api_messages,
-                    tools=[memory_tool_def],
-                    temperature=_flush_temperature,
-                    max_tokens=5120,
-                    # timeout resolved from auxiliary.flush_memories.timeout config
-                )
-            except Exception as e:
-                aux_error = e
-                _aux_available = False
-                response = None
-
-            if not _aux_available and self.api_mode == "codex_responses":
-                # No auxiliary client -- use the Codex Responses path directly
-                codex_kwargs = self._build_api_kwargs(api_messages)
-                _ct_flush = self._get_transport()
-                if _ct_flush is not None:
-                    codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
-                elif not codex_kwargs.get("tools"):
-                    codex_kwargs["tools"] = [memory_tool_def]
-                if _flush_temperature is not None:
-                    codex_kwargs["temperature"] = _flush_temperature
-                else:
-                    codex_kwargs.pop("temperature", None)
-                if "max_output_tokens" in codex_kwargs:
-                    codex_kwargs["max_output_tokens"] = 5120
-                response = self._run_codex_stream(codex_kwargs)
-            elif not _aux_available and self.api_mode == "anthropic_messages":
-                # Native Anthropic — use the transport for kwargs
-                _tflush = self._get_transport()
-                ant_kwargs = _tflush.build_kwargs(
-                    model=self.model, messages=api_messages,
-                    tools=[memory_tool_def], max_tokens=5120,
-                    reasoning_config=None,
-                    preserve_dots=self._anthropic_preserve_dots(),
-                )
-                response = self._anthropic_messages_create(ant_kwargs)
-            elif not _aux_available:
-                api_kwargs = {
-                    "model": self.model,
-                    "messages": api_messages,
-                    "tools": [memory_tool_def],
-                    **self._max_tokens_param(5120),
-                }
-                if _flush_temperature is not None:
-                    api_kwargs["temperature"] = _flush_temperature
-                from agent.auxiliary_client import _get_task_timeout
-                response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
-                    **api_kwargs, timeout=_get_task_timeout("flush_memories")
-                )
-
-            if aux_error is not None:
-                logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
-                self._emit_auxiliary_failure("memory flush", aux_error)
-
-            def _openai_tool_calls(resp):
-                if resp is not None and hasattr(resp, "choices") and resp.choices:
-                    msg = getattr(resp.choices[0], "message", None)
-                    calls = getattr(msg, "tool_calls", None)
-                    if calls:
-                        return calls
-                return []
-
-            def _codex_output_tool_calls(resp):
-                calls = []
-                for item in getattr(resp, "output", []) or []:
-                    if getattr(item, "type", None) == "function_call":
-                        calls.append(SimpleNamespace(
-                            id=getattr(item, "call_id", None),
-                            type="function",
-                            function=SimpleNamespace(
-                                name=getattr(item, "name", ""),
-                                arguments=getattr(item, "arguments", "{}"),
-                            ),
-                        ))
-                return calls
-
-            # Extract tool calls from the response, handling all API formats
-            tool_calls = []
-            if self.api_mode == "codex_responses" and not _aux_available:
-                _ct_flush = self._get_transport()
-                _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
-                if _cnr_flush and _cnr_flush.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _cnr_flush.tool_calls
-                    ]
-                else:
-                    tool_calls = _codex_output_tool_calls(response)
-            elif self.api_mode == "anthropic_messages" and not _aux_available:
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_result and _flush_result.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_result.tool_calls
-                    ]
-            elif self.api_mode in ("chat_completions", "bedrock_converse"):
-                # chat_completions / bedrock — normalize through transport
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
-                if _flush_result and _flush_result.tool_calls:
-                    tool_calls = _flush_result.tool_calls
-                else:
-                    tool_calls = _openai_tool_calls(response)
-            elif _aux_available and hasattr(response, "choices") and response.choices:
-                # Auxiliary client returned OpenAI-shaped response while main
-                # api_mode is codex/anthropic — extract tool_calls from .choices
-                tool_calls = _openai_tool_calls(response)
-
-            for tc in tool_calls:
-                if tc.function.name == "memory":
-                    try:
-                        args = json.loads(tc.function.arguments)
-                        flush_target = args.get("target", "memory")
-                        from tools.memory_tool import memory_tool as _memory_tool
-                        _memory_tool(
-                            action=args.get("action"),
-                            target=flush_target,
-                            content=args.get("content"),
-                            old_text=args.get("old_text"),
-                            store=self._memory_store,
-                        )
-                        if self._memory_manager and args.get("action") in ("add", "replace"):
-                            try:
-                                self._memory_manager.on_memory_write(
-                                    args.get("action", ""),
-                                    flush_target,
-                                    args.get("content", ""),
-                                    metadata=self._build_memory_write_metadata(
-                                        write_origin="memory_flush",
-                                        execution_context="flush_memories",
-                                    ),
-                                )
-                            except Exception:
-                                pass
-                        if not self.quiet_mode:
-                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                    except Exception as e:
-                        logger.warning("Memory flush tool call failed: %s", e)
-                        self._emit_auxiliary_failure("memory flush tool", e)
-        except Exception as e:
-            logger.warning("Memory flush API call failed: %s", e)
-            self._emit_auxiliary_failure("memory flush", e)
-        finally:
-            # Strip flush artifacts: remove everything from the flush message onward.
-            # Use sentinel marker instead of identity check for robustness.
-            while messages and messages[-1].get("_flush_sentinel") != _sentinel:
-                messages.pop()
-                if not messages:
-                    break
-            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
-                messages.pop()
-
    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
        """Compress conversation context and split the session in SQLite.

@@ -8173,8 +7942,6 @@ class AIAgent:
            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
            focus_topic,
        )
-        # Pre-compression memory flush: let the model save memories before they're lost
-        self.flush_memories(messages, min_turns=0)

        # Notify external memory provider before compression discards context
        if self._memory_manager:
@@ -29,10 +29,25 @@ BOLD='\033[1m'
 REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
 REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
 HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
+# INSTALL_DIR is resolved AFTER arg parsing and OS detection so we can pick an
+# FHS-style layout for root installs.  Track whether the user gave us an
+# explicit directory — if so we never override it.
+if [ -n "${HERMES_INSTALL_DIR:-}" ]; then
+    INSTALL_DIR="$HERMES_INSTALL_DIR"
+    INSTALL_DIR_EXPLICIT=true
+else
+    INSTALL_DIR=""
+    INSTALL_DIR_EXPLICIT=false
+fi
 PYTHON_VERSION="3.11"
 NODE_VERSION="22"

+# FHS-style root install layout (set by resolve_install_layout when applicable):
+#   code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
+#   data still at /root/.hermes (HERMES_HOME).  Matches Claude Code / Codex CLI
+#   and keeps Docker bind-mounted /root/ volumes lean.
+ROOT_FHS_LAYOUT=false
+
 # Options
 USE_VENV=true
 RUN_SETUP=true
@@ -64,6 +79,7 @@ while [[ $# -gt 0 ]]; do
            ;;
        --dir)
            INSTALL_DIR="$2"
+            INSTALL_DIR_EXPLICIT=true
            shift 2
            ;;
        --hermes-home)
@@ -79,9 +95,20 @@ while [[ $# -gt 0 ]]; do
            echo "  --no-venv      Don't create virtual environment"
            echo "  --skip-setup   Skip interactive setup wizard"
            echo "  --branch NAME  Git branch to install (default: main)"
-            echo "  --dir PATH     Installation directory (default: ~/.hermes/hermes-agent)"
+            echo "  --dir PATH     Installation directory"
+            echo "                   default (non-root):  ~/.hermes/hermes-agent"
+            echo "                   default (root, Linux): /usr/local/lib/hermes-agent"
            echo "  --hermes-home PATH  Data directory (default: ~/.hermes, or \$HERMES_HOME)"
            echo "  -h, --help     Show this help"
+            echo ""
+            echo "Notes:"
+            echo "  When running as root on Linux, Hermes installs the code under"
+            echo "  /usr/local/lib/hermes-agent and links the command into"
+            echo "  /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
+            echo "  Data, config, sessions, and logs still live in \$HERMES_HOME"
+            echo "  (default /root/.hermes).  This keeps Docker bind-mounted volumes"
+            echo "  small and ensures the command is on PATH for all shells."
+            echo "  Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
            exit 0
            ;;
        *)
@@ -163,9 +190,60 @@ is_termux() {
    [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
 }

+# Decide where the repo checkout + venv live, and where the `hermes` command
+# symlink goes.  Called after detect_os so $OS/$DISTRO are known.
+#
+# Defaults:
+#   - Non-root or macOS root: INSTALL_DIR = $HERMES_HOME/hermes-agent
+#                             command link in $HOME/.local/bin
+#   - Termux (any uid):       INSTALL_DIR = $HERMES_HOME/hermes-agent
+#                             command link in $PREFIX/bin (already on PATH)
+#   - Root on Linux (new):    INSTALL_DIR = /usr/local/lib/hermes-agent
+#                             command link in /usr/local/bin
+#                             (unless a legacy install already exists at
+#                              $HERMES_HOME/hermes-agent — then preserve it)
+#
+# Always no-op when the user set --dir or $HERMES_INSTALL_DIR.
+resolve_install_layout() {
+    if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
+        log_info "Install directory: $INSTALL_DIR (explicit)"
+        return 0
+    fi
+
+    # Termux: package manager manages /data/data/..., keep code in HERMES_HOME.
+    if is_termux; then
+        INSTALL_DIR="$HERMES_HOME/hermes-agent"
+        return 0
+    fi
+
+    # Root on Linux: prefer FHS layout unless a legacy install already exists.
+    # macOS root installs keep the legacy layout because /usr/local/ on macOS
+    # is Homebrew territory and we don't want to fight that.
+    if [ "$OS" = "linux" ] && [ "$(id -u)" -eq 0 ]; then
+        if [ -d "$HERMES_HOME/hermes-agent/.git" ]; then
+            INSTALL_DIR="$HERMES_HOME/hermes-agent"
+            log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
+            log_info "  (new root installs use /usr/local/lib/hermes-agent)"
+            return 0
+        fi
+        INSTALL_DIR="/usr/local/lib/hermes-agent"
+        ROOT_FHS_LAYOUT=true
+        log_info "Root install on Linux — using FHS layout"
+        log_info "  Code:    $INSTALL_DIR"
+        log_info "  Command: /usr/local/bin/hermes"
+        log_info "  Data:    $HERMES_HOME (unchanged)"
+        return 0
+    fi
+
+    # Default: non-root (any OS) or root on macOS → legacy user-scoped layout.
+    INSTALL_DIR="$HERMES_HOME/hermes-agent"
+}
+
 get_command_link_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo "$PREFIX/bin"
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo "/usr/local/bin"
    else
        echo "$HOME/.local/bin"
    fi
@@ -174,6 +252,8 @@ get_command_link_dir() {
 get_command_link_display_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo '$PREFIX/bin'
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo '/usr/local/bin'
    else
        echo '~/.local/bin'
    fi
@@ -975,6 +1055,14 @@ setup_path() {
        return 0
    fi

+    # FHS layout: /usr/local/bin is on PATH for every standard shell, nothing to inject.
+    if [ "$ROOT_FHS_LAYOUT" = true ]; then
+        export PATH="$command_link_dir:$PATH"
+        log_info "/usr/local/bin is already on PATH for all shells"
+        log_success "hermes command ready"
+        return 0
+    fi
+
    # Check if ~/.local/bin is on PATH; if not, add it to shell config.
    # Detect the user's actual login shell (not the shell running this script,
    # which is always bash when piped from curl).
@@ -1339,12 +1427,12 @@ print_success() {
    echo ""

    # Show file locations
-    echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
+    echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
    echo ""
-    echo -e "   ${YELLOW}Config:${NC}    ~/.hermes/config.yaml"
-    echo -e "   ${YELLOW}API Keys:${NC}  ~/.hermes/.env"
-    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/cron/, sessions/, logs/"
-    echo -e "   ${YELLOW}Code:${NC}      ~/.hermes/hermes-agent/"
+    echo -e "   ${YELLOW}Config:${NC}    $HERMES_HOME/config.yaml"
+    echo -e "   ${YELLOW}API Keys:${NC}  $HERMES_HOME/.env"
+    echo -e "   ${YELLOW}Data:${NC}      $HERMES_HOME/cron/, sessions/, logs/"
+    echo -e "   ${YELLOW}Code:${NC}      $INSTALL_DIR"
    echo ""

    echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
@@ -1364,6 +1452,9 @@ print_success() {
    if [ "$DISTRO" = "termux" ]; then
        echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
        echo ""
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
+        echo ""
    else
        echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
        echo ""
@@ -1415,6 +1506,7 @@ main() {
    print_banner

    detect_os
+    resolve_install_layout
    install_uv
    check_python
    check_git
@@ -92,6 +92,7 @@ AUTHOR_MAP = {
    "104278804+Sertug17@users.noreply.github.com": "Sertug17",
    "112503481+caentzminger@users.noreply.github.com": "caentzminger",
    "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
+    "xydarcher@uestc.edu.cn": "Readon",
    "sir_even@icloud.com": "sirEven",
    "36056348+sirEven@users.noreply.github.com": "sirEven",
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
@@ -503,6 +504,8 @@ AUTHOR_MAP = {
    "codex@openai.invalid": "teknium1",
    "screenmachine@gmail.com": "teknium1",
    "chenzeshi@live.com": "chen1749144759",
+    "mor.aleksandr@yahoo.com": "MorAlekss",
+    "ash@users.noreply.github.com": "ash",
 }


@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
                },
            },
            "auxiliary": {
-                "flush_memories": {
+                "compression": {
                    "provider": "myrelay",
                    "model": "claude-sonnet-4.6",
                },
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
            AnthropicAuxiliaryClient,
            AsyncAnthropicAuxiliaryClient,
        )
-        async_client, async_model = get_async_text_auxiliary_client("flush_memories")
+        async_client, async_model = get_async_text_auxiliary_client("compression")
        assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
        assert async_model == "claude-sonnet-4.6"

-        sync_client, sync_model = get_text_auxiliary_client("flush_memories")
+        sync_client, sync_model = get_text_auxiliary_client("compression")
        assert isinstance(sync_client, AnthropicAuxiliaryClient)
        assert sync_model == "claude-sonnet-4.6"

@@ -847,6 +847,32 @@ class TestTokenBudgetTailProtection:
        assert isinstance(pruned, int)


+class TestUpdateModelBudgets:
+    """Regression: update_model() must recalculate token budgets."""
+
+    def test_tail_budget_recalculated(self):
+        """tail_token_budget must change after switching to a different context length."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        old_tail = comp.tail_token_budget
+        old_max_summary = comp.max_summary_tokens
+
+        comp.update_model("model-b", context_length=32_000)
+        assert comp.tail_token_budget != old_tail, "tail_token_budget should change"
+        assert comp.tail_token_budget < old_tail, "smaller context → smaller budget"
+        assert comp.max_summary_tokens != old_max_summary, "max_summary_tokens should change"
+
+    def test_budgets_proportional(self):
+        """Budgets should be proportional to context_length after update."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        comp.update_model("model-b", context_length=10_000)
+        assert comp.tail_token_budget == int(comp.threshold_tokens * comp.summary_target_ratio)
+        assert comp.max_summary_tokens == min(int(10_000 * 0.05), 4000)
+
+
 class TestTruncateToolCallArgsJson:
    """Regression tests for #11762.

@@ -0,0 +1,201 @@
+"""Regression tests for the generic unsupported-parameter detector in
+``agent.auxiliary_client``.
+
+The original temperature-specific detector (PR #15621) was generalized so the
+same reactive-retry strategy covers any provider that rejects an arbitrary
+request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
+just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
+pattern.
+
+These tests lock in:
+  * ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
+  * the back-compat wrapper ``_is_unsupported_temperature_error`` still works
+  * the max_tokens retry branch no longer pops a key that was never set
+    (gated on ``max_tokens is not None``)
+  * the max_tokens retry branch matches via the generic helper on top of the
+    legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
+"""
+
+from unittest.mock import patch, MagicMock, AsyncMock
+
+import pytest
+
+from agent.auxiliary_client import (
+    call_llm,
+    async_call_llm,
+    _is_unsupported_parameter_error,
+    _is_unsupported_temperature_error,
+)
+
+
+class TestIsUnsupportedParameterError:
+    """The generic detector must match real provider phrasings for any param."""
+
+    @pytest.mark.parametrize("param,message", [
+        # temperature phrasings (regression coverage via the generic API)
+        ("temperature", "HTTP 400: Unsupported parameter: temperature"),
+        ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
+        ("temperature", "this model does not support temperature"),
+        # max_tokens phrasings
+        ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
+        ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
+        ("max_tokens", "Invalid parameter: max_tokens is not supported"),
+        # arbitrary future params
+        ("seed", "HTTP 400: unrecognized parameter: seed"),
+        ("top_p", "Error: top_p is not supported for this model"),
+    ])
+    def test_matches_real_provider_messages(self, param, message):
+        assert _is_unsupported_parameter_error(RuntimeError(message), param) is True
+
+    @pytest.mark.parametrize("param,message", [
+        # Param not mentioned at all
+        ("temperature", "HTTP 400: max_tokens is too large"),
+        # Param mentioned but not flagged as unsupported
+        ("temperature", "temperature must be between 0 and 2"),
+        # Totally unrelated 400
+        ("max_tokens", "Rate limit exceeded"),
+        # Connection-level errors
+        ("temperature", "Connection reset by peer"),
+    ])
+    def test_does_not_match_unrelated_errors(self, param, message):
+        assert _is_unsupported_parameter_error(RuntimeError(message), param) is False
+
+    def test_empty_param_returns_false(self):
+        assert _is_unsupported_parameter_error(
+            RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
+        ) is False
+
+    def test_temperature_wrapper_delegates_to_generic(self):
+        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
+        msg = "HTTP 400: Unsupported parameter: temperature"
+        assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
+        # And the unrelated-case still holds
+        assert _is_unsupported_temperature_error(
+            RuntimeError("max_tokens is too large")) is False
+
+
+def _dummy_response():
+    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
+    return {"ok": True}
+
+
+class TestMaxTokensRetryHardening:
+    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
+    and (b) also matches the generic phrasings via the helper.
+    """
+
+    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
+        """No max_tokens kwarg → must not pop/retry even if the error mentions it.
+
+        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
+        ``kwargs["max_completion_tokens"] = max_tokens`` would set a None
+        value and hit the provider again. The gate skips the whole branch.
+        """
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
+        client.chat.completions.create.side_effect = err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            with pytest.raises(RuntimeError):
+                call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "hi"}],
+                    temperature=0.3,
+                    # max_tokens omitted on purpose
+                )
+
+        # Only the initial attempt — no retry because the gate blocked it
+        assert client.chat.completions.create.call_count == 1
+
+    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
+        """A 400 saying "Unknown parameter: max_tokens" (not the legacy
+        substring ``"max_tokens"`` bare + no ``unsupported_parameter`` token)
+        now triggers the retry via the generic helper.
+        """
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("Unknown parameter: max_tokens")
+        response = _dummy_response()
+        client.chat.completions.create.side_effect = [err, response]
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hi"}],
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+        assert result is response
+        assert client.chat.completions.create.call_count == 2
+        second_call = client.chat.completions.create.call_args_list[1]
+        assert "max_tokens" not in second_call.kwargs
+        assert second_call.kwargs["max_completion_tokens"] == 512
+
+    @pytest.mark.asyncio
+    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
+        client.chat.completions.create = AsyncMock(side_effect=err)
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            with pytest.raises(RuntimeError):
+                await async_call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "hi"}],
+                    temperature=0.3,
+                )
+
+        assert client.chat.completions.create.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        err = RuntimeError("Unknown parameter: max_tokens")
+        response = _dummy_response()
+        client.chat.completions.create = AsyncMock(side_effect=[err, response])
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "hi"}],
+                temperature=0.3,
+                max_tokens=512,
+            )
+
+        assert result is response
+        assert client.chat.completions.create.await_count == 2
+        second_call = client.chat.completions.create.call_args_list[1]
+        assert "max_tokens" not in second_call.kwargs
+        assert second_call.kwargs["max_completion_tokens"] == 512
@@ -0,0 +1,237 @@
+"""Regression tests for the universal "unsupported temperature" retry in
+``agent.auxiliary_client``.
+
+Auxiliary callers (context compression, session search,
+web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
+reasons. Several provider/model combinations reject ``temperature`` with a
+400:
+
+  * OpenAI Responses (gpt-5/o-series reasoning models)
+  * Copilot Responses (reasoning models)
+  * OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
+  * Anthropic Opus 4.7+ via OpenAI-compat endpoints
+  * Kimi/Moonshot (server-managed)
+
+``_fixed_temperature_for_model`` catches Kimi up front, and
+``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
+but the same backend can accept ``temperature`` for some models and reject
+it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
+endpoint). An allow/deny-list is not maintainable across providers.
+
+The universal fix is reactive: when a call returns an
+``Unsupported parameter: temperature`` 400, retry once without temperature.
+These tests lock in that behaviour for both sync and async paths.
+"""
+
+from unittest.mock import patch, MagicMock, AsyncMock
+
+import pytest
+
+from agent.auxiliary_client import (
+    call_llm,
+    async_call_llm,
+    _is_unsupported_temperature_error,
+)
+
+
+class TestIsUnsupportedTemperatureError:
+    """The detector must match the phrasings providers actually return."""
+
+    @pytest.mark.parametrize("message", [
+        # OpenAI / Codex Responses
+        "HTTP 400: Unsupported parameter: temperature",
+        "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
+        # Copilot / OpenAI error-code form
+        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
+        # OpenRouter-style
+        "Provider returned error: temperature is not supported for this model",
+        "this model does not support temperature",
+        # Anthropic-style via OAI-compat
+        "temperature: unknown parameter",
+        # Some gateways
+        "unrecognized request argument supplied: temperature",
+    ])
+    def test_matches_real_provider_messages(self, message):
+        assert _is_unsupported_temperature_error(RuntimeError(message)) is True
+
+    @pytest.mark.parametrize("message", [
+        # Unrelated 400s must NOT trigger a silent-retry
+        "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
+        "max_tokens is too large for this model",
+        "Rate limit exceeded",
+        "Connection reset by peer",
+        # Temperature value error is a different class of problem
+        "temperature must be between 0 and 2",
+    ])
+    def test_does_not_match_unrelated_errors(self, message):
+        assert _is_unsupported_temperature_error(RuntimeError(message)) is False
+
+
+def _dummy_response():
+    # The real code calls _validate_llm_response which inspects
+    # response.choices[0].message.  The tests here patch that out, so
+    # any sentinel object is fine.
+    return {"ok": True}
+
+
+class TestCallLlmUnsupportedTemperatureRetry:
+    """``call_llm`` retries once without temperature and returns on success."""
+
+    def _setup(self, first_exc):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create.side_effect = [first_exc, _dummy_response()]
+        return client
+
+    @pytest.mark.parametrize("error_message", [
+        "HTTP 400: Unsupported parameter: temperature",
+        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
+        "Provider error: this model does not support temperature",
+    ])
+    def test_retries_once_without_temperature(self, error_message):
+        client = self._setup(RuntimeError(error_message))
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "remember this"}],
+                temperature=0.3,
+                max_tokens=500,
+            )
+
+        assert result == {"ok": True}
+        assert client.chat.completions.create.call_count == 2
+        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
+        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
+        assert first_kwargs["temperature"] == 0.3
+        assert "temperature" not in retry_kwargs
+        # other kwargs preserved
+        assert retry_kwargs["max_tokens"] == 500
+
+    def test_non_temperature_400_does_not_retry_as_temperature(self):
+        """Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        non_temp_err = RuntimeError(
+            "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
+        )
+        client.chat.completions.create.side_effect = non_temp_err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError, match="Invalid value"):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=0.3,
+                    max_tokens=500,
+                )
+        # Should NOT have retried (non-temperature 400 doesn't match)
+        assert client.chat.completions.create.call_count == 1
+
+    def test_no_retry_when_temperature_not_in_kwargs(self):
+        """If caller didn't send temperature, don't invent a temperature-retry."""
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        # Provider complains about temperature even though we didn't send it.
+        # (Pathological but possible with misleading error text.)  The guard
+        # ``"temperature" in kwargs`` must prevent an unnecessary retry.
+        err = RuntimeError("HTTP 400: Unsupported parameter: temperature")
+        client.chat.completions.create.side_effect = err
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=None,  # explicit: no temperature sent
+                    max_tokens=500,
+                )
+        assert client.chat.completions.create.call_count == 1
+
+
+class TestAsyncCallLlmUnsupportedTemperatureRetry:
+    """``async_call_llm`` mirror of the sync retry semantics."""
+
+    @pytest.mark.asyncio
+    async def test_async_retries_once_without_temperature(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create = AsyncMock(side_effect=[
+            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
+            _dummy_response(),
+        ])
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+        ):
+            result = await async_call_llm(
+                task="session_search",
+                messages=[{"role": "user", "content": "query"}],
+                temperature=0.3,
+                max_tokens=500,
+            )
+
+        assert result == {"ok": True}
+        assert client.chat.completions.create.await_count == 2
+        first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
+        retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
+        assert first_kwargs["temperature"] == 0.3
+        assert "temperature" not in retry_kwargs
+        assert retry_kwargs["max_tokens"] == 500
+
+    @pytest.mark.asyncio
+    async def test_async_non_temperature_400_does_not_retry(self):
+        client = MagicMock()
+        client.base_url = "https://api.openai.com/v1"
+        client.chat.completions.create = AsyncMock(
+            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
+        )
+
+        with (
+            patch("agent.auxiliary_client._resolve_task_provider_model",
+                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
+            patch("agent.auxiliary_client._get_cached_client",
+                  return_value=(client, "gpt-5.5")),
+            patch("agent.auxiliary_client._validate_llm_response",
+                  side_effect=lambda resp, _task: resp),
+            patch("agent.auxiliary_client._try_payment_fallback",
+                  return_value=None),
+        ):
+            with pytest.raises(RuntimeError, match="Invalid value"):
+                await async_call_llm(
+                    task="session_search",
+                    messages=[{"role": "user", "content": "x"}],
+                    temperature=0.3,
+                    max_tokens=500,
+                )
+        assert client.chat.completions.create.await_count == 1
@@ -33,7 +33,6 @@ class _FakeAgent:
        self._todo_store.write(
            [{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
        )
-        self.flush_memories = MagicMock()
        self.commit_memory_session = MagicMock()
        self._invalidate_system_prompt = MagicMock()

@@ -157,7 +156,6 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
    assert cli.agent._todo_store.read() == []
    assert cli.session_start > old_session_start
    assert cli.agent.session_start == cli.session_start
-    cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
    cli.agent._invalidate_system_prompt.assert_called_once()


@@ -0,0 +1,390 @@
+"""Tests for cron job context_from feature (issue #5439 Option C)."""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+
+@pytest.fixture
+def cron_env(tmp_path, monkeypatch):
+    """Isolated cron environment with temp HERMES_HOME."""
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / "cron").mkdir()
+    (hermes_home / "cron" / "output").mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    import cron.jobs as jobs_mod
+    monkeypatch.setattr(jobs_mod, "HERMES_DIR", hermes_home)
+    monkeypatch.setattr(jobs_mod, "CRON_DIR", hermes_home / "cron")
+    monkeypatch.setattr(jobs_mod, "JOBS_FILE", hermes_home / "cron" / "jobs.json")
+    monkeypatch.setattr(jobs_mod, "OUTPUT_DIR", hermes_home / "cron" / "output")
+
+    return hermes_home
+
+
+class TestJobContextFromField:
+    """Test that context_from is stored and retrieved correctly."""
+
+    def test_create_job_with_context_from_string(self, cron_env):
+        from cron.jobs import create_job, get_job
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize findings",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+
+        assert job_b["context_from"] == [job_a["id"]]
+        loaded = get_job(job_b["id"])
+        assert loaded["context_from"] == [job_a["id"]]
+
+    def test_create_job_with_context_from_list(self, cron_env):
+        from cron.jobs import create_job, get_job
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Find weather", schedule="every 1h")
+        job_c = create_job(
+            prompt="Summarize everything",
+            schedule="every 2h",
+            context_from=[job_a["id"], job_b["id"]],
+        )
+
+        assert job_c["context_from"] == [job_a["id"], job_b["id"]]
+
+    def test_create_job_without_context_from(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h")
+        assert job.get("context_from") is None
+
+    def test_context_from_empty_string_normalized_to_none(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h", context_from="")
+        assert job.get("context_from") is None
+
+    def test_context_from_empty_list_normalized_to_none(self, cron_env):
+        from cron.jobs import create_job
+
+        job = create_job(prompt="Hello", schedule="every 1h", context_from=[])
+        assert job.get("context_from") is None
+
+
+class TestBuildJobPromptContextFrom:
+    """Test that _build_job_prompt() injects context from referenced jobs."""
+
+    def test_injects_latest_output(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+
+        # Write an output file for job_a
+        output_dir = OUTPUT_DIR / job_a["id"]
+        output_dir.mkdir(parents=True, exist_ok=True)
+        (output_dir / "2026-04-22_10-00-00.md").write_text(
+            "Today's top story: AI is everywhere.", encoding="utf-8"
+        )
+
+        job_b = create_job(
+            prompt="Summarize the news",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+
+        prompt = _build_job_prompt(job_b)
+        assert "Today's top story: AI is everywhere." in prompt
+        assert f"Output from job '{job_a['id']}'" in prompt
+
+    def test_uses_most_recent_output(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        import time
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        output_dir = OUTPUT_DIR / job_a["id"]
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        old_file = output_dir / "2026-04-22_08-00-00.md"
+        old_file.write_text("Old output", encoding="utf-8")
+        time.sleep(0.01)
+        new_file = output_dir / "2026-04-22_10-00-00.md"
+        new_file.write_text("New output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
+        )
+        prompt = _build_job_prompt(job_b)
+        assert "New output" in prompt
+        assert "Old output" not in prompt
+
+    def test_graceful_when_no_output_yet(self, cron_env):
+        from cron.jobs import create_job
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # job_a never ran — output dir does not exist
+        # expect silent skip: no placeholder injected, base prompt intact
+        prompt = _build_job_prompt(job_b)
+        assert "no output" not in prompt.lower()
+        assert "not found" not in prompt.lower()
+        assert "Summarize" in prompt
+
+    def test_injects_multiple_context_jobs(self, cron_env):
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Find weather", schedule="every 1h")
+
+        for job, content in [(job_a, "News: AI boom"), (job_b, "Weather: Sunny")]:
+            out_dir = OUTPUT_DIR / job["id"]
+            out_dir.mkdir(parents=True, exist_ok=True)
+            (out_dir / "2026-04-22_10-00-00.md").write_text(content, encoding="utf-8")
+
+        job_c = create_job(
+            prompt="Daily briefing",
+            schedule="every 2h",
+            context_from=[job_a["id"], job_b["id"]],
+        )
+        prompt = _build_job_prompt(job_c)
+        assert "News: AI boom" in prompt
+        assert "Weather: Sunny" in prompt
+
+    def test_context_injected_before_prompt(self, cron_env):
+        """Context should appear before the job's own prompt."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Context data", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process the data above",
+            schedule="every 2h",
+            context_from=job_a["id"],
+        )
+        prompt = _build_job_prompt(job_b)
+        context_pos = prompt.find("Context data")
+        prompt_pos = prompt.find("Process the data above")
+        assert context_pos < prompt_pos
+
+    def test_output_truncated_at_8k_chars(self, cron_env):
+        """Output longer than 8000 chars should be truncated."""
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        big_output = "x" * 10000
+        (out_dir / "2026-04-22_10-00-00.md").write_text(big_output, encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+        prompt = _build_job_prompt(job_b)
+        assert "truncated" in prompt
+        assert "x" * 10000 not in prompt
+
+    def test_graceful_when_file_deleted_between_listing_and_reading(self, cron_env):
+        """Job should not crash if output file is deleted mid-read.
+
+        The deletion is simulated by patching ``Path.read_text`` so that
+        reading any ``.md`` file raises ``FileNotFoundError``.
+        """
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        from unittest.mock import patch
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # Simulate file deleted between glob() and read_text()
+        original_read = Path.read_text
+        def mock_read_text(self, *args, **kwargs):
+            # Only .md reads fail; every other read is delegated to the
+            # real Path.read_text captured above.
+            if self.suffix == ".md":
+                raise FileNotFoundError("file deleted mid-read")
+            return original_read(self, *args, **kwargs)
+
+        with patch.object(Path, "read_text", mock_read_text):
+            prompt = _build_job_prompt(job_b)
+
+        # Job should not crash, prompt should still contain the base prompt
+        assert "Process" in prompt
+
+    def test_graceful_when_permission_error(self, cron_env):
+        """Job should not crash if reading an output file raises PermissionError.
+
+        The failure is simulated by patching ``Path.read_text`` so that
+        reading any ``.md`` file raises ``PermissionError``.
+        """
+        from cron.jobs import create_job, OUTPUT_DIR
+        from cron.scheduler import _build_job_prompt
+        from unittest.mock import patch
+
+        job_a = create_job(prompt="Find data", schedule="every 1h")
+        out_dir = OUTPUT_DIR / job_a["id"]
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
+
+        job_b = create_job(
+            prompt="Process", schedule="every 2h", context_from=job_a["id"]
+        )
+
+        # Simulate permission error on read
+        original_read = Path.read_text
+        def mock_read_text(self, *args, **kwargs):
+            # Only .md reads fail; every other read is delegated to the
+            # real Path.read_text captured above.
+            if self.suffix == ".md":
+                raise PermissionError("permission denied")
+            return original_read(self, *args, **kwargs)
+
+        with patch.object(Path, "read_text", mock_read_text):
+            prompt = _build_job_prompt(job_b)
+
+        # Job should not crash, prompt should still contain the base prompt
+        assert "Process" in prompt
+
+    def test_invalid_job_id_skipped(self, cron_env):
+        """context_from with path traversal job_id should be skipped.
+
+        The bad reference is written directly onto the job dict after
+        creation rather than being passed to ``create_job``.
+        """
+        from cron.jobs import create_job
+        from cron.scheduler import _build_job_prompt
+
+        job = create_job(prompt="Process", schedule="every 2h")
+        # Manually inject invalid context_from (simulating tampered jobs.json)
+        job["context_from"] = ["../../../etc/passwd"]
+        prompt = _build_job_prompt(job)
+        # Should not crash and should not inject anything malicious
+        assert "Process" in prompt
+        assert "etc/passwd" not in prompt
+
+
+
+class TestUpdateContextFrom:
+    """Verify the cronjob tool's `update` action wires context_from through.
+
+    Without this, the create-path stores the field but users can never modify
+    or clear it via the tool (schema promises "pass an empty array to clear").
+    """
+
+    def test_update_adds_context_from_to_existing_job(self, cron_env):
+        """A bare job-id string passed to `update` is stored as a one-element list."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(prompt="Summarize", schedule="every 2h")
+        assert job_b.get("context_from") is None
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=job_a["id"],
+        ))
+        assert result["success"] is True
+
+        reloaded = get_job(job_b["id"])
+        assert reloaded["context_from"] == [job_a["id"]]
+
+    def test_update_changes_context_from_reference(self, cron_env):
+        """Updating with a new list replaces the previously stored reference."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_a2 = create_job(prompt="Find weather", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+        assert job_b["context_from"] == [job_a["id"]]
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=[job_a2["id"]],
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] == [job_a2["id"]]
+
+    def test_update_clears_context_from_with_empty_list(self, cron_env):
+        """An empty list clears context_from; the reloaded job reports None."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+        assert get_job(job_b["id"])["context_from"] == [job_a["id"]]
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=[],
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] is None
+
+    def test_update_clears_context_from_with_empty_string(self, cron_env):
+        """An empty string clears context_from; the reloaded job reports None."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from="",
+        ))
+        assert result["success"] is True
+        assert get_job(job_b["id"])["context_from"] is None
+
+    def test_update_rejects_unknown_job_reference(self, cron_env):
+        """Referencing a job id that was never created fails with a "not found" error."""
+        from cron.jobs import create_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_b = create_job(prompt="Summarize", schedule="every 2h")
+
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            context_from=["deadbeef0000"],
+        ))
+        assert result["success"] is False
+        assert "not found" in result["error"]
+
+    def test_update_preserves_context_from_when_not_passed(self, cron_env):
+        """Updating other fields must not clobber context_from."""
+        from cron.jobs import create_job, get_job
+        from tools.cronjob_tools import cronjob
+        import json
+
+        job_a = create_job(prompt="Find news", schedule="every 1h")
+        job_b = create_job(
+            prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
+        )
+
+        # Update an unrelated field
+        result = json.loads(cronjob(
+            action="update",
+            job_id=job_b["id"],
+            prompt="Summarize v2",
+        ))
+        assert result["success"] is True
+        reloaded = get_job(job_b["id"])
+        assert reloaded["prompt"] == "Summarize v2"
+        assert reloaded["context_from"] == [job_a["id"]]
@@ -1,249 +0,0 @@
-"""Tests for proactive memory flush on session expiry.
-
-Verifies that:
-1. _is_session_expired() works from a SessionEntry alone (no source needed)
-2. The sync callback is no longer called in get_or_create_session
-3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
-4. The background watcher can detect expired sessions
-"""
-
-import pytest
-from datetime import datetime, timedelta
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-from gateway.config import Platform, GatewayConfig, SessionResetPolicy
-from gateway.session import SessionSource, SessionStore, SessionEntry
-
-
-@pytest.fixture()
-def idle_store(tmp_path):
-    """SessionStore with a 60-minute idle reset policy."""
-    config = GatewayConfig(
-        default_reset_policy=SessionResetPolicy(mode="idle", idle_minutes=60),
-    )
-    with patch("gateway.session.SessionStore._ensure_loaded"):
-        s = SessionStore(sessions_dir=tmp_path, config=config)
-    s._db = None
-    s._loaded = True
-    return s
-
-
-@pytest.fixture()
-def no_reset_store(tmp_path):
-    """SessionStore with no reset policy (mode=none)."""
-    config = GatewayConfig(
-        default_reset_policy=SessionResetPolicy(mode="none"),
-    )
-    with patch("gateway.session.SessionStore._ensure_loaded"):
-        s = SessionStore(sessions_dir=tmp_path, config=config)
-    s._db = None
-    s._loaded = True
-    return s
-
-
-class TestIsSessionExpired:
-    """_is_session_expired should detect expiry from entry alone."""
-
-    def test_idle_session_expired(self, idle_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_1",
-            created_at=datetime.now() - timedelta(hours=3),
-            updated_at=datetime.now() - timedelta(minutes=120),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is True
-
-    def test_active_session_not_expired(self, idle_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_2",
-            created_at=datetime.now() - timedelta(hours=1),
-            updated_at=datetime.now() - timedelta(minutes=10),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is False
-
-    def test_none_mode_never_expires(self, no_reset_store):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_3",
-            created_at=datetime.now() - timedelta(days=30),
-            updated_at=datetime.now() - timedelta(days=30),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert no_reset_store._is_session_expired(entry) is False
-
-    def test_active_processes_prevent_expiry(self, idle_store):
-        """Sessions with active background processes should never expire."""
-        idle_store._has_active_processes_fn = lambda key: True
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_4",
-            created_at=datetime.now() - timedelta(hours=5),
-            updated_at=datetime.now() - timedelta(hours=5),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert idle_store._is_session_expired(entry) is False
-
-    def test_daily_mode_expired(self, tmp_path):
-        """Daily mode should expire sessions from before today's reset hour."""
-        config = GatewayConfig(
-            default_reset_policy=SessionResetPolicy(mode="daily", at_hour=4),
-        )
-        with patch("gateway.session.SessionStore._ensure_loaded"):
-            store = SessionStore(sessions_dir=tmp_path, config=config)
-        store._db = None
-        store._loaded = True
-
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm",
-            session_id="sid_5",
-            created_at=datetime.now() - timedelta(days=2),
-            updated_at=datetime.now() - timedelta(days=2),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert store._is_session_expired(entry) is True
-
-
-class TestGetOrCreateSessionNoCallback:
-    """get_or_create_session should NOT call a sync flush callback."""
-
-    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
-        """When a flushed session auto-resets, a new session_id is created."""
-        source = SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="123",
-            chat_type="dm",
-        )
-        # Create initial session
-        entry1 = idle_store.get_or_create_session(source)
-        old_sid = entry1.session_id
-
-        # Simulate the watcher having flushed it
-        entry1.memory_flushed = True
-
-        # Simulate the session going idle
-        entry1.updated_at = datetime.now() - timedelta(minutes=120)
-        idle_store._save()
-
-        # Next call should auto-reset
-        entry2 = idle_store.get_or_create_session(source)
-        assert entry2.session_id != old_sid
-        assert entry2.was_auto_reset is True
-        # New session starts with memory_flushed=False
-        assert entry2.memory_flushed is False
-
-    def test_no_sync_callback_invoked(self, idle_store):
-        """No synchronous callback should block during auto-reset."""
-        source = SessionSource(
-            platform=Platform.TELEGRAM,
-            chat_id="123",
-            chat_type="dm",
-        )
-        entry1 = idle_store.get_or_create_session(source)
-        entry1.updated_at = datetime.now() - timedelta(minutes=120)
-        idle_store._save()
-
-        # Verify no _on_auto_reset attribute
-        assert not hasattr(idle_store, '_on_auto_reset')
-
-        # This should NOT block (no sync LLM call)
-        entry2 = idle_store.get_or_create_session(source)
-        assert entry2.was_auto_reset is True
-
-
-class TestMemoryFlushedFlag:
-    """The memory_flushed flag on SessionEntry prevents double-flushing."""
-
-    def test_defaults_to_false(self):
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm:123",
-            session_id="sid_new",
-            created_at=datetime.now(),
-            updated_at=datetime.now(),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        assert entry.memory_flushed is False
-
-    def test_persists_through_save_load(self, idle_store):
-        """memory_flushed=True must survive a save/load cycle (simulates restart)."""
-        key = "agent:main:discord:thread:789"
-        entry = SessionEntry(
-            session_key=key,
-            session_id="sid_flushed",
-            created_at=datetime.now() - timedelta(hours=5),
-            updated_at=datetime.now() - timedelta(hours=5),
-            platform=Platform.DISCORD,
-            chat_type="thread",
-            memory_flushed=True,
-        )
-        idle_store._entries[key] = entry
-        idle_store._save()
-
-        # Simulate restart: clear in-memory state, reload from disk
-        idle_store._entries.clear()
-        idle_store._loaded = False
-        idle_store._ensure_loaded()
-
-        reloaded = idle_store._entries[key]
-        assert reloaded.memory_flushed is True
-
-    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
-        """An entry without memory_flushed stays False after reload."""
-        key = "agent:main:telegram:dm:456"
-        entry = SessionEntry(
-            session_key=key,
-            session_id="sid_not_flushed",
-            created_at=datetime.now() - timedelta(hours=2),
-            updated_at=datetime.now() - timedelta(hours=2),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-        )
-        idle_store._entries[key] = entry
-        idle_store._save()
-
-        idle_store._entries.clear()
-        idle_store._loaded = False
-        idle_store._ensure_loaded()
-
-        reloaded = idle_store._entries[key]
-        assert reloaded.memory_flushed is False
-
-    def test_roundtrip_to_dict_from_dict(self):
-        """to_dict/from_dict must preserve memory_flushed."""
-        entry = SessionEntry(
-            session_key="agent:main:telegram:dm:999",
-            session_id="sid_rt",
-            created_at=datetime.now(),
-            updated_at=datetime.now(),
-            platform=Platform.TELEGRAM,
-            chat_type="dm",
-            memory_flushed=True,
-        )
-        d = entry.to_dict()
-        assert d["memory_flushed"] is True
-
-        restored = SessionEntry.from_dict(d)
-        assert restored.memory_flushed is True
-
-    def test_legacy_entry_without_field_defaults_false(self):
-        """Old sessions.json entries missing memory_flushed should default to False."""
-        data = {
-            "session_key": "agent:main:telegram:dm:legacy",
-            "session_id": "sid_legacy",
-            "created_at": datetime.now().isoformat(),
-            "updated_at": datetime.now().isoformat(),
-            "platform": "telegram",
-            "chat_type": "dm",
-            # no memory_flushed key
-        }
-        entry = SessionEntry.from_dict(data)
-        assert entry.memory_flushed is False
@@ -1,240 +0,0 @@
-"""Tests for memory flush stale-overwrite prevention (#2670).
-
-Verifies that:
-1. Cron sessions are skipped (no flush for headless cron runs)
-2. Current memory state is injected into the flush prompt so the
-   flush agent can see what's already saved and avoid overwrites
-3. The flush still works normally when memory files don't exist
-"""
-
-import sys
-import types
-import pytest
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
-
-
-@pytest.fixture(autouse=True)
-def _mock_dotenv(monkeypatch):
-    """gateway.run imports dotenv at module level; stub it so tests run without the package."""
-    fake = types.ModuleType("dotenv")
-    fake.load_dotenv = lambda *a, **kw: None
-    monkeypatch.setitem(sys.modules, "dotenv", fake)
-
-
-def _make_runner():
-    from gateway.run import GatewayRunner
-
-    runner = object.__new__(GatewayRunner)
-    runner._honcho_managers = {}
-    runner._honcho_configs = {}
-    runner._running_agents = {}
-    runner._pending_messages = {}
-    runner._pending_approvals = {}
-    runner.adapters = {}
-    runner.hooks = MagicMock()
-    runner.session_store = MagicMock()
-    return runner
-
-
-_TRANSCRIPT_4_MSGS = [
-    {"role": "user", "content": "hello"},
-    {"role": "assistant", "content": "hi there"},
-    {"role": "user", "content": "remember my name is Alice"},
-    {"role": "assistant", "content": "Got it, Alice!"},
-]
-
-
-class TestCronSessionBypass:
-    """Cron sessions should never trigger a memory flush."""
-
-    def test_cron_session_skipped(self):
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_job123_20260323_120000")
-        # session_store.load_transcript should never be called
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_cron_session_with_prefix_skipped(self):
-        """Cron sessions with different prefixes are still skipped."""
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_daily_20260323")
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_non_cron_session_proceeds(self):
-        """Non-cron sessions should still attempt the flush."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = []
-        runner._flush_memories_for_session("session_abc123")
-        runner.session_store.load_transcript.assert_called_once_with("session_abc123")
-
-
-def _make_flush_context(monkeypatch, memory_dir=None):
-    """Return (runner, tmp_agent, fake_run_agent) with run_agent mocked in sys.modules."""
-    tmp_agent = MagicMock()
-    fake_run_agent = types.ModuleType("run_agent")
-    fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
-    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-    runner = _make_runner()
-    runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-    return runner, tmp_agent, memory_dir
-
-
-class TestMemoryInjection:
-    """The flush prompt should include current memory state from disk."""
-
-    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
-        """When memory files exist, their content appears in the flush prompt."""
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
-        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_123")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-
-        assert "Agent knows Python" in flush_prompt
-        assert "User prefers dark mode" in flush_prompt
-        assert "Name: Alice" in flush_prompt
-        assert "Timezone: PST" in flush_prompt
-        assert "Do NOT overwrite or remove entries" in flush_prompt
-        assert "current live state of memory" in flush_prompt
-
-    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
-        """When no memory files exist, flush still runs without the guard."""
-        empty_dir = tmp_path / "no_memories"
-        empty_dir.mkdir()
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: empty_dir)}),
-        ):
-            runner._flush_memories_for_session("session_456")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "Do NOT overwrite or remove entries" not in flush_prompt
-        assert "Review the conversation above" in flush_prompt
-
-    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
-        """Empty memory files should not trigger the guard section."""
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("")
-        (memory_dir / "USER.md").write_text("  \n  ")  # whitespace only
-
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_789")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "current live state of memory" not in flush_prompt
-
-
-class TestFlushAgentSilenced:
-    """The flush agent must not produce any terminal output."""
-
-    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
-        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        captured_agent = {}
-
-        def _fake_ai_agent(*args, **kwargs):
-            agent = MagicMock()
-            captured_agent["instance"] = agent
-            return agent
-
-        fake_run_agent = types.ModuleType("run_agent")
-        fake_run_agent.AIAgent = _fake_ai_agent
-        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
-        ):
-            runner._flush_memories_for_session("session_silent")
-
-        agent = captured_agent["instance"]
-        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
-        # Confirm it is callable and produces no output (no exception)
-        agent._print_fn("should be silenced")
-
-    def test_kawaii_spinner_respects_print_fn(self):
-        """KawaiiSpinner must route all output through print_fn when supplied."""
-        from agent.display import KawaiiSpinner
-
-        written = []
-        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
-        spinner._write("hello")
-        assert written == [("hello",)], "spinner should route through print_fn"
-
-        # A no-op print_fn must produce no output to stdout
-        import io, sys
-        buf = io.StringIO()
-        old_stdout = sys.stdout
-        sys.stdout = buf
-        try:
-            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
-            silent_spinner._write("should not appear")
-            silent_spinner.stop("done")
-        finally:
-            sys.stdout = old_stdout
-        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
-
-    def test_flush_agent_closes_resources_after_run(self, monkeypatch):
-        """Memory flush should close temporary agent resources after the turn."""
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-        tmp_agent.shutdown_memory_provider = MagicMock()
-        tmp_agent.close = MagicMock()
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
-        ):
-            runner._flush_memories_for_session("session_cleanup")
-
-        tmp_agent.shutdown_memory_provider.assert_called_once()
-        tmp_agent.close.assert_called_once()
-
-
-class TestFlushPromptStructure:
-    """Verify the flush prompt retains its core instructions."""
-
-    def test_core_instructions_present(self, monkeypatch):
-        """The flush prompt should still contain the original guidance."""
-        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
-        ):
-            runner._flush_memories_for_session("session_struct")
-
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "automatically reset" in flush_prompt
-        assert "Save any important facts" in flush_prompt
-        assert "consider saving it as a skill" in flush_prompt
-        assert "Do NOT respond to the user" in flush_prompt
@@ -197,10 +197,14 @@ def _make_fake_mautrix():
            self.account_id = account_id
            self.pickle_key = pickle_key
            self.db = db
+            self._device_id = ""

        async def open(self):
            pass

+        async def put_device_id(self, device_id):
+            """Store *device_id* on this fake crypto store's ``_device_id``."""
+            self._device_id = device_id
+
    mautrix_crypto_store_asyncpg.PgCryptoStore = PgCryptoStore

    # --- mautrix.util ---
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
 across gateway messenger platforms.
 """

-from unittest.mock import MagicMock, AsyncMock
+from unittest.mock import MagicMock

 import pytest

@@ -53,9 +53,6 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
    mock_store.switch_session.return_value = mock_session_entry
    runner.session_store = mock_store

-    # Stub out memory flushing
-    runner._async_flush_memories = AsyncMock()
-
    return runner


@@ -233,28 +230,3 @@ class TestHandleResumeCommand:

        assert real_key not in runner._running_agents
        db.close()
-
-    @pytest.mark.asyncio
-    async def test_resume_flushes_memories(self, tmp_path):
-        """Resume should flush memories from the current session before switching."""
-        from hermes_state import SessionDB
-
-        db = SessionDB(db_path=tmp_path / "state.db")
-        db.create_session("old_session", "telegram")
-        db.set_session_title("old_session", "Old Work")
-        db.create_session("current_session_001", "telegram")
-
-        event = _make_event(text="/resume Old Work")
-        runner = _make_runner(
-            session_db=db,
-            current_session_id="current_session_001",
-            event=event,
-        )
-
-        await runner._handle_resume_command(event)
-
-        runner._async_flush_memories.assert_called_once_with(
-            "current_session_001",
-            "agent:main:telegram:dm:67890",
-        )
-        db.close()
@@ -177,8 +177,8 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
    its reset policy (idle timeout, scheduled reset), it must fire
    ``on_session_finalize`` so plugin providers get the same final-pass
    extraction opportunity they'd get from /new or CLI shutdown.  Before
-    the fix, the expiry path flushed memories and evicted the agent but
-    silently skipped the hook.
+    the fix, the expiry path evicted the agent but silently skipped the
+    hook.
    """
    from datetime import datetime, timedelta

@@ -200,7 +200,7 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
-    expired_entry.memory_flushed = False
+    expired_entry.expiry_finalized = False

    runner.session_store = MagicMock()
    runner.session_store._ensure_loaded = MagicMock()
@@ -211,24 +211,24 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
    runner.session_store._lock.__exit__ = MagicMock(return_value=None)
    runner.session_store._save = MagicMock()

-    runner._async_flush_memories = AsyncMock()
    runner._evict_cached_agent = MagicMock()
    runner._cleanup_agent_resources = MagicMock()
    runner._sweep_idle_cached_agents = MagicMock(return_value=0)

    # The watcher starts with `await asyncio.sleep(60)` and loops while
-    # `self._running`. Patch sleep so the 60s initial delay is instant, then
-    # flip `_running` false inside the flush call so the loop exits cleanly
-    # after one pass.
+    # `self._running`.  Patch sleep so the 60s initial delay is instant, and
+    # make the expiry hook invocation flip `_running` false so the loop
+    # exits cleanly after one pass.
    _orig_sleep = __import__("asyncio").sleep

    async def _fast_sleep(_):
        await _orig_sleep(0)

-    async def _flush_and_stop(session_id, key):
-        runner._running = False  # terminate the loop after this iteration
+    def _hook_and_stop(*a, **kw):
+        runner._running = False
+        return None

-    runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop)
+    mock_invoke_hook.side_effect = _hook_and_stop

    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
        await runner._session_expiry_watcher(interval=0)
@@ -1,7 +1,7 @@
 """Regression tests for approval-state cleanup on session boundaries."""

 from datetime import datetime
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import MagicMock

 import pytest

@@ -72,7 +72,6 @@ def _make_resume_runner():
    runner = object.__new__(GatewayRunner)
    runner.adapters = {}
    runner._background_tasks = set()
-    runner._async_flush_memories = AsyncMock()
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._busy_ack_ts = {}
@@ -256,6 +256,17 @@ class TestDetectProviderForModel:
        """Models belonging to the current provider should not trigger a switch."""
        assert detect_provider_for_model("gpt-5.3-codex", "openai-codex") is None

+    def test_short_alias_resolves_to_static_model(self):
+        """Short aliases (e.g. sonnet) should resolve without network lookups."""
+        with patch(
+            "hermes_cli.models.fetch_openrouter_models",
+            side_effect=AssertionError("network lookup should not run"),
+        ):
+            result = detect_provider_for_model("sonnet", "auto")
+        assert result is not None
+        assert result[0] == "anthropic"
+        assert result[1].startswith("claude-sonnet")
+
    def test_openrouter_slug_match(self):
        """Models in the OpenRouter catalog should be found."""
        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
@@ -601,3 +601,189 @@ class TestImagegenModelPicker:
            _configure_imagegen_model("fal", config)
        assert isinstance(config["image_gen"], dict)
        assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
+
+
+def test_save_platform_tools_normalizes_numeric_entries():
+    """YAML may parse bare numeric toolset names as int. They should be
+    normalized to str so they survive the save round-trip.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", 12306, "custom-mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "12306" in saved
+    assert 12306 not in saved
+
+
+def test_save_platform_tools_clears_no_mcp_sentinel():
+    """`hermes tools` has no UI for no_mcp, so saving from the picker clears
+    the sentinel unconditionally — otherwise a user who once set no_mcp by
+    hand could never re-enable MCP servers through the UI.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "no_mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "no_mcp" not in saved
+
+
+def test_save_platform_tools_preserves_mcp_server_names():
+    """Non-sentinel passthrough entries (MCP server names) must still survive
+    the save — we only clear `no_mcp`, not every non-configurable entry.
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "custom-mcp", "another-mcp"]
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "custom-mcp" in saved
+    assert "another-mcp" in saved
+
+
+def test_get_platform_tools_recovers_non_configurable_toolsets_from_composite():
+    """A toolset that is missing from CONFIGURABLE_TOOLSETS must still be
+    reported when the platform's composite toolset carries its tools.
+    """
+    from toolsets import TOOLSETS
+    from hermes_cli.tools_config import PLATFORMS
+
+    # Throwaway registry: one non-configurable toolset plus a composite that uses it.
+    patched_toolsets = dict(TOOLSETS)
+    patched_toolsets["_test_platform_tool"] = {
+        "description": "test",
+        "tools": ["_test_special_tool"],
+        "includes": [],
+    }
+    patched_toolsets["hermes-_test_platform"] = {
+        "description": "test composite",
+        "tools": ["web_search", "web_extract", "terminal", "process", "_test_special_tool"],
+        "includes": [],
+    }
+
+    extra_platforms = {
+        "_test_platform": {"label": "Test", "default_toolset": "hermes-_test_platform"},
+    }
+
+    with patch("hermes_cli.tools_config.PLATFORMS", {**PLATFORMS, **extra_platforms}), \
+         patch("toolsets.TOOLSETS", patched_toolsets):
+        enabled = _get_platform_tools({}, "_test_platform")
+
+    assert "_test_platform_tool" in enabled
+    assert "web" in enabled
+    assert "terminal" in enabled
+
+
+def test_get_platform_tools_second_pass_skips_fully_claimed_toolsets():
+    """Toolsets whose tools are fully covered by configurable keys should NOT
+    be added by the second pass (prevents 'search', 'hermes-acp' noise).
+    """
+    enabled = _get_platform_tools({}, "cli")
+
+    assert "search" not in enabled
+
+
+def test_get_platform_tools_discord_both_off_by_default():
+    """Both `discord` and `discord_admin` are opt-in via `hermes tools`,
+    even on the Discord platform itself.  Users shouldn't auto-inherit 19
+    extra tools just because DISCORD_BOT_TOKEN is set."""
+    enabled = _get_platform_tools({}, "discord")
+    assert "discord" not in enabled
+    assert "discord_admin" not in enabled
+
+
+def test_discord_toolsets_in_configurable_toolsets():
+    keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    assert "discord" in keys
+    assert "discord_admin" in keys
+
+
+def test_discord_toolsets_in_default_off():
+    assert "discord" in _DEFAULT_OFF_TOOLSETS
+    assert "discord_admin" in _DEFAULT_OFF_TOOLSETS
+
+
+def test_discord_toolsets_not_available_on_other_platforms():
+    """Platform-scoping: discord / discord_admin should not appear on CLI,
+    Telegram, etc. — not even as an opt-in."""
+    from hermes_cli.tools_config import _toolset_allowed_for_platform
+    for plat in ["cli", "telegram", "slack", "whatsapp", "signal"]:
+        assert not _toolset_allowed_for_platform("discord", plat), (
+            f"`discord` toolset leaked onto {plat}"
+        )
+        assert not _toolset_allowed_for_platform("discord_admin", plat), (
+            f"`discord_admin` toolset leaked onto {plat}"
+        )
+    assert _toolset_allowed_for_platform("discord", "discord")
+    assert _toolset_allowed_for_platform("discord_admin", "discord")
+
+
+def test_discord_toolsets_user_enabled_are_honored():
+    """When the user opts in via `hermes tools`, the toolset appears."""
+    config = {"platform_toolsets": {"discord": ["web", "terminal", "discord"]}}
+    enabled = _get_platform_tools(config, "discord")
+    assert "discord" in enabled
+    assert "discord_admin" not in enabled
+
+
+def test_save_platform_tools_strips_restricted_toolsets():
+    """`discord` / `discord_admin` picked for Telegram (hand-edited or all-platforms
+    checklist) must be stripped at save time; save_config is stubbed (no disk I/O)."""
+    config = {}
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "telegram", {"web", "terminal", "discord", "discord_admin"})
+    saved = config["platform_toolsets"]["telegram"]
+    assert "discord" not in saved
+    assert "discord_admin" not in saved
+    assert "web" in saved
+    assert "terminal" in saved
+
+
+def test_get_platform_tools_feishu_includes_doc_and_drive():
+    enabled = _get_platform_tools({}, "feishu")
+    assert "feishu_doc" in enabled
+    assert "feishu_drive" in enabled
+
+
+def test_get_platform_tools_feishu_tools_not_on_other_platforms():
+    for plat in ["cli", "telegram", "discord"]:
+        enabled = _get_platform_tools({}, plat)
+        assert "feishu_doc" not in enabled, f"feishu_doc leaked onto {plat}"
+        assert "feishu_drive" not in enabled, f"feishu_drive leaked onto {plat}"
+
+
+def test_get_effective_configurable_toolsets_dedupes_bundled_plugins():
+    """Bundled plugins (plugins/spotify) share their toolset key with the
+    built-in CONFIGURABLE_TOOLSETS entry. The effective list must contain each
+    key only once — otherwise `hermes tools` → "reconfigure existing" shows
+    the same toolset in two consecutive rows.
+    """
+    from hermes_cli.tools_config import _get_effective_configurable_toolsets
+
+    all_ts = _get_effective_configurable_toolsets()
+    keys = [ts_key for ts_key, _, _ in all_ts]
+    assert len(keys) == len(set(keys)), (
+        f"duplicate toolset keys in effective list: "
+        f"{[k for k in keys if keys.count(k) > 1]}"
+    )
+    # Spotify specifically — the bug that motivated the dedupe.
+    spotify_rows = [t for t in all_ts if t[0] == "spotify"]
+    assert len(spotify_rows) == 1, spotify_rows
+    # Built-in label wins over the plugin label.
+    assert spotify_rows[0][1] == "🎵 Spotify"
@@ -19,6 +19,18 @@ def _touch_ink(root: Path) -> None:
    ink.write_text("{}")


+def _touch_tui_entry(root: Path) -> None:
+    entry = root / "dist" / "entry.js"
+    entry.parent.mkdir(parents=True, exist_ok=True)
+    entry.write_text("console.log('tui')")
+
+
+def _touch_ink_bundle(root: Path) -> None:
+    bundle = root / "packages" / "hermes-ink" / "dist" / "ink-bundle.js"
+    bundle.parent.mkdir(parents=True, exist_ok=True)
+    bundle.write_text("export {}")
+
+
 def test_need_install_when_ink_missing(tmp_path: Path, main_mod) -> None:
    (tmp_path / "package-lock.json").write_text("{}")
    assert main_mod._tui_need_npm_install(tmp_path) is True
@@ -51,3 +63,19 @@ def test_need_install_when_marker_missing(tmp_path: Path, main_mod) -> None:
 def test_no_install_without_lockfile_when_ink_present(tmp_path: Path, main_mod) -> None:
    _touch_ink(tmp_path)
    assert main_mod._tui_need_npm_install(tmp_path) is False
+
+
+def test_build_needed_when_local_ink_bundle_missing(tmp_path: Path, main_mod) -> None:
+    _touch_tui_entry(tmp_path)
+    _touch_ink(tmp_path)
+
+    assert main_mod._tui_need_npm_install(tmp_path) is False
+    assert main_mod._tui_build_needed(tmp_path) is True
+
+
+def test_build_not_needed_when_entry_and_ink_bundle_present(tmp_path: Path, main_mod) -> None:
+    _touch_tui_entry(tmp_path)
+    _touch_ink(tmp_path)
+    _touch_ink_bundle(tmp_path)
+
+    assert main_mod._tui_build_needed(tmp_path) is False
@@ -1,4 +1,5 @@
 from argparse import Namespace
+from pathlib import Path
 import sys
 import types

@@ -8,8 +9,11 @@ import pytest
 def _args(**overrides):
    base = {
        "continue_last": None,
+        "model": None,
+        "provider": None,
        "resume": None,
        "tui": True,
+        "tui_dev": False,
    }
    base.update(overrides)
    return Namespace(**base)
@@ -31,7 +35,7 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
        calls.append(source)
        return "20260408_235959_a1b2c3" if source == "tui" else None

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -58,7 +62,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
            return "20260408_235959_d4e5f6"
        return None

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -76,7 +80,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
 def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
    captured = {}

-    def fake_launch(resume_session_id=None, tui_dev=False):
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        captured["resume"] = resume_session_id
        raise SystemExit(0)

@@ -89,6 +93,60 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
    assert captured["resume"] == "20260409_000000_aa11bb"


+def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
+    captured = {}
+
+    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
+        captured.update(
+            {
+                "model": model,
+                "provider": provider,
+                "resume": resume_session_id,
+                "tui_dev": tui_dev,
+            }
+        )
+        raise SystemExit(0)
+
+    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)
+
+    with pytest.raises(SystemExit):
+        main_mod.cmd_chat(
+            _args(model="anthropic/claude-sonnet-4.6", provider="anthropic")
+        )
+
+    assert captured == {
+        "model": "anthropic/claude-sonnet-4.6",
+        "provider": "anthropic",
+        "resume": None,
+        "tui_dev": False,
+    }
+
+
+def test_launch_tui_exports_model_and_provider(monkeypatch, main_mod):
+    captured = {}
+
+    monkeypatch.setattr(
+        main_mod,
+        "_make_tui_argv",
+        lambda tui_dir, tui_dev: (["node", "dist/entry.js"], Path(".")),
+    )
+
+    def fake_call(argv, cwd=None, env=None):
+        captured.update({"argv": argv, "cwd": cwd, "env": env})
+        return 1
+
+    monkeypatch.setattr(main_mod.subprocess, "call", fake_call)
+
+    with pytest.raises(SystemExit):
+        main_mod._launch_tui(model="nous/hermes-test", provider="nous")
+
+    env = captured["env"]
+    assert env["HERMES_MODEL"] == "nous/hermes-test"
+    assert env["HERMES_INFERENCE_MODEL"] == "nous/hermes-test"
+    assert env["HERMES_TUI_PROVIDER"] == "nous"
+    assert env["HERMES_INFERENCE_PROVIDER"] == "nous"
+
+
 def test_print_tui_exit_summary_includes_resume_and_token_totals(monkeypatch, capsys):
    import hermes_cli.main as main_mod

@@ -31,7 +31,6 @@ def _make_agent_with_engine(engine):
    agent._vprint = lambda *a, **kw: None
    agent._last_flushed_db_idx = 0
    # Stub the few AIAgent methods _compress_context uses.
-    agent.flush_memories = lambda *a, **kw: None
    agent._invalidate_system_prompt = lambda *a, **kw: None
    agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
    agent.commit_memory_session = lambda *a, **kw: None
@@ -41,6 +41,7 @@ def _make_agent(
    agent.tool_progress_callback = None
    agent._compression_warning = None
    agent._aux_compression_context_length_config = None
+    agent.tools = []

    compressor = MagicMock(spec=ContextCompressor)
    compressor.context_length = main_context
@@ -82,7 +83,7 @@ def test_auto_corrects_threshold_when_aux_context_below_threshold(mock_get_clien
    assert "threshold:" in messages[0]
    # Warning stored for gateway replay
    assert agent._compression_warning is not None
-    # Threshold on the live compressor was actually lowered
+    # Threshold on the live compressor was actually lowered to aux_context.
    assert agent.context_compressor.threshold_tokens == 80_000


@@ -180,6 +181,7 @@ def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ct
        base_url="http://custom-endpoint:8080/v1",
        api_key="sk-custom",
        config_context_length=1_000_000,
+        provider="openrouter",
    )


@@ -202,6 +204,7 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_
        base_url="http://custom:8080/v1",
        api_key="sk-test",
        config_context_length=None,
+        provider="openrouter",
    )


@@ -254,6 +257,7 @@ def test_init_feasibility_check_uses_aux_context_override_from_config():
        base_url="http://custom-endpoint:8080/v1",
        api_key="sk-custom",
        config_context_length=1_000_000,
+        provider="",
    )


@@ -1,329 +0,0 @@
-"""Tests for flush_memories() working correctly across all provider modes.
-
-Catches the bug where Codex mode called chat.completions.create on a
-Responses-only client, which would fail silently or with a 404.
-"""
-
-import json
-import os
-import sys
-import types
-from types import SimpleNamespace
-from unittest.mock import patch, MagicMock, call
-
-import pytest
-
-sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
-sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
-sys.modules.setdefault("fal_client", types.SimpleNamespace())
-
-import run_agent
-
-
-class _FakeOpenAI:
-    def __init__(self, **kwargs):
-        self.kwargs = kwargs
-        self.api_key = kwargs.get("api_key", "test")
-        self.base_url = kwargs.get("base_url", "http://test")
-
-    def close(self):
-        pass
-
-
-def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
-    """Build an AIAgent with mocked internals, ready for flush_memories testing."""
-    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
-        {
-            "type": "function",
-            "function": {
-                "name": "memory",
-                "description": "Manage memories.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "action": {"type": "string"},
-                        "target": {"type": "string"},
-                        "content": {"type": "string"},
-                    },
-                },
-            },
-        },
-    ])
-    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
-    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
-
-    agent = run_agent.AIAgent(
-        api_key="test-key",
-        base_url="https://test.example.com/v1",
-        provider=provider,
-        api_mode=api_mode,
-        max_iterations=4,
-        quiet_mode=True,
-        skip_context_files=True,
-        skip_memory=True,
-    )
-    # Give it a valid memory store
-    agent._memory_store = MagicMock()
-    agent._memory_flush_min_turns = 1
-    agent._user_turn_count = 5
-    return agent
-
-
-def _chat_response_with_memory_call():
-    """Simulated chat completions response with a memory tool call."""
-    return SimpleNamespace(
-        choices=[SimpleNamespace(
-            finish_reason="tool_calls",
-            message=SimpleNamespace(
-                content=None,
-                tool_calls=[SimpleNamespace(
-                    id="call_mem_0",
-                    type="function",
-                    function=SimpleNamespace(
-                        name="memory",
-                        arguments=json.dumps({
-                            "action": "add",
-                            "target": "notes",
-                            "content": "User prefers dark mode.",
-                        }),
-                    ),
-                )],
-            ),
-        )],
-        usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
-    )
-
-
-class TestFlushMemoriesRespectsConfigTimeout:
-    """flush_memories() must NOT hardcode timeout=30.0 — it should defer
-    to the config value via auxiliary.flush_memories.timeout."""
-
-    def test_auxiliary_path_omits_explicit_timeout(self, monkeypatch):
-        """When calling _call_llm, timeout should NOT be passed so that
-        _get_task_timeout('flush_memories') reads from config."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        mock_call.assert_called_once()
-        call_kwargs = mock_call.call_args
-        # timeout must NOT be explicitly passed (so _get_task_timeout resolves it)
-        assert "timeout" not in call_kwargs.kwargs, (
-            "flush_memories should not pass explicit timeout to _call_llm; "
-            "let _get_task_timeout('flush_memories') resolve from config"
-        )
-
-    def test_fallback_path_uses_config_timeout(self, monkeypatch):
-        """When auxiliary client is unavailable and we fall back to direct
-        OpenAI client, timeout should come from _get_task_timeout, not hardcoded."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-
-        custom_timeout = 180.0
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
-             patch("agent.auxiliary_client._get_task_timeout", return_value=custom_timeout) as mock_gtt, \
-             patch("tools.memory_tool.memory_tool", return_value="Saved."):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        mock_gtt.assert_called_once_with("flush_memories")
-        agent.client.chat.completions.create.assert_called_once()
-        call_kwargs = agent.client.chat.completions.create.call_args
-        assert call_kwargs.kwargs.get("timeout") == custom_timeout, (
-            f"Expected timeout={custom_timeout} from config, got {call_kwargs.kwargs.get('timeout')}"
-        )
-
-
-class TestFlushMemoriesUsesAuxiliaryClient:
-    """When an auxiliary client is available, flush_memories should use it
-    instead of self.client -- especially critical in Codex mode."""
-
-    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
-        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Remember this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-                agent.flush_memories(messages)
-
-        mock_call.assert_called_once()
-        call_kwargs = mock_call.call_args
-        assert call_kwargs.kwargs.get("task") == "flush_memories"
-
-    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
-        """Non-Codex mode with no auxiliary falls back to self.client."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Save this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        agent.client.chat.completions.create.assert_called_once()
-
-    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
-        """Provider/API failures from auxiliary flush must be visible.
-
-        Exhausted keys and rate limits are not always RuntimeError. They used
-        to fall into the broad outer handler and disappear into debug logs.
-        """
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent.client = MagicMock()
-        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
-        events = []
-        agent.status_callback = lambda kind, text=None: events.append((kind, text))
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
-             patch("tools.memory_tool.memory_tool", return_value="Saved."):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi there"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        agent.client.chat.completions.create.assert_called_once()
-        assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events)
-
-    def test_flush_executes_memory_tool_calls(self, monkeypatch):
-        """Verify that memory tool calls from the flush response actually get executed."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-                agent.flush_memories(messages)
-
-        mock_memory.assert_called_once()
-        call_kwargs = mock_memory.call_args
-        assert call_kwargs.kwargs["action"] == "add"
-        assert call_kwargs.kwargs["target"] == "notes"
-        assert "dark mode" in call_kwargs.kwargs["content"]
-
-    def test_flush_bridges_memory_write_metadata(self, monkeypatch):
-        """Flush memory writes notify external providers with flush provenance."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-        agent._memory_manager = MagicMock()
-        agent.session_id = "sess-flush"
-        agent.platform = "cli"
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Note this"},
-            ]
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        agent._memory_manager.on_memory_write.assert_called_once()
-        call_kwargs = agent._memory_manager.on_memory_write.call_args
-        assert call_kwargs.args[:3] == ("add", "notes", "User prefers dark mode.")
-        assert call_kwargs.kwargs["metadata"]["write_origin"] == "memory_flush"
-        assert call_kwargs.kwargs["metadata"]["execution_context"] == "flush_memories"
-        assert call_kwargs.kwargs["metadata"]["session_id"] == "sess-flush"
-
-    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
-        """After flush, the flush prompt and any response should be removed from messages."""
-        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
-
-        mock_response = _chat_response_with_memory_call()
-
-        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Remember X"},
-            ]
-            original_len = len(messages)
-            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
-                agent.flush_memories(messages)
-
-        # Messages should not grow from the flush
-        assert len(messages) <= original_len
-        # No flush sentinel should remain
-        for msg in messages:
-            assert "_flush_sentinel" not in msg
-
-
-class TestFlushMemoriesCodexFallback:
-    """When no auxiliary client exists and we're in Codex mode, flush should
-    use the Codex Responses API path instead of chat.completions."""
-
-    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
-        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
-
-        codex_response = SimpleNamespace(
-            output=[
-                SimpleNamespace(
-                    type="function_call",
-                    call_id="call_1",
-                    name="memory",
-                    arguments=json.dumps({
-                        "action": "add",
-                        "target": "notes",
-                        "content": "Codex flush test",
-                    }),
-                ),
-            ],
-            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
-            status="completed",
-            model="gpt-5-codex",
-        )
-
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
-             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
-             patch.object(agent, "_build_api_kwargs") as mock_build, \
-             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
-            mock_build.return_value = {
-                "model": "gpt-5-codex",
-                "instructions": "test",
-                "input": [],
-                "tools": [],
-                "max_output_tokens": 4096,
-            }
-            messages = [
-                {"role": "user", "content": "Hello"},
-                {"role": "assistant", "content": "Hi"},
-                {"role": "user", "content": "Save this"},
-            ]
-            agent.flush_memories(messages)
-
-        mock_stream.assert_called_once()
-        mock_memory.assert_called_once()
-        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
@@ -12,7 +12,7 @@ from types import SimpleNamespace
 from unittest.mock import patch, MagicMock

 import pytest
-from agent.codex_responses_adapter import _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items
+from agent.codex_responses_adapter import _chat_content_to_responses_parts, _chat_messages_to_responses_input, _normalize_codex_response, _preflight_codex_input_items

 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@@ -520,6 +520,111 @@ class TestChatMessagesToResponsesInput:
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0

+    def test_user_multimodal_content_uses_input_text(self, monkeypatch):
+        """User messages with list content must use input_text type."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [{"role": "user", "content": [
+            {"type": "text", "text": "find files"},
+        ]}]
+        items = _chat_messages_to_responses_input(messages)
+        assert len(items) == 1
+        assert items[0]["role"] == "user"
+        content = items[0]["content"]
+        assert isinstance(content, list)
+        assert content[0]["type"] == "input_text"
+        assert content[0]["text"] == "find files"
+
+    def test_assistant_multimodal_content_uses_output_text(self, monkeypatch):
+        """Assistant messages with list content must use output_text type.
+
+        This is the fix for #15687 — the Responses API rejects input_text
+        inside assistant messages.
+        """
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [{"role": "assistant", "content": [
+            {"type": "text", "text": "I found the files."},
+        ]}]
+        items = _chat_messages_to_responses_input(messages)
+        assert len(items) == 1
+        assert items[0]["role"] == "assistant"
+        content = items[0]["content"]
+        assert isinstance(content, list)
+        assert content[0]["type"] == "output_text"
+        assert content[0]["text"] == "I found the files."
+
+    def test_preflight_preserves_assistant_output_text(self, monkeypatch):
+        """_preflight_codex_input_items must preserve output_text for assistant."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        raw_input = [
+            {"role": "user", "content": [{"type": "input_text", "text": "hi"}]},
+            {"role": "assistant", "content": [{"type": "output_text", "text": "hello"}]},
+        ]
+        normalized = _preflight_codex_input_items(raw_input)
+        user_content = normalized[0]["content"]
+        asst_content = normalized[1]["content"]
+        assert user_content[0]["type"] == "input_text"
+        assert asst_content[0]["type"] == "output_text"
+
+    def test_full_round_trip_with_list_content(self, monkeypatch):
+        """End-to-end: user + assistant with list content through both stages."""
+        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
+                            base_url="https://chatgpt.com/backend-api/codex")
+        messages = [
+            {"role": "user", "content": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "content": [{"type": "text", "text": "hi there"}]},
+            {"role": "user", "content": [{"type": "text", "text": "continue"}]},
+        ]
+        items = _chat_messages_to_responses_input(messages)
+        normalized = _preflight_codex_input_items(items)
+
+        # User items use input_text
+        assert normalized[0]["content"][0]["type"] == "input_text"
+        assert normalized[2]["content"][0]["type"] == "input_text"
+        # Assistant item uses output_text
+        assert normalized[1]["content"][0]["type"] == "output_text"
+
+
+class TestChatContentToResponsesParts:
+    """Unit tests for _chat_content_to_responses_parts role parameter (#15687)."""
+
+    def test_default_role_emits_input_text(self):
+        """Default (user) role emits input_text."""
+        result = _chat_content_to_responses_parts([{"type": "text", "text": "hello"}])
+        assert result[0]["type"] == "input_text"
+
+    def test_explicit_user_role_emits_input_text(self):
+        result = _chat_content_to_responses_parts(
+            [{"type": "text", "text": "hello"}], role="user"
+        )
+        assert result[0]["type"] == "input_text"
+
+    def test_assistant_role_emits_output_text(self):
+        result = _chat_content_to_responses_parts(
+            [{"type": "text", "text": "hello"}], role="assistant"
+        )
+        assert result[0]["type"] == "output_text"
+
+    def test_assistant_role_with_string_parts(self):
+        """String parts in assistant content also get output_text."""
+        result = _chat_content_to_responses_parts(["hello"], role="assistant")
+        assert result[0]["type"] == "output_text"
+        assert result[0]["text"] == "hello"
+
+    def test_assistant_role_with_mixed_input_output_text_types(self):
+        """Parts already marked input_text or output_text get normalized to role's type."""
+        parts = [
+            {"type": "input_text", "text": "a"},
+            {"type": "output_text", "text": "b"},
+            {"type": "text", "text": "c"},
+        ]
+        result = _chat_content_to_responses_parts(parts, role="assistant")
+        # All text parts should become output_text regardless of original type
+        assert all(p["type"] == "output_text" for p in result)
+        assert [p["text"] for p in result] == ["a", "b", "c"]
+

 # ── Response normalization tests ─────────────────────────────────────────────

@@ -3078,48 +3078,6 @@ class TestRetryExhaustion:
        assert "bad messages" in result["error"]


-# ---------------------------------------------------------------------------
-# Flush sentinel leak
-# ---------------------------------------------------------------------------
-
-
-class TestFlushSentinelNotLeaked:
-    """_flush_sentinel must be stripped before sending messages to the API."""
-
-    def test_flush_sentinel_stripped_from_api_messages(self, agent_with_memory_tool):
-        """Verify _flush_sentinel is not sent to the API provider."""
-        agent = agent_with_memory_tool
-        agent._memory_store = MagicMock()
-        agent._memory_flush_min_turns = 1
-        agent._user_turn_count = 10
-        agent._cached_system_prompt = "system"
-
-        messages = [
-            {"role": "user", "content": "hello"},
-            {"role": "assistant", "content": "hi"},
-            {"role": "user", "content": "remember this"},
-        ]
-
-        # Mock the API to return a simple response (no tool calls)
-        mock_msg = SimpleNamespace(content="OK", tool_calls=None)
-        mock_choice = SimpleNamespace(message=mock_msg)
-        mock_response = SimpleNamespace(choices=[mock_choice])
-        agent.client.chat.completions.create.return_value = mock_response
-
-        # Bypass auxiliary client so flush uses agent.client directly
-        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
-            agent.flush_memories(messages, min_turns=0)
-
-        # Check what was actually sent to the API
-        call_args = agent.client.chat.completions.create.call_args
-        assert call_args is not None, "flush_memories never called the API"
-        api_messages = call_args.kwargs.get("messages") or call_args[1].get("messages")
-        for msg in api_messages:
-            assert "_flush_sentinel" not in msg, (
-                f"_flush_sentinel leaked to API in message: {msg}"
-            )
-
-
 # ---------------------------------------------------------------------------
 # Conversation history mutation
 # ---------------------------------------------------------------------------
@@ -0,0 +1,162 @@
+"""Tests that /stop interrupts streaming retry loops immediately.
+
+When the agent is interrupted during a streaming API call, the outer poll
+loop closes the HTTP connection.  The inner `_call()` thread sees a
+connection error and enters its retry loop.  Before this fix, the retry
+loop would open a FRESH connection without checking `_interrupt_requested`,
+making /stop take multiple retry cycles × read-timeout to actually stop
+(510+ seconds observed on slow ollama-cloud providers).
+
+The fix adds an `_interrupt_requested` check at the top of the retry loop
+so the agent exits immediately instead of retrying.
+"""
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+def _make_agent(**kwargs):
+    """Create a minimal AIAgent for streaming tests."""
+    from run_agent import AIAgent
+
+    defaults = dict(
+        api_key="test-key",
+        base_url="https://example.com/v1",
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    defaults.update(kwargs)
+    agent = AIAgent(**defaults)
+    agent.api_mode = "chat_completions"
+    return agent
+
+
+class TestStreamInterruptBeforeRetry:
+    """Verify _interrupt_requested is checked before each streaming retry."""
+
+    @pytest.mark.filterwarnings(
+        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
+    )
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_interrupt_prevents_stream_retry(self, mock_close, mock_create):
+        """When _interrupt_requested is set during a transient stream error,
+        the retry loop must NOT retry — it should raise InterruptedError
+        immediately instead of opening a fresh connection."""
+        import httpx
+
+        attempt_count = [0]
+
+        def fail_once_then_interrupt(*args, **kwargs):
+            attempt_count[0] += 1
+            if attempt_count[0] == 1:
+                # First attempt: simulate normal failure, then set interrupt
+                # (as if /stop arrived while the retry loop processes the error)
+                agent._interrupt_requested = True
+                raise httpx.ConnectError("connection reset by /stop")
+            # Should never reach here — the interrupt check should fire first
+            raise httpx.ConnectError("unexpected retry — interrupt not checked!")
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = fail_once_then_interrupt
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = False
+
+        with pytest.raises(InterruptedError, match="interrupted"):
+            agent._interruptible_streaming_api_call({})
+
+        # Only 1 attempt should have been made — the interrupt should prevent retry
+        assert attempt_count[0] == 1, (
+            f"Expected 1 attempt but got {attempt_count[0]}. "
+            "The retry loop retried despite _interrupt_requested being set."
+        )
+
+    @pytest.mark.filterwarnings(
+        "ignore::pytest.PytestUnhandledThreadExceptionWarning"
+    )
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_interrupt_before_first_attempt(self, mock_close, mock_create):
+        """If _interrupt_requested is already set when the streaming call
+        starts, it should exit immediately without making any API call."""
+        mock_client = MagicMock()
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = True  # Pre-set before call
+
+        with pytest.raises(InterruptedError, match="interrupted"):
+            agent._interruptible_streaming_api_call({})
+
+        # No API call should have been made at all
+        assert mock_client.chat.completions.create.call_count == 0
+
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_normal_retry_still_works_without_interrupt(self, mock_close, mock_create):
+        """Without an interrupt, transient errors should still retry normally."""
+        import httpx
+
+        attempts = [0]
+
+        def fail_twice_then_succeed(*args, **kwargs):
+            attempts[0] += 1
+            if attempts[0] <= 2:
+                raise httpx.ConnectError("transient failure")
+            # Third attempt succeeds
+            chunks = [
+                SimpleNamespace(
+                    choices=[
+                        SimpleNamespace(
+                            index=0,
+                            delta=SimpleNamespace(
+                                content="ok",
+                                tool_calls=None,
+                                reasoning_content=None,
+                                reasoning=None,
+                            ),
+                            finish_reason=None,
+                        )
+                    ],
+                    model="test/model",
+                    usage=None,
+                ),
+                SimpleNamespace(
+                    choices=[
+                        SimpleNamespace(
+                            index=0,
+                            delta=SimpleNamespace(
+                                content=None,
+                                tool_calls=None,
+                                reasoning_content=None,
+                                reasoning=None,
+                            ),
+                            finish_reason="stop",
+                        )
+                    ],
+                    model="test/model",
+                    usage=None,
+                ),
+            ]
+            stream = MagicMock()
+            stream.__iter__ = MagicMock(return_value=iter(chunks))
+            stream.response = MagicMock()
+            stream.response.headers = {}
+            return stream
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = fail_twice_then_succeed
+        mock_create.return_value = mock_client
+
+        agent = _make_agent()
+        agent._interrupt_requested = False
+
+        # Should succeed on the third attempt
+        result = agent._interruptible_streaming_api_call({})
+        assert result is not None
+        assert attempts[0] == 3
@@ -200,8 +200,8 @@ class TestToolsetConsistency:
    def test_hermes_platforms_share_core_tools(self):
        """All hermes-* platform toolsets share the same core tools.

-        Platform-specific additions (e.g. ``discord_server`` on
-        hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
+        Platform-specific additions (e.g. ``discord`` / ``discord_admin``
+        on hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
        the invariant is that the core set is identical across platforms.
        """
        platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
@@ -83,6 +83,100 @@ def test_status_callback_accepts_single_message_argument():
    )


+def test_resolve_model_uses_inference_model_env(monkeypatch):
+    monkeypatch.delenv("HERMES_MODEL", raising=False)
+    monkeypatch.setenv("HERMES_INFERENCE_MODEL", " anthropic/claude-sonnet-4.6\n")
+
+    assert server._resolve_model() == "anthropic/claude-sonnet-4.6"
+
+
+def test_resolve_model_strips_config_model(monkeypatch):
+    monkeypatch.delenv("HERMES_MODEL", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_MODEL", raising=False)
+    monkeypatch.setattr(
+        server, "_load_cfg", lambda: {"model": {"default": " nous/hermes-test "}}
+    )
+
+    assert server._resolve_model() == "nous/hermes-test"
+
+
+def test_startup_runtime_uses_tui_provider_env(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
+    monkeypatch.setenv("HERMES_TUI_PROVIDER", "nous")
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+
+    assert server._resolve_startup_runtime() == ("nous/hermes-test", "nous")
+
+
+def test_startup_runtime_does_not_treat_inference_provider_as_explicit(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "nous/hermes-test")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_static_provider_for_model",
+        lambda model, provider: None,
+    )
+
+    assert server._resolve_startup_runtime() == ("nous/hermes-test", None)
+
+
+def test_startup_runtime_detects_provider_for_model_env(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+
+    def fake_detect(model, current_provider):
+        assert model == "sonnet"
+        assert current_provider == "auto"
+        return "anthropic", "anthropic/claude-sonnet-4.6"
+
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_static_provider_for_model", fake_detect
+    )
+
+    assert server._resolve_startup_runtime() == (
+        "anthropic/claude-sonnet-4.6",
+        "anthropic",
+    )
+
+
+def test_startup_runtime_resolves_short_alias_without_network(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+    monkeypatch.setattr(
+        "hermes_cli.models.fetch_openrouter_models",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("network lookup should not run")
+        ),
+    )
+
+    model, provider = server._resolve_startup_runtime()
+
+    assert provider == "anthropic"
+    assert model.startswith("claude-sonnet")
+
+
+def test_startup_runtime_does_not_call_network_detector(monkeypatch):
+    monkeypatch.setenv("HERMES_MODEL", "sonnet")
+    monkeypatch.delenv("HERMES_TUI_PROVIDER", raising=False)
+    monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"model": {"provider": "auto"}})
+    monkeypatch.setattr(
+        "hermes_cli.models.detect_provider_for_model",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("network detector called")
+        ),
+    )
+
+    model, provider = server._resolve_startup_runtime()
+
+    assert model
+    assert provider in {None, "anthropic"}
+
+
 def _session(agent=None, **extra):
    return {
        "agent": agent if agent is not None else types.SimpleNamespace(),
@@ -245,6 +339,14 @@ def test_setup_status_reports_provider_config(monkeypatch):
    assert resp["result"]["provider_configured"] is False


+def test_complete_slash_includes_provider_alias():
+    resp = server.handle_request(
+        {"id": "1", "method": "complete.slash", "params": {"text": "/pro"}}
+    )
+
+    assert any(item["text"] == "provider" for item in resp["result"]["items"])
+
+
 def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypatch):
    monkeypatch.setattr(server, "_hermes_home", tmp_path)
    agent = types.SimpleNamespace(reasoning_config=None)
@@ -415,6 +517,57 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch):
    assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic"


+def test_config_set_model_syncs_tui_provider_env(monkeypatch):
+    class Agent:
+        model = "gpt-5.3-codex"
+        provider = "openai-codex"
+        base_url = ""
+        api_key = ""
+
+        def switch_model(self, **kwargs):
+            self.model = kwargs["new_model"]
+            self.provider = kwargs["new_provider"]
+
+    agent = Agent()
+    server._sessions["sid"] = _session(agent=agent)
+    monkeypatch.setenv("HERMES_TUI_PROVIDER", "openai-codex")
+    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
+    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
+
+    def fake_switch_model(**kwargs):
+        return types.SimpleNamespace(
+            success=True,
+            new_model="anthropic/claude-sonnet-4.6",
+            target_provider="anthropic",
+            api_key="key",
+            base_url="https://api.anthropic.com",
+            api_mode="anthropic_messages",
+            warning_message="",
+        )
+
+    monkeypatch.setattr("hermes_cli.model_switch.switch_model", fake_switch_model)
+
+    try:
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "config.set",
+                "params": {
+                    "session_id": "sid",
+                    "key": "model",
+                    "value": "anthropic/claude-sonnet-4.6 --provider anthropic",
+                },
+            }
+        )
+
+        assert resp["result"]["value"] == "anthropic/claude-sonnet-4.6"
+        assert os.environ["HERMES_TUI_PROVIDER"] == "anthropic"
+        assert os.environ["HERMES_MODEL"] == "anthropic/claude-sonnet-4.6"
+        assert os.environ["HERMES_INFERENCE_MODEL"] == "anthropic/claude-sonnet-4.6"
+    finally:
+        server._sessions.clear()
+
+
 def test_config_set_personality_rejects_unknown_name(monkeypatch):
    monkeypatch.setattr(
        server,
@@ -2128,5 +2128,103 @@ class TestOrchestratorEndToEnd(unittest.TestCase):
        self.assertFalse(built_agents[2]["is_orchestrator_prompt"])


+class TestSubagentApprovalCallback(unittest.TestCase):
+    """Subagent worker threads must have a non-interactive approval callback
+    installed so dangerous-command prompts don't fall back to input() and
+    deadlock the parent's prompt_toolkit TUI.
+
+    Governed by delegation.subagent_auto_approve:
+      false (default) → _subagent_auto_deny
+      true            → _subagent_auto_approve
+    """
+
+    def test_auto_deny_returns_deny(self):
+        from tools.delegate_tool import _subagent_auto_deny
+        self.assertEqual(
+            _subagent_auto_deny("rm -rf /tmp/x", "dangerous"),
+            "deny",
+        )
+
+    def test_auto_approve_returns_once(self):
+        from tools.delegate_tool import _subagent_auto_approve
+        self.assertEqual(
+            _subagent_auto_approve("rm -rf /tmp/x", "dangerous"),
+            "once",
+        )
+
+    @patch("tools.delegate_tool._load_config", return_value={})
+    def test_getter_defaults_to_deny(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_deny,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": False},
+    )
+    def test_getter_explicit_false_is_deny(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_deny,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": True},
+    )
+    def test_getter_true_is_approve(self, _mock_cfg):
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_approve,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
+
+    @patch(
+        "tools.delegate_tool._load_config",
+        return_value={"subagent_auto_approve": "yes"},
+    )
+    def test_getter_truthy_string_is_approve(self, _mock_cfg):
+        """is_truthy_value accepts 'yes'/'1'/'true' as truthy."""
+        from tools.delegate_tool import (
+            _get_subagent_approval_callback,
+            _subagent_auto_approve,
+        )
+        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)
+
+    def test_executor_initializer_installs_callback_in_worker(self):
+        """The initializer sets the callback on the worker thread's TLS,
+        not the parent's — verifies the fix actually scopes to workers.
+        """
+        from concurrent.futures import ThreadPoolExecutor
+        from tools.terminal_tool import (
+            set_approval_callback as _set_cb,
+            _get_approval_callback,
+        )
+        from tools.delegate_tool import _subagent_auto_deny
+
+        # Parent thread has no callback.
+        _set_cb(None)
+        self.assertIsNone(_get_approval_callback())
+
+        seen = []
+
+        def worker():
+            seen.append(_get_approval_callback())
+
+        with ThreadPoolExecutor(
+            max_workers=1,
+            initializer=_set_cb,
+            initargs=(_subagent_auto_deny,),
+        ) as executor:
+            executor.submit(worker).result()
+
+        self.assertEqual(seen, [_subagent_auto_deny])
+        # Parent's callback slot is still empty (TLS isolates threads).
+        self.assertIsNone(_get_approval_callback())
+
+
 if __name__ == "__main__":
    unittest.main()
@@ -11,6 +11,8 @@ import pytest
 from tools.discord_tool import (
    DiscordAPIError,
    _ACTIONS,
+    _ADMIN_ACTIONS,
+    _CORE_ACTIONS,
    _available_actions,
    _build_schema,
    _channel_type_name,
@@ -21,8 +23,11 @@ from tools.discord_tool import (
    _load_allowed_actions_config,
    _reset_capability_cache,
    check_discord_tool_requirements,
-    discord_server,
+    discord_admin_handler,
+    discord_core,
    get_dynamic_schema,
+    get_dynamic_schema_admin,
+    get_dynamic_schema_core,
 )


@@ -147,32 +152,32 @@ class TestDiscordRequest:
 class TestDiscordServerValidation:
    def test_no_token(self, monkeypatch):
        monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "error" in result
        assert "DISCORD_BOT_TOKEN" in result["error"]

    def test_unknown_action(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="bad_action"))
+        result = json.loads(discord_core(action="bad_action"))
        assert "error" in result
        assert "Unknown action" in result["error"]
        assert "available_actions" in result

    def test_missing_required_guild_id(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="list_channels"))
+        result = json.loads(discord_admin_handler(action="list_channels"))
        assert "error" in result
        assert "guild_id" in result["error"]

    def test_missing_required_channel_id(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="fetch_messages"))
+        result = json.loads(discord_core(action="fetch_messages"))
        assert "error" in result
        assert "channel_id" in result["error"]

    def test_missing_multiple_params(self, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
-        result = json.loads(discord_server(action="add_role"))
+        result = json.loads(discord_admin_handler(action="add_role"))
        assert "error" in result
        assert "guild_id" in result["error"]
        assert "user_id" in result["error"]
@@ -191,7 +196,7 @@ class TestListGuilds:
            {"id": "111", "name": "Test Server", "icon": "abc", "owner": True, "permissions": "123"},
            {"id": "222", "name": "Other Server", "icon": None, "owner": False, "permissions": "456"},
        ]
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert result["count"] == 2
        assert result["guilds"][0]["name"] == "Test Server"
        assert result["guilds"][1]["id"] == "222"
@@ -219,7 +224,7 @@ class TestServerInfo:
            "premium_subscription_count": 5,
            "verification_level": 1,
        }
-        result = json.loads(discord_server(action="server_info", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="server_info", guild_id="111"))
        assert result["name"] == "My Server"
        assert result["member_count"] == 42
        assert result["online_count"] == 10
@@ -242,7 +247,7 @@ class TestListChannels:
            {"id": "12", "name": "voice", "type": 2, "position": 1, "parent_id": "10", "topic": None, "nsfw": False},
            {"id": "13", "name": "no-category", "type": 0, "position": 0, "parent_id": None, "topic": None, "nsfw": False},
        ]
-        result = json.loads(discord_server(action="list_channels", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
        assert result["total_channels"] == 3  # excludes the category itself
        groups = result["channel_groups"]
        # Uncategorized first
@@ -257,7 +262,7 @@ class TestListChannels:
    def test_empty_guild(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        result = json.loads(discord_server(action="list_channels", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
        assert result["total_channels"] == 0


@@ -274,7 +279,7 @@ class TestChannelInfo:
            "topic": "Welcome!", "nsfw": False, "position": 0,
            "parent_id": "10", "rate_limit_per_user": 0, "last_message_id": "999",
        }
-        result = json.loads(discord_server(action="channel_info", channel_id="11"))
+        result = json.loads(discord_admin_handler(action="channel_info", channel_id="11"))
        assert result["name"] == "general"
        assert result["type"] == "text"
        assert result["guild_id"] == "111"
@@ -293,7 +298,7 @@ class TestListRoles:
            {"id": "2", "name": "Admin", "position": 2, "color": 16711680, "mentionable": True, "managed": False, "hoist": True},
            {"id": "3", "name": "Mod", "position": 1, "color": 255, "mentionable": True, "managed": False, "hoist": True},
        ]
-        result = json.loads(discord_server(action="list_roles", guild_id="111"))
+        result = json.loads(discord_admin_handler(action="list_roles", guild_id="111"))
        assert result["count"] == 3
        # Should be sorted by position descending
        assert result["roles"][0]["name"] == "Admin"
@@ -317,7 +322,7 @@ class TestMemberInfo:
            "joined_at": "2024-01-01T00:00:00Z",
            "premium_since": None,
        }
-        result = json.loads(discord_server(action="member_info", guild_id="111", user_id="42"))
+        result = json.loads(discord_admin_handler(action="member_info", guild_id="111", user_id="42"))
        assert result["username"] == "testuser"
        assert result["nickname"] == "Testy"
        assert result["roles"] == ["2", "3"]
@@ -334,7 +339,7 @@ class TestSearchMembers:
        mock_req.return_value = [
            {"user": {"id": "42", "username": "testuser", "global_name": "Test", "bot": False}, "nick": None, "roles": []},
        ]
-        result = json.loads(discord_server(action="search_members", guild_id="111", query="test"))
+        result = json.loads(discord_core(action="search_members", guild_id="111", query="test"))
        assert result["count"] == 1
        assert result["members"][0]["username"] == "testuser"
        mock_req.assert_called_once_with(
@@ -346,7 +351,7 @@ class TestSearchMembers:
    def test_search_members_limit_capped(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        discord_server(action="search_members", guild_id="111", query="x", limit=200)
+        discord_core(action="search_members", guild_id="111", query="x", limit=200)
        call_params = mock_req.call_args[1]["params"]
        assert call_params["limit"] == "100"  # Capped at 100

@@ -370,7 +375,7 @@ class TestFetchMessages:
                "pinned": False,
            },
        ]
-        result = json.loads(discord_server(action="fetch_messages", channel_id="11"))
+        result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
        assert result["count"] == 1
        assert result["messages"][0]["content"] == "Hello world"
        assert result["messages"][0]["author"]["username"] == "user1"
@@ -379,7 +384,7 @@ class TestFetchMessages:
    def test_fetch_messages_with_pagination(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = []
-        discord_server(action="fetch_messages", channel_id="11", before="999", limit=10)
+        discord_core(action="fetch_messages", channel_id="11", before="999", limit=10)
        call_params = mock_req.call_args[1]["params"]
        assert call_params["before"] == "999"
        assert call_params["limit"] == "10"
@@ -396,7 +401,7 @@ class TestListPins:
        mock_req.return_value = [
            {"id": "500", "content": "Important announcement", "author": {"username": "admin"}, "timestamp": "2024-01-01T00:00:00Z"},
        ]
-        result = json.loads(discord_server(action="list_pins", channel_id="11"))
+        result = json.loads(discord_admin_handler(action="list_pins", channel_id="11"))
        assert result["count"] == 1
        assert result["pinned_messages"][0]["content"] == "Important announcement"

@@ -410,7 +415,7 @@ class TestPinUnpin:
    def test_pin_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None  # 204
-        result = json.loads(discord_server(action="pin_message", channel_id="11", message_id="500"))
+        result = json.loads(discord_admin_handler(action="pin_message", channel_id="11", message_id="500"))
        assert result["success"] is True
        mock_req.assert_called_once_with("PUT", "/channels/11/pins/500", "test-token")

@@ -418,7 +423,7 @@ class TestPinUnpin:
    def test_unpin_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(action="unpin_message", channel_id="11", message_id="500"))
+        result = json.loads(discord_admin_handler(action="unpin_message", channel_id="11", message_id="500"))
        assert result["success"] is True


@@ -431,7 +436,7 @@ class TestCreateThread:
    def test_create_standalone_thread(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = {"id": "800", "name": "New Thread"}
-        result = json.loads(discord_server(action="create_thread", channel_id="11", name="New Thread"))
+        result = json.loads(discord_core(action="create_thread", channel_id="11", name="New Thread"))
        assert result["success"] is True
        assert result["thread_id"] == "800"
        # Verify the API call
@@ -444,7 +449,7 @@ class TestCreateThread:
    def test_create_thread_from_message(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = {"id": "801", "name": "Discussion"}
-        result = json.loads(discord_server(
+        result = json.loads(discord_core(
            action="create_thread", channel_id="11", name="Discussion", message_id="1001",
        ))
        assert result["success"] is True
@@ -463,7 +468,7 @@ class TestRoleManagement:
    def test_add_role(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="add_role", guild_id="111", user_id="42", role_id="2",
        ))
        assert result["success"] is True
@@ -475,7 +480,7 @@ class TestRoleManagement:
    def test_remove_role(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.return_value = None
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="remove_role", guild_id="111", user_id="42", role_id="2",
        ))
        assert result["success"] is True
@@ -490,15 +495,23 @@ class TestErrorHandling:
    def test_api_error_handled(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.side_effect = DiscordAPIError(403, '{"message": "Missing Access"}')
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "error" in result
        assert "403" in result["error"]

    @patch("tools.discord_tool._discord_request")
-    def test_unexpected_error_handled(self, mock_req, monkeypatch):
+    def test_unexpected_error_handled_admin(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
        mock_req.side_effect = RuntimeError("something broke")
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
+        assert "error" in result
+        assert "something broke" in result["error"]
+
+    @patch("tools.discord_tool._discord_request")
+    def test_unexpected_error_handled_core(self, mock_req, monkeypatch):
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
+        mock_req.side_effect = RuntimeError("something broke")
+        result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
        assert "error" in result
        assert "something broke" in result["error"]

@@ -508,79 +521,109 @@ class TestErrorHandling:
 # ---------------------------------------------------------------------------

 class TestRegistration:
-    def test_tool_registered(self):
+    def test_core_tool_registered(self):
        from tools.registry import registry
-        entry = registry._tools.get("discord_server")
+        entry = registry._tools.get("discord")
        assert entry is not None
-        assert entry.schema["name"] == "discord_server"
+        assert entry.schema["name"] == "discord"
        assert entry.toolset == "discord"
        assert entry.check_fn is not None
        assert entry.requires_env == ["DISCORD_BOT_TOKEN"]

-    def test_schema_actions(self):
-        """Static schema should list all actions (the model_tools post-processing
-        narrows this per-session; static registration is the superset)."""
+    def test_admin_tool_registered(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
-        actions = entry.schema["parameters"]["properties"]["action"]["enum"]
-        expected = [
-            "list_guilds", "server_info", "list_channels", "channel_info",
-            "list_roles", "member_info", "search_members", "fetch_messages",
-            "list_pins", "pin_message", "unpin_message", "create_thread",
-            "add_role", "remove_role",
-        ]
-        assert set(actions) == set(expected)
-        assert set(_ACTIONS.keys()) == set(expected)
+        entry = registry._tools.get("discord_admin")
+        assert entry is not None
+        assert entry.schema["name"] == "discord_admin"
+        assert entry.toolset == "discord_admin"
+        assert entry.check_fn is not None
+        assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
+
+    def test_core_schema_actions(self):
+        """Core static schema should list only core actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord"]
+        actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == {"fetch_messages", "search_members", "create_thread"}
+
+    def test_admin_schema_actions(self):
+        """Admin static schema should list only admin actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord_admin"]
+        actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
+        expected_admin = set(_ACTIONS.keys()) - {"fetch_messages", "search_members", "create_thread"}
+        assert actions == expected_admin
+
+    def test_all_actions_covered(self):
+        """Core + admin actions should cover all known actions."""
+        assert set(_CORE_ACTIONS.keys()) | set(_ADMIN_ACTIONS.keys()) == set(_ACTIONS.keys())
+        assert set(_CORE_ACTIONS.keys()) & set(_ADMIN_ACTIONS.keys()) == set()

    def test_schema_parameter_bounds(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        props = entry.schema["parameters"]["properties"]
        assert props["limit"]["minimum"] == 1
        assert props["limit"]["maximum"] == 100
        assert props["auto_archive_duration"]["enum"] == [60, 1440, 4320, 10080]

-    def test_schema_description_is_action_manifest(self):
-        """The top-level description should include the action manifest
-        (one-line signatures per action) so the model can find required
-        params without re-reading every parameter description."""
+    def test_core_schema_description(self):
+        """Core schema description should mention core actions."""
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        desc = entry.schema["description"]
-        # Spot-check a few entries
-        assert "list_guilds()" in desc
        assert "fetch_messages(channel_id)" in desc
+        assert "search_members(guild_id, query)" in desc
+        assert "create_thread(channel_id, name)" in desc
+        # Admin actions should NOT be in core description
+        assert "list_guilds()" not in desc
+        assert "add_role(" not in desc
+
+    def test_admin_schema_description(self):
+        """Admin schema description should mention admin actions."""
+        from tools.registry import registry
+        entry = registry._tools["discord_admin"]
+        desc = entry.schema["description"]
+        assert "list_guilds()" in desc
        assert "add_role(guild_id, user_id, role_id)" in desc
+        # Core actions should NOT be in admin description
+        assert "fetch_messages(" not in desc
+        assert "create_thread(" not in desc

    def test_handler_callable(self):
        from tools.registry import registry
-        entry = registry._tools["discord_server"]
+        entry = registry._tools["discord"]
        assert callable(entry.handler)
+        entry_admin = registry._tools["discord_admin"]
+        assert callable(entry_admin.handler)


 # ---------------------------------------------------------------------------
-# Toolset: discord_server only in hermes-discord
+# Toolset: discord / discord_admin only in hermes-discord
 # ---------------------------------------------------------------------------

 class TestToolsetInclusion:
-    def test_discord_server_in_hermes_discord_toolset(self):
+    def test_discord_tools_in_hermes_discord_toolset(self):
        from toolsets import TOOLSETS
-        assert "discord_server" in TOOLSETS["hermes-discord"]["tools"]
+        assert "discord" in TOOLSETS["hermes-discord"]["tools"]
+        assert "discord_admin" in TOOLSETS["hermes-discord"]["tools"]

-    def test_discord_server_not_in_core_tools(self):
+    def test_discord_tools_not_in_core_tools(self):
        from toolsets import _HERMES_CORE_TOOLS
-        assert "discord_server" not in _HERMES_CORE_TOOLS
+        assert "discord" not in _HERMES_CORE_TOOLS
+        assert "discord_admin" not in _HERMES_CORE_TOOLS

-    def test_discord_server_not_in_other_toolsets(self):
+    def test_discord_tools_not_in_other_toolsets(self):
        from toolsets import TOOLSETS
        for name, ts in TOOLSETS.items():
-            if name == "hermes-discord":
+            if name in ("hermes-discord", "hermes-gateway", "discord", "discord_admin"):
                continue
-            # The gateway toolset might include it if it unions all platform tools
-            if name == "hermes-gateway":
-                continue
-            assert "discord_server" not in ts.get("tools", []), (
-                f"discord_server should not be in toolset '{name}'"
+            tools = ts.get("tools", [])
+            assert "discord" not in tools or name == "discord", (
+                f"discord tool should not be in toolset '{name}'"
+            )
+            assert "discord_admin" not in tools or name == "discord_admin", (
+                f"discord_admin tool should not be in toolset '{name}'"
            )


@@ -798,40 +841,69 @@ class TestDynamicSchema:
    @patch("tools.discord_tool._discord_request")
    def test_no_token_returns_none(self, mock_req, monkeypatch):
        monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
-        assert get_dynamic_schema() is None
+        assert get_dynamic_schema_core() is None
+        assert get_dynamic_schema_admin() is None
        mock_req.assert_not_called()

    @patch("tools.discord_tool._discord_request")
-    def test_full_intents_full_schema(self, mock_req, monkeypatch):
+    def test_full_intents_core_schema(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        schema = get_dynamic_schema()
-        actions = schema["parameters"]["properties"]["action"]["enum"]
-        assert set(actions) == set(_ACTIONS.keys())
-        # No content warning
+        schema = get_dynamic_schema_core()
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_CORE_ACTIONS.keys())
+        assert schema["name"] == "discord"
+
+    @patch("tools.discord_tool._discord_request")
+    def test_full_intents_admin_schema(self, mock_req, monkeypatch):
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
+        schema = get_dynamic_schema_admin()
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_ADMIN_ACTIONS.keys())
+        assert schema["name"] == "discord_admin"
+        # No content warning when MESSAGE_CONTENT is enabled
        assert "MESSAGE_CONTENT" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
-    def test_no_members_intent_removes_member_actions_from_schema(
+    def test_no_members_intent_removes_member_actions_from_admin_schema(
        self, mock_req, monkeypatch,
    ):
+        """member_info is an admin action; it should be hidden when
+        GUILD_MEMBERS intent is missing."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": 1 << 18}  # only MESSAGE_CONTENT
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_admin()
+        actions = schema["parameters"]["properties"]["action"]["enum"]
+        assert "member_info" not in actions
+        assert "member_info" not in schema["description"]
+
+    @patch("tools.discord_tool._discord_request")
+    def test_no_members_intent_hides_search_members_from_core(
+        self, mock_req, monkeypatch,
+    ):
+        """search_members is a core action gated by GUILD_MEMBERS intent."""
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": 1 << 18}  # only MESSAGE_CONTENT
+        schema = get_dynamic_schema_core()
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert "search_members" not in actions
-        assert "member_info" not in actions
-        # Manifest description should also not advertise them
-        assert "search_members" not in schema["description"]
-        assert "member_info" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
    def test_no_message_content_adds_warning_note(self, mock_req, monkeypatch):
@@ -841,41 +913,53 @@ class TestDynamicSchema:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.return_value = {"flags": 1 << 14}  # only GUILD_MEMBERS
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_core()
        assert "MESSAGE_CONTENT" in schema["description"]
        # But fetch_messages is still available
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert "fetch_messages" in actions

    @patch("tools.discord_tool._discord_request")
-    def test_config_allowlist_narrows_schema(self, mock_req, monkeypatch):
+    def test_config_allowlist_narrows_admin_schema(self, mock_req, monkeypatch):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "list_guilds,list_channels"}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        schema = get_dynamic_schema()
+        schema = get_dynamic_schema_admin()
        actions = schema["parameters"]["properties"]["action"]["enum"]
        assert actions == ["list_guilds", "list_channels"]
-        # Manifest description should only show allowed ones (check for
-        # the signature marker, which is specific to manifest lines)
        assert "list_guilds()" in schema["description"]
        assert "add_role(" not in schema["description"]
-        assert "create_thread(" not in schema["description"]

    @patch("tools.discord_tool._discord_request")
-    def test_empty_allowlist_with_valid_values_hides_tool(self, mock_req, monkeypatch):
+    def test_empty_allowlist_with_valid_values_hides_tools(self, mock_req, monkeypatch):
        """If the allowlist resolves to zero valid actions (e.g. all names
-        were typos), get_dynamic_schema returns None so the tool is dropped
-        entirely rather than showing an empty enum."""
+        were typos), both schema builders return None so both tools are dropped."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "typo_one,typo_two"}},
        )
        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
-        assert get_dynamic_schema() is None
+        assert get_dynamic_schema_core() is None
+        assert get_dynamic_schema_admin() is None
+
+    @patch("tools.discord_tool._discord_request")
+    def test_backward_compat_wrapper(self, mock_req, monkeypatch):
+        """get_dynamic_schema() should delegate to get_dynamic_schema_core()."""
+        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
+        monkeypatch.setattr(
+            "hermes_cli.config.load_config",
+            lambda: {"discord": {"server_actions": ""}},
+        )
+        mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
+        schema = get_dynamic_schema()
+        assert schema is not None
+        assert schema["name"] == "discord"
+        actions = set(schema["parameters"]["properties"]["action"]["enum"])
+        assert actions == set(_CORE_ACTIONS.keys())


 # ---------------------------------------------------------------------------
@@ -890,7 +974,7 @@ class TestRuntimeAllowlistEnforcement:
            "hermes_cli.config.load_config",
            lambda: {"discord": {"server_actions": "list_guilds"}},
        )
-        result = json.loads(discord_server(action="add_role", guild_id="1", user_id="2", role_id="3"))
+        result = json.loads(discord_admin_handler(action="add_role", guild_id="1", user_id="2", role_id="3"))
        assert "error" in result
        assert "disabled by config" in result["error"]
        mock_req.assert_not_called()
@@ -903,7 +987,7 @@ class TestRuntimeAllowlistEnforcement:
            lambda: {"discord": {"server_actions": "list_guilds"}},
        )
        mock_req.return_value = []
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "guilds" in result


@@ -930,7 +1014,7 @@ class Test403Enrichment:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.side_effect = DiscordAPIError(403, '{"message":"Missing Permissions"}')
-        result = json.loads(discord_server(
+        result = json.loads(discord_admin_handler(
            action="add_role", guild_id="1", user_id="2", role_id="3",
        ))
        assert "error" in result
@@ -944,7 +1028,7 @@ class Test403Enrichment:
            lambda: {"discord": {"server_actions": ""}},
        )
        mock_req.side_effect = DiscordAPIError(500, "server error")
-        result = json.loads(discord_server(action="list_guilds"))
+        result = json.loads(discord_admin_handler(action="list_guilds"))
        assert "500" in result["error"]
        assert "MANAGE_ROLES" not in result["error"]

@@ -961,10 +1045,10 @@ class TestModelToolsIntegration:
        _reset_capability_cache()

    @patch("tools.discord_tool._discord_request")
-    def test_discord_server_schema_rebuilt_by_get_tool_definitions(
+    def test_discord_admin_schema_rebuilt_by_get_tool_definitions(
        self, mock_req, monkeypatch,
    ):
-        """When model_tools.get_tool_definitions runs with discord_server
+        """When model_tools.get_tool_definitions runs with discord_admin
        available, it should replace the static schema with the dynamic one."""
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
        monkeypatch.setattr(
@@ -976,16 +1060,16 @@ class TestModelToolsIntegration:

        from model_tools import get_tool_definitions
        tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
-        discord_tool = next(
-            (t for t in tools if t.get("function", {}).get("name") == "discord_server"),
+        discord_admin_tool = next(
+            (t for t in tools if t.get("function", {}).get("name") == "discord_admin"),
            None,
        )
-        assert discord_tool is not None, "discord_server should be in the schema"
-        actions = discord_tool["function"]["parameters"]["properties"]["action"]["enum"]
+        assert discord_admin_tool is not None, "discord_admin should be in the schema"
+        actions = discord_admin_tool["function"]["parameters"]["properties"]["action"]["enum"]
        assert actions == ["list_guilds", "server_info"]

    @patch("tools.discord_tool._discord_request")
-    def test_discord_server_dropped_when_allowlist_empties_it(
+    def test_discord_tools_dropped_when_allowlist_empties_them(
        self, mock_req, monkeypatch,
    ):
        monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
@@ -998,4 +1082,6 @@ class TestModelToolsIntegration:
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
        names = [t.get("function", {}).get("name") for t in tools]
+        assert "discord" not in names
+        assert "discord_admin" not in names
        assert "discord_server" not in names
@@ -19,9 +19,11 @@ from unittest.mock import patch
 from tools.process_registry import (
    ProcessRegistry,
    ProcessSession,
-    WATCH_MAX_PER_WINDOW,
-    WATCH_WINDOW_SECONDS,
-    WATCH_OVERLOAD_KILL_SECONDS,
+    WATCH_MIN_INTERVAL_SECONDS,
+    WATCH_STRIKE_LIMIT,
+    WATCH_GLOBAL_MAX_PER_WINDOW,
+    WATCH_GLOBAL_WINDOW_SECONDS,
+    WATCH_GLOBAL_COOLDOWN_SECONDS,
 )


@@ -129,10 +131,15 @@ class TestCheckWatchPatterns:
        assert registry.completion_queue.empty()

    def test_hit_counter_increments(self, registry):
-        """Each delivered notification increments _watch_hits."""
+        """Each delivered notification increments _watch_hits.
+
+        With 1/15s rate limit, we need to reset cooldown between calls.
+        """
        session = _make_session(watch_patterns=["X"])
        registry._check_watch_patterns(session, "X\n")
        assert session._watch_hits == 1
+        # Reset cooldown so the second match gets delivered.
+        session._watch_cooldown_until = 0.0
        registry._check_watch_patterns(session, "X\n")
        assert session._watch_hits == 2

@@ -148,100 +155,114 @@ class TestCheckWatchPatterns:


 # =========================================================================
-# Rate limiting
+# Per-session rate limiting: 1 notification per 15s, 3 strikes → disable
 # =========================================================================

-class TestRateLimiting:
-    def test_within_window_limit(self, registry):
-        """Notifications within the rate limit all get delivered."""
+class TestPerSessionRateLimit:
+    def test_first_match_delivers(self, registry):
+        """A fresh session with no prior cooldown delivers the first match."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW
+        registry._check_watch_patterns(session, "E first\n")
+        assert registry.completion_queue.qsize() == 1
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+        assert session._watch_hits == 1
+        # Cooldown is now armed.
+        assert session._watch_cooldown_until > 0

-    def test_exceeds_window_limit(self, registry):
-        """Notifications beyond the rate limit are suppressed."""
+    def test_second_match_within_cooldown_is_suppressed(self, registry):
+        """A second match inside the 15s cooldown is dropped and counted."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW + 5):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # Only WATCH_MAX_PER_WINDOW should be in the queue
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW
-        assert session._watch_suppressed == 5
-
-    def test_window_resets(self, registry):
-        """After the window expires, notifications can flow again."""
-        session = _make_session(watch_patterns=["E"])
-        # Fill the window
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # One more should be suppressed
-        registry._check_watch_patterns(session, "E extra\n")
+        registry._check_watch_patterns(session, "E first\n")
+        assert registry.completion_queue.qsize() == 1
+        # Immediately trigger another match — well inside cooldown.
+        registry._check_watch_patterns(session, "E second\n")
+        # Still only one notification.
+        assert registry.completion_queue.qsize() == 1
        assert session._watch_suppressed == 1
+        assert session._watch_consecutive_strikes == 1

-        # Fast-forward past window
-        session._watch_window_start = time.time() - WATCH_WINDOW_SECONDS - 1
-        registry._check_watch_patterns(session, "E after reset\n")
-        # Should deliver now (window reset)
-        assert registry.completion_queue.qsize() == WATCH_MAX_PER_WINDOW + 1
-
-    def test_suppressed_count_in_next_delivery(self, registry):
-        """Suppressed count is reported in the next successful delivery."""
+    def test_many_drops_inside_window_count_as_ONE_strike(self, registry):
+        """Multiple suppressions inside the same cooldown window = 1 strike."""
        session = _make_session(watch_patterns=["E"])
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
-        # Suppress 3 more
-        for i in range(3):
-            registry._check_watch_patterns(session, f"E suppressed {i}\n")
-        assert session._watch_suppressed == 3
+        registry._check_watch_patterns(session, "E\n")
+        for _ in range(10):
+            registry._check_watch_patterns(session, "E\n")
+        assert session._watch_consecutive_strikes == 1
+        assert session._watch_suppressed == 10

-        # Fast-forward past window to allow delivery
-        session._watch_window_start = time.time() - WATCH_WINDOW_SECONDS - 1
-        registry._check_watch_patterns(session, "E back\n")
-        # Drain to the last event
-        last_evt = None
-        while not registry.completion_queue.empty():
-            last_evt = registry.completion_queue.get_nowait()
-        assert last_evt["suppressed"] == 3
-        assert session._watch_suppressed == 0  # reset after delivery
-
-
-# =========================================================================
-# Overload kill switch
-# =========================================================================
-
-class TestOverloadKillSwitch:
-    def test_sustained_overload_disables(self, registry):
-        """Sustained overload beyond threshold permanently disables watching."""
+    def test_three_strikes_disables_watch_and_promotes_to_notify(self, registry):
+        """Three consecutive strike windows → watch_disabled + notify_on_complete."""
        session = _make_session(watch_patterns=["E"])
-        # Fill the window to trigger rate limit
-        for i in range(WATCH_MAX_PER_WINDOW):
-            registry._check_watch_patterns(session, f"E {i}\n")
+        session.notify_on_complete = False

-        # Simulate sustained overload: set overload_since to past threshold
-        session._watch_overload_since = time.time() - WATCH_OVERLOAD_KILL_SECONDS - 1
-        # Force another suppressed hit
-        registry._check_watch_patterns(session, "E overload\n")
-        registry._check_watch_patterns(session, "E overload2\n")
+        for strike in range(WATCH_STRIKE_LIMIT):
+            # Emit → arms cooldown.
+            registry._check_watch_patterns(session, f"E emit {strike}\n")
+            # Attempt while inside cooldown → one strike, dropped.
+            registry._check_watch_patterns(session, f"E drop {strike}\n")
+            # Fast-forward past the cooldown for the NEXT iteration, BUT leave
+            # the strike candidate set so the cooldown-expiry branch sees
+            # "this was a strike window" and doesn't reset the counter.
+            session._watch_cooldown_until = time.time() - 0.01

+        # After WATCH_STRIKE_LIMIT strike windows the session should
+        # already be disabled; no further attempt is needed to trip it.
        assert session._watch_disabled is True
-        # Should have a watch_disabled event in the queue
+        assert session.notify_on_complete is True
+        # One watch_disabled summary event should be in the queue.
        disabled_evts = []
+        matches = 0
        while not registry.completion_queue.empty():
            evt = registry.completion_queue.get_nowait()
            if evt.get("type") == "watch_disabled":
                disabled_evts.append(evt)
+            elif evt.get("type") == "watch_match":
+                matches += 1
        assert len(disabled_evts) == 1
-        assert "too many matches" in disabled_evts[0]["message"]
+        assert "notify_on_complete" in disabled_evts[0]["message"]
+        # We should have had exactly WATCH_STRIKE_LIMIT emissions before disable.
+        assert matches == WATCH_STRIKE_LIMIT

-    def test_overload_resets_on_delivery(self, registry):
-        """Overload timer resets when a notification gets through."""
+    def test_clean_window_resets_strike_counter(self, registry):
+        """A cooldown that expires with zero drops resets the consecutive counter."""
        session = _make_session(watch_patterns=["E"])
-        # Start overload tracking
-        session._watch_overload_since = time.time() - 10
-        # But window allows delivery → overload should reset
-        registry._check_watch_patterns(session, "E ok\n")
-        assert session._watch_overload_since == 0.0
-        assert session._watch_disabled is False
+        # Emit + drop inside window → 1 strike.
+        registry._check_watch_patterns(session, "E emit\n")
+        registry._check_watch_patterns(session, "E drop\n")
+        assert session._watch_consecutive_strikes == 1
+
+        # Fast-forward past the cooldown. The drop above presumably left
+        # _watch_strike_candidate set, so the next emission on its own
+        # would treat the expired window as a strike window and keep the
+        # counter. To model a window that expired with zero drops, the
+        # flag is cleared by hand below; the cooldown-expiry branch
+        # should then reset the consecutive-strike counter.
+        session._watch_cooldown_until = time.time() - 0.01
+        # Clear strike candidate to simulate "this cooldown had no drops".
+        session._watch_strike_candidate = False
+        registry._check_watch_patterns(session, "E clean\n")
+        assert session._watch_consecutive_strikes == 0
+
+    def test_suppressed_count_in_next_delivery(self, registry):
+        """Suppressed count from a strike window is reported in the next emit."""
+        session = _make_session(watch_patterns=["E"])
+        registry._check_watch_patterns(session, "E emit\n")
+        for _ in range(4):
+            registry._check_watch_patterns(session, "E drop\n")
+        assert session._watch_suppressed == 4
+
+        # Fast-forward past cooldown.
+        session._watch_cooldown_until = time.time() - 0.01
+        # Drain the queue so we can inspect the next emission.
+        while not registry.completion_queue.empty():
+            registry.completion_queue.get_nowait()
+
+        registry._check_watch_patterns(session, "E back\n")
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+        assert evt["suppressed"] == 4
+        assert session._watch_suppressed == 0  # reset after delivery


 # =========================================================================
@@ -321,3 +342,150 @@ class TestCodeExecutionBlocked:
    def test_watch_patterns_blocked(self):
        from tools.code_execution_tool import _TERMINAL_BLOCKED_PARAMS
        assert "watch_patterns" in _TERMINAL_BLOCKED_PARAMS
+
+
+# =========================================================================
+# Suppress-after-exit (anti-spam fix)
+# =========================================================================
+
+class TestSuppressAfterExit:
+    def test_match_dropped_once_session_exited(self, registry):
+        """watch_patterns notifications stop the moment session.exited is set."""
+        session = _make_session(watch_patterns=["ERROR"])
+        # Mark the process as exited BEFORE the late chunk arrives.
+        session.exited = True
+        registry._check_watch_patterns(session, "ERROR: late buffer\n")
+        assert registry.completion_queue.empty()
+        assert session._watch_hits == 0
+
+    def test_match_still_delivered_while_session_running(self, registry):
+        """Sanity: while the process is still running, matches still deliver."""
+        session = _make_session(watch_patterns=["ERROR"])
+        session.exited = False
+        registry._check_watch_patterns(session, "ERROR: oh no\n")
+        assert not registry.completion_queue.empty()
+        evt = registry.completion_queue.get_nowait()
+        assert evt["type"] == "watch_match"
+
+
+# =========================================================================
+# Mutual exclusion: notify_on_complete wins over watch_patterns
+# =========================================================================
+
+class TestMutualExclusion:
+    def test_resolver_drops_watch_when_notify_set(self):
+        """Both flags set → watch_patterns dropped with a note."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=["ERROR", "DONE"],
+            background=True,
+        )
+        assert resolved is None
+        assert "notify_on_complete" in note
+        assert "duplicate notifications" in note
+
+    def test_resolver_keeps_watch_when_notify_off(self):
+        """notify_on_complete=False → watch_patterns kept intact."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=False,
+            watch_patterns=["ERROR"],
+            background=True,
+        )
+        assert resolved == ["ERROR"]
+        assert note == ""
+
+    def test_resolver_keeps_notify_when_no_watch(self):
+        """Only notify_on_complete set → no conflict."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=None,
+            background=True,
+        )
+        assert resolved is None
+        assert note == ""
+
+    def test_resolver_inert_when_not_background(self):
+        """Without background=True, the whole thing is a no-op."""
+        from tools.terminal_tool import _resolve_notification_flag_conflict
+
+        resolved, note = _resolve_notification_flag_conflict(
+            notify_on_complete=True,
+            watch_patterns=["ERROR"],
+            background=False,
+        )
+        assert resolved == ["ERROR"]
+        assert note == ""
+
+
+# =========================================================================
+# Global circuit breaker (cross-session overflow blocker)
+# =========================================================================
+
+class TestGlobalCircuitBreaker:
+    def test_trips_after_global_threshold(self, registry):
+        """When >N matches fire across sessions in the window, breaker trips."""
+        sessions = [
+            _make_session(sid=f"proc_s{i}", watch_patterns=["E"])
+            for i in range(WATCH_GLOBAL_MAX_PER_WINDOW + 3)
+        ]
+        # Each session fires exactly one match — individually well under the
+        # per-session cap. But collectively they should trip the global cap.
+        for s in sessions:
+            registry._check_watch_patterns(s, "E hit\n")
+
+        # Drain the queue and count event types.
+        watch_matches = 0
+        overflow_tripped = 0
+        while not registry.completion_queue.empty():
+            evt = registry.completion_queue.get_nowait()
+            if evt.get("type") == "watch_match":
+                watch_matches += 1
+            elif evt.get("type") == "watch_overflow_tripped":
+                overflow_tripped += 1
+        assert watch_matches == WATCH_GLOBAL_MAX_PER_WINDOW
+        assert overflow_tripped == 1
+        assert registry._global_watch_tripped_until > 0
+
+    def test_cooldown_suppresses_and_then_releases(self, registry):
+        """After trip, further events are suppressed; cooldown expiry emits release."""
+        # Spawn enough fresh sessions to trip the global breaker.
+        sessions = [
+            _make_session(sid=f"proc_t{i}", watch_patterns=["E"])
+            for i in range(WATCH_GLOBAL_MAX_PER_WINDOW + 1)
+        ]
+        for s in sessions:
+            registry._check_watch_patterns(s, "E hit\n")
+        assert registry._global_watch_tripped_until > 0
+
+        # Further matches from BRAND-NEW sessions during cooldown are dropped.
+        q_size_before = registry.completion_queue.qsize()
+        extra1 = _make_session(sid="proc_extra1", watch_patterns=["E"])
+        extra2 = _make_session(sid="proc_extra2", watch_patterns=["E"])
+        registry._check_watch_patterns(extra1, "E hit\n")
+        registry._check_watch_patterns(extra2, "E hit\n")
+        assert registry.completion_queue.qsize() == q_size_before  # no new events
+        assert registry._global_watch_suppressed_during_trip >= 2
+
+        # Simulate cooldown expiry.
+        registry._global_watch_tripped_until = time.time() - 1
+
+        # Next call admits AND emits the release summary.
+        released_session = _make_session(sid="proc_after", watch_patterns=["E"])
+        registry._check_watch_patterns(released_session, "E hit\n")
+        released = False
+        admitted = False
+        while not registry.completion_queue.empty():
+            evt = registry.completion_queue.get_nowait()
+            if evt.get("type") == "watch_overflow_released":
+                released = True
+                assert evt["suppressed"] >= 2
+            elif evt.get("type") == "watch_match":
+                admitted = True
+        assert released
+        assert admitted
@@ -11,7 +11,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from hermes_constants import display_hermes_home

@@ -238,6 +238,7 @@ def cronjob(
    base_url: Optional[str] = None,
    reason: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
    task_id: str = None,
@@ -265,6 +266,18 @@ def cronjob(
                if script_error:
                    return tool_error(script_error, success=False)

+            # Validate context_from references existing jobs
+            if context_from:
+                from cron.jobs import get_job as _get_job
+                refs = [context_from] if isinstance(context_from, str) else context_from
+                for ref_id in refs:
+                    if not _get_job(ref_id):
+                        return tool_error(
+                            f"context_from job '{ref_id}' not found. "
+                            "Use cronjob(action='list') to see available jobs.",
+                            success=False,
+                        )
+
            job = create_job(
                prompt=prompt or "",
                schedule=schedule,
@@ -277,6 +290,7 @@ def cronjob(
                provider=_normalize_optional_job_value(provider),
                base_url=_normalize_optional_job_value(base_url, strip_trailing_slash=True),
                script=_normalize_optional_job_value(script),
+                context_from=context_from,
                enabled_toolsets=enabled_toolsets or None,
                workdir=_normalize_optional_job_value(workdir),
            )
@@ -368,6 +382,24 @@ def cronjob(
                    if script_error:
                        return tool_error(script_error, success=False)
                updates["script"] = _normalize_optional_job_value(script) if script else None
+            if context_from is not None:
+                # Empty string / empty list clears the field; otherwise validate
+                # each referenced job exists before storing. Normalized to a list
+                # (or None) to match the shape stored by create_job().
+                if isinstance(context_from, str):
+                    refs = [context_from.strip()] if context_from.strip() else []
+                else:
+                    refs = [str(j).strip() for j in context_from if str(j).strip()]
+                if refs:
+                    from cron.jobs import get_job as _get_job
+                    for ref_id in refs:
+                        if not _get_job(ref_id):
+                            return tool_error(
+                                f"context_from job '{ref_id}' not found. "
+                                "Use cronjob(action='list') to see available jobs.",
+                                success=False,
+                            )
+                updates["context_from"] = refs or None
            if enabled_toolsets is not None:
                updates["enabled_toolsets"] = enabled_toolsets or None
            if workdir is not None:
@@ -473,6 +505,19 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
                "type": "string",
                "description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
            },
+            "context_from": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": (
+                    "Optional job ID or list of job IDs whose most recent completed output is "
+                    "injected into the prompt as context before each run. "
+                    "Use this to chain cron jobs: job A collects data, job B processes it. "
+                    "Each entry must be a valid job ID (from cronjob action='list'). "
+                    "Note: injects the most recent completed output — does not wait for "
+                    "upstream jobs running in the same tick. "
+                    "On update, pass an empty array to clear."
+                ),
+            },
            "enabled_toolsets": {
                "type": "array",
                "items": {"type": "string"},
@@ -526,6 +571,7 @@ registry.register(
        base_url=args.get("base_url"),
        reason=args.get("reason"),
        script=args.get("script"),
+        context_from=args.get("context_from"),
        enabled_toolsets=args.get("enabled_toolsets"),
        workdir=args.get("workdir"),
        task_id=kw.get("task_id"),
@@ -33,6 +33,7 @@ from typing import Any, Dict, List, Optional

 from toolsets import TOOLSETS
 from tools import file_state
+from tools.terminal_tool import set_approval_callback as _set_subagent_approval_cb
 from utils import base_url_hostname, is_truthy_value


@@ -47,6 +48,64 @@ DELEGATE_BLOCKED_TOOLS = frozenset(
    ]
 )

+
+# ---------------------------------------------------------------------------
+# Subagent approval callbacks
+# ---------------------------------------------------------------------------
+# Subagents run inside a ThreadPoolExecutor worker. The CLI's interactive
+# approval callback is stored in tools/terminal_tool.py's threading.local(),
+# so worker threads do NOT inherit it. Without a callback,
+# prompt_dangerous_approval() falls back to input() from the worker thread,
+# which deadlocks against the parent's prompt_toolkit TUI that owns stdin.
+#
+# Fix: install a non-interactive callback into every subagent worker thread
+# via ThreadPoolExecutor(initializer=_set_subagent_approval_cb, initargs=(cb,)).
+# The callback is chosen by the `delegation.subagent_auto_approve` config:
+#   false (default) → _subagent_auto_deny (safe; matches leaf tool blocklist)
+#   true            → _subagent_auto_approve (opt-in YOLO for cron/batch)
+# Both emit a logger.warning for audit; gateway sessions are unaffected
+# because they resolve approvals via tools/approval.py's per-session queue,
+# not through these TLS callbacks.
+def _subagent_auto_deny(command: str, description: str, **kwargs) -> str:
+    """Auto-deny dangerous commands in subagent threads (safe default).
+
+    Returns 'deny' so the subagent sees a refusal it can recover from, and
+    never calls input() (which would deadlock the parent TUI).
+    """
+    logger.warning(
+        "Subagent auto-denied dangerous command: %s (%s). "
+        "Set delegation.subagent_auto_approve: true to allow.",
+        command, description,
+    )
+    return "deny"
+
+
+def _subagent_auto_approve(command: str, description: str, **kwargs) -> str:
+    """Auto-approve dangerous commands in subagent threads (opt-in YOLO).
+
+    Only installed when delegation.subagent_auto_approve=true. Returns 'once'
+    so the subagent proceeds without blocking the parent UI.
+    """
+    logger.warning(
+        "Subagent auto-approved dangerous command: %s (%s)",
+        command, description,
+    )
+    return "once"
+
+
+def _get_subagent_approval_callback():
+    """Return the callback to install into subagent worker threads.
+
+    Config key: delegation.subagent_auto_approve (bool, default False).
+    Reads via the same _load_config() path as the rest of delegate_task, so
+    the value comes from config.yaml or else the default (no env override).
+    """
+    cfg = _load_config()
+    val = cfg.get("subagent_auto_approve", False)
+    if is_truthy_value(val):
+        return _subagent_auto_approve
+    return _subagent_auto_deny
+
 # Build a description fragment listing toolsets available for subagents.
 # Excludes toolsets where ALL tools are blocked, composite/platform toolsets
 # (hermes-* prefixed), and scenario toolsets.
@@ -276,7 +335,14 @@ def _get_max_concurrent_children() -> int:
    val = cfg.get("max_concurrent_children")
    if val is not None:
        try:
-            return max(1, int(val))
+            result = max(1, int(val))
+            if result > 10:
+                logger.warning(
+                    "delegation.max_concurrent_children=%d: each child consumes API tokens "
+                    "independently. High values multiply cost linearly.",
+                    result,
+                )
+            return result
        except (TypeError, ValueError):
            logger.warning(
                "delegation.max_concurrent_children=%r is not a valid integer; "
@@ -1337,7 +1403,15 @@ def _run_single_child(
        # Run child with a hard timeout to prevent indefinite blocking
        # when the child's API call or tool-level HTTP request hangs.
        child_timeout = _get_child_timeout()
-        _timeout_executor = ThreadPoolExecutor(max_workers=1)
+        _timeout_executor = ThreadPoolExecutor(
+            max_workers=1,
+            # Install a non-interactive approval callback in the worker thread
+            # so dangerous-command prompts from the subagent don't fall back to
+            # input() and deadlock the parent's prompt_toolkit TUI.
+            # Callback (deny vs approve) is governed by delegation.subagent_auto_approve.
+            initializer=_set_subagent_approval_cb,
+            initargs=(_get_subagent_approval_callback(),),
+        )
        # Capture the worker thread so the timeout diagnostic can dump its
        # Python stack (see #14726 — 0-API-call hangs are opaque without it).
        _worker_thread_holder: Dict[str, Optional[threading.Thread]] = {"t": None}
@@ -2229,8 +2303,8 @@ DELEGATE_TASK_SCHEMA = {
        "never enter your context window.\n\n"
        "TWO MODES (one of 'goal' or 'tasks' is required):\n"
        "1. Single task: provide 'goal' (+ optional context, toolsets)\n"
-        "2. Batch (parallel): provide 'tasks' array with up to delegation.max_concurrent_children items (default 3). "
-        "All run concurrently and results are returned together.\n\n"
+        "2. Batch (parallel): provide 'tasks' array with up to delegation.max_concurrent_children items (default 3, configurable via config.yaml, no hard ceiling). "
+        "All run concurrently and results are returned together. Nested delegation requires role='orchestrator' and delegation.max_spawn_depth >= 2.\n\n"
        "WHEN TO USE delegate_task:\n"
        "- Reasoning-heavy subtasks (debugging, code review, research synthesis)\n"
        "- Tasks that would flood your context with intermediate data\n"
@@ -473,6 +473,12 @@ _ACTIONS = {
    "remove_role": _remove_role,
 }

+_CORE_ACTION_NAMES = frozenset({"fetch_messages", "search_members", "create_thread"})
+_ADMIN_ACTION_NAMES = frozenset(_ACTIONS.keys()) - _CORE_ACTION_NAMES
+
+_CORE_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _CORE_ACTION_NAMES}
+_ADMIN_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _ADMIN_ACTION_NAMES}
+
 # Single-source-of-truth manifest: action → (signature, one-line description).
 # Consumed by :func:`_build_schema` so the schema's top-level description
 # always matches the registered action set.
@@ -531,7 +537,7 @@ def _load_allowed_actions_config() -> Optional[List[str]]:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as exc:
-        logger.debug("discord_server: could not load config (%s); allowing all actions.", exc)
+        logger.debug("discord: could not load config (%s); allowing all actions.", exc)
        return None

    raw = (cfg.get("discord") or {}).get("server_actions")
@@ -586,12 +592,16 @@ def _available_actions(
 def _build_schema(
    actions: List[str],
    caps: Optional[Dict[str, Any]] = None,
-) -> Dict[str, Any]:
-    """Build the tool schema for the given filtered action list."""
+    tool_name: str = "discord",
+) -> Optional[Dict[str, Any]]:
+    """Build the tool schema for the given filtered action list.
+
+    Returns ``None`` when *actions* is empty — callers should drop the
+    tool from registration in that case.
+    """
    caps = caps or {}
    if not actions:
-        # Tool shouldn't be registered when empty, but guard anyway.
-        actions = list(_ACTIONS.keys())
+        return None

    # Action manifest lines (action-first, parameter-scoped).
    manifest_lines = [
@@ -602,24 +612,36 @@ def _build_schema(
    manifest_block = "\n".join(manifest_lines)

    content_note = ""
-    if caps.get("detected") and caps.get("has_message_content") is False:
+    affected_actions = {"fetch_messages", "list_pins"} & set(actions)
+    if affected_actions and caps.get("detected") and caps.get("has_message_content") is False:
+        names = " and ".join(sorted(affected_actions))
        content_note = (
-            "\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
-            "fetch_messages and list_pins will return message metadata (author, "
+            f"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
+            f"{names} will return message metadata (author, "
            "timestamps, attachments, reactions, pin state) but `content` will be "
            "empty for messages not sent as a direct mention to the bot or in DMs. "
            "Enable the intent in the Discord Developer Portal to see all content."
        )

-    description = (
-        "Query and manage a Discord server via the REST API.\n\n"
-        "Available actions:\n"
-        f"{manifest_block}\n\n"
-        "Call list_guilds first to discover guild_ids, then list_channels for "
-        "channel_ids. Runtime errors will tell you if the bot lacks a specific "
-        "per-guild permission (e.g. MANAGE_ROLES for add_role)."
-        f"{content_note}"
-    )
+    if tool_name == "discord_admin":
+        description = (
+            "Manage a Discord server via the REST API.\n\n"
+            "Available actions:\n"
+            f"{manifest_block}\n\n"
+            "Call list_guilds first to discover guild_ids, then list_channels for "
+            "channel_ids. Runtime errors will tell you if the bot lacks a specific "
+            "per-guild permission (e.g. MANAGE_ROLES for add_role)."
+            f"{content_note}"
+        )
+    else:
+        description = (
+            "Read and participate in a Discord server.\n\n"
+            "Available actions:\n"
+            f"{manifest_block}\n\n"
+            "Use the channel_id from the current conversation context. "
+            "Use search_members to look up user IDs by name prefix."
+            f"{content_note}"
+        )

    properties: Dict[str, Any] = {
        "action": {
@@ -676,7 +698,7 @@ def _build_schema(
    }

    return {
-        "name": "discord_server",
+        "name": tool_name,
        "description": description,
        "parameters": {
            "type": "object",
@@ -686,28 +708,33 @@ def _build_schema(
    }


-def get_dynamic_schema() -> Optional[Dict[str, Any]]:
-    """Return a schema filtered by current intents + config allowlist.
-
-    Called by ``model_tools.get_tool_definitions`` as a post-processing
-    step so the schema the model sees always reflects reality. Returns
-    ``None`` when no actions are available (tool should be removed from
-    the schema list entirely).
-    """
+def _get_dynamic_schema(
+    action_subset: Dict[str, Any],
+    tool_name: str,
+) -> Optional[Dict[str, Any]]:
+    """Build a dynamic schema for *action_subset* filtered by intents + config."""
    token = _get_bot_token()
    if not token:
        return None
-
    caps = _detect_capabilities(token)
    allowlist = _load_allowed_actions_config()
-    actions = _available_actions(caps, allowlist)
+    actions = [a for a in _available_actions(caps, allowlist) if a in action_subset]
    if not actions:
-        logger.warning(
-            "discord_server: config allowlist/intents left zero available actions; "
-            "hiding tool from this session."
-        )
        return None
-    return _build_schema(actions, caps)
+    return _build_schema(actions, caps, tool_name=tool_name)
+
+
+def get_dynamic_schema_core() -> Optional[Dict[str, Any]]:
+    return _get_dynamic_schema(_CORE_ACTIONS, "discord")
+
+
+def get_dynamic_schema_admin() -> Optional[Dict[str, Any]]:
+    return _get_dynamic_schema(_ADMIN_ACTIONS, "discord_admin")
+
+
+def get_dynamic_schema() -> Optional[Dict[str, Any]]:
+    """Backward-compat wrapper — returns only the core ("discord") schema; admin actions are excluded."""
+    return get_dynamic_schema_core()


 # ---------------------------------------------------------------------------
@@ -774,11 +801,13 @@ def check_discord_tool_requirements() -> bool:


 # ---------------------------------------------------------------------------
-# Main handler
+# Handlers
 # ---------------------------------------------------------------------------

-def discord_server(
+def _run_discord_action(
    action: str,
+    valid_actions: Dict[str, Any],
+    tool_label: str,
    guild_id: str = "",
    channel_id: str = "",
    user_id: str = "",
@@ -790,18 +819,17 @@ def discord_server(
    before: str = "",
    after: str = "",
    auto_archive_duration: int = 1440,
-    task_id: str = None,
 ) -> str:
-    """Execute a Discord server action."""
+    """Shared handler logic for both discord tools."""
    token = _get_bot_token()
    if not token:
        return json.dumps({"error": "DISCORD_BOT_TOKEN not configured."})

-    action_fn = _ACTIONS.get(action)
+    action_fn = valid_actions.get(action)
    if not action_fn:
        return json.dumps({
            "error": f"Unknown action: {action}",
-            "available_actions": list(_ACTIONS.keys()),
+            "available_actions": list(valid_actions.keys()),
        })

    # Config-level allowlist gate (defense in depth — schema already filtered,
@@ -848,44 +876,64 @@ def discord_server(
            auto_archive_duration=auto_archive_duration,
        )
    except DiscordAPIError as e:
-        logger.warning("Discord API error in action '%s': %s", action, e)
+        logger.warning("Discord API error in %s action '%s': %s", tool_label, action, e)
        if e.status == 403:
            return json.dumps({"error": _enrich_403(action, e.body)})
        return json.dumps({"error": str(e)})
    except Exception as e:
-        logger.exception("Unexpected error in discord_server action '%s'", action)
+        logger.exception("Unexpected error in %s action '%s'", tool_label, action)
        return json.dumps({"error": f"Unexpected error: {e}"})


+def discord_core(action: str, **kwargs) -> str:
+    """Execute a core Discord action (fetch_messages, search_members, create_thread)."""
+    return _run_discord_action(action, _CORE_ACTIONS, "discord", **kwargs)
+
+
+def discord_admin_handler(action: str, **kwargs) -> str:
+    """Execute a Discord admin action (server management)."""
+    return _run_discord_action(action, _ADMIN_ACTIONS, "discord_admin", **kwargs)
+
+
 # ---------------------------------------------------------------------------
 # Tool registration
 # ---------------------------------------------------------------------------

-# Register with the full unfiltered schema. ``model_tools.get_tool_definitions``
-# rebuilds this per-session via ``get_dynamic_schema`` so the model only ever
-# sees intent-available, config-allowed actions. The static registration is a
-# safe baseline for tools that inspect the registry directly.
-_STATIC_SCHEMA = _build_schema(list(_ACTIONS.keys()), caps={"detected": False})
+_HANDLER_DEFAULTS = {
+    "action": "", "guild_id": "", "channel_id": "", "user_id": "",
+    "role_id": "", "message_id": "", "query": "", "name": "",
+    "limit": 50, "before": "", "after": "", "auto_archive_duration": 1440,
+}
+
+
+def _make_handler(handler_fn):
+    """Create a registry-compatible handler lambda for a discord handler."""
+    return lambda args, **kw: handler_fn(
+        **{k: args.get(k, v) for k, v in _HANDLER_DEFAULTS.items()},
+    )
+
+
+_STATIC_CORE_SCHEMA = _build_schema(
+    list(_CORE_ACTIONS.keys()), caps={"detected": False}, tool_name="discord",
+)
+_STATIC_ADMIN_SCHEMA = _build_schema(
+    list(_ADMIN_ACTIONS.keys()), caps={"detected": False}, tool_name="discord_admin",
+)

 registry.register(
-    name="discord_server",
+    name="discord",
    toolset="discord",
-    schema=_STATIC_SCHEMA,
-    handler=lambda args, **kw: discord_server(
-        action=args.get("action", ""),
-        guild_id=args.get("guild_id", ""),
-        channel_id=args.get("channel_id", ""),
-        user_id=args.get("user_id", ""),
-        role_id=args.get("role_id", ""),
-        message_id=args.get("message_id", ""),
-        query=args.get("query", ""),
-        name=args.get("name", ""),
-        limit=args.get("limit", 50),
-        before=args.get("before", ""),
-        after=args.get("after", ""),
-        auto_archive_duration=args.get("auto_archive_duration", 1440),
-        task_id=kw.get("task_id"),
-    ),
+    schema=_STATIC_CORE_SCHEMA,
+    handler=_make_handler(discord_core),
+    check_fn=check_discord_tool_requirements,
+    requires_env=["DISCORD_BOT_TOKEN"],
+)
+
+registry.register(
+    name="discord_admin",
+    toolset="discord_admin",
+    schema=_STATIC_ADMIN_SCHEMA,
+    handler=_make_handler(discord_admin_handler),
    check_fn=check_discord_tool_requirements,
    requires_env=["DISCORD_BOT_TOKEN"],
 )
@@ -58,10 +58,20 @@ MAX_OUTPUT_CHARS = 200_000      # 200KB rolling output buffer
 FINISHED_TTL_SECONDS = 1800     # Keep finished processes for 30 minutes
 MAX_PROCESSES = 64              # Max concurrent tracked processes (LRU pruning)

-# Watch pattern rate limiting
-WATCH_MAX_PER_WINDOW = 8        # Max notifications delivered per window
-WATCH_WINDOW_SECONDS = 10       # Rolling window length
-WATCH_OVERLOAD_KILL_SECONDS = 45  # Sustained overload duration before disabling watch
+# Watch pattern rate limiting — PER SESSION.
+# Hard rule: at most ONE watch-match notification every WATCH_MIN_INTERVAL_SECONDS.
+# Matches arriving inside that cooldown window are dropped; each such window counts as ONE strike.
+# After WATCH_STRIKE_LIMIT consecutive strike windows, watch_patterns for that
+# session is permanently disabled and the session falls back to notify_on_complete
+# semantics (one notification when the process actually exits).
+WATCH_MIN_INTERVAL_SECONDS = 15   # Minimum spacing between consecutive watch matches
+WATCH_STRIKE_LIMIT = 3            # Strikes in a row → disable watch + promote to notify_on_complete
+
+# Global circuit breaker — across all sessions. Secondary safety net so concurrent
+# siblings can't collectively flood the user even when each is under its own cap.
+WATCH_GLOBAL_MAX_PER_WINDOW = 15
+WATCH_GLOBAL_WINDOW_SECONDS = 10
+WATCH_GLOBAL_COOLDOWN_SECONDS = 30


 def format_uptime_short(seconds: int) -> str:
@@ -105,10 +115,18 @@ class ProcessSession:
    watch_patterns: List[str] = field(default_factory=list)
    _watch_hits: int = field(default=0, repr=False)          # total matches delivered
    _watch_suppressed: int = field(default=0, repr=False)    # matches dropped by rate limit
-    _watch_overload_since: float = field(default=0.0, repr=False)  # when sustained overload began
-    _watch_disabled: bool = field(default=False, repr=False) # permanently killed by overload
-    _watch_window_hits: int = field(default=0, repr=False)   # hits in current rate window
-    _watch_window_start: float = field(default=0.0, repr=False)
+    _watch_disabled: bool = field(default=False, repr=False) # permanently killed after strike limit
+    # Per-session rate limit state: at most one match every WATCH_MIN_INTERVAL_SECONDS.
+    # When an emission happens, _watch_cooldown_until is set to now + interval and
+    # _watch_strike_candidate becomes True. The next match to arrive before that
+    # deadline counts as one strike (regardless of how many matches were dropped in
+    # between — a strike is a window, not a match). After WATCH_STRIKE_LIMIT strikes
+    # in a row, watch_patterns is disabled and the session promotes to
+    # notify_on_complete.
+    _watch_last_emit_at: float = field(default=0.0, repr=False)
+    _watch_cooldown_until: float = field(default=0.0, repr=False)
+    _watch_strike_candidate: bool = field(default=False, repr=False)
+    _watch_consecutive_strikes: int = field(default=0, repr=False)
    _lock: threading.Lock = field(default_factory=threading.Lock)
    _reader_thread: Optional[threading.Thread] = field(default=None, repr=False)
    _pty: Any = field(default=None, repr=False)  # ptyprocess handle (when use_pty=True)
@@ -151,6 +169,15 @@ class ProcessRegistry:
        # via wait/poll/log.  Drain loops skip notifications for these.
        self._completion_consumed: set = set()

+        # Global watch-match circuit breaker — across all sessions.
+        # Prevents sibling processes from collectively flooding the user even
+        # when each stays under its own per-session cap.
+        self._global_watch_lock = threading.Lock()
+        self._global_watch_window_start: float = 0.0
+        self._global_watch_window_hits: int = 0
+        self._global_watch_tripped_until: float = 0.0
+        self._global_watch_suppressed_during_trip: int = 0
+
    @staticmethod
    def _clean_shell_noise(text: str) -> str:
        """Strip shell startup warnings from the beginning of output."""
@@ -163,12 +190,23 @@ class ProcessRegistry:
        """Scan new output for watch patterns and queue notifications.

        Called from reader threads with new_text being the freshly-read chunk.
-        Rate-limited: max WATCH_MAX_PER_WINDOW notifications per WATCH_WINDOW_SECONDS.
-        If sustained overload exceeds WATCH_OVERLOAD_KILL_SECONDS, watching is
-        disabled permanently for this process.
+
+        Per-session rate limit: at most ONE watch-match notification per
+        WATCH_MIN_INTERVAL_SECONDS. Any match arriving inside the cooldown
+        window is dropped and counts as ONE strike for that window. After
+        WATCH_STRIKE_LIMIT consecutive strike windows, watch_patterns is
+        disabled for this session and the session is promoted to
+        notify_on_complete semantics — one notification when the process
+        actually exits, no more mid-process spam.
        """
        if not session.watch_patterns or session._watch_disabled:
            return
+        # Suppress-after-exit: once the reader loop has declared the process
+        # exited, any late chunk we still see is post-exit noise. Dropping these
+        # prevents the "stale notifications delivered minutes after the process
+        # ended" spam when completion_queue consumers run async.
+        if session.exited:
+            return

        # Scan new text line-by-line for pattern matches
        matched_lines = []
@@ -185,55 +223,80 @@ class ProcessRegistry:
            return

        now = time.time()
+        should_disable = False
        with session._lock:
-            # Reset window if it's expired
-            if now - session._watch_window_start >= WATCH_WINDOW_SECONDS:
-                session._watch_window_hits = 0
-                session._watch_window_start = now
-
-            # Check rate limit
-            if session._watch_window_hits >= WATCH_MAX_PER_WINDOW:
+            # Case 1: still inside the cooldown from the last emission.
+            # Count this as a strike for the current window (only once per window)
+            # and drop the event. If we've hit the strike limit, disable watch
+            # and promote to notify_on_complete.
+            if session._watch_cooldown_until and now < session._watch_cooldown_until:
                session._watch_suppressed += len(matched_lines)
+                if not session._watch_strike_candidate:
+                    # First drop in this window — count one strike.
+                    session._watch_strike_candidate = True
+                    session._watch_consecutive_strikes += 1
+                    if session._watch_consecutive_strikes >= WATCH_STRIKE_LIMIT:
+                        session._watch_disabled = True
+                        # Promote to notify_on_complete so the agent still gets
+                        # exactly one notification when the process actually ends.
+                        session.notify_on_complete = True
+                        should_disable = True
+                return_early = True
+            else:
+                # Case 2: cooldown has expired.
+                # Decide whether this window was a "clean" one (no drops) or a
+                # strike window. If no strike candidate was set during the prior
+                # cooldown, reset the consecutive-strike counter — we're back to
+                # healthy emission cadence.
+                if (
+                    session._watch_cooldown_until
+                    and not session._watch_strike_candidate
+                ):
+                    session._watch_consecutive_strikes = 0
+                session._watch_strike_candidate = False

-                # Track sustained overload for kill switch
-                if session._watch_overload_since == 0.0:
-                    session._watch_overload_since = now
-                elif now - session._watch_overload_since > WATCH_OVERLOAD_KILL_SECONDS:
-                    session._watch_disabled = True
-                    self.completion_queue.put({
-                        "session_id": session.id,
-                        "session_key": session.session_key,
-                        "command": session.command,
-                        "type": "watch_disabled",
-                        "suppressed": session._watch_suppressed,
-                        "platform": session.watcher_platform,
-                        "chat_id": session.watcher_chat_id,
-                        "user_id": session.watcher_user_id,
-                        "user_name": session.watcher_user_name,
-                        "thread_id": session.watcher_thread_id,
-                        "message": (
-                            f"Watch patterns disabled for process {session.id} — "
-                            f"too many matches ({session._watch_suppressed} suppressed). "
-                            f"Use process(action='poll') to check output manually."
-                        ),
-                    })
-                return
+                # Emit the notification and start a new cooldown window.
+                session._watch_last_emit_at = now
+                session._watch_cooldown_until = now + WATCH_MIN_INTERVAL_SECONDS
+                session._watch_hits += 1
+                suppressed = session._watch_suppressed
+                session._watch_suppressed = 0
+                return_early = False

-            # Under the rate limit — deliver notification
-            session._watch_window_hits += 1
-            session._watch_hits += 1
-            # Clear overload tracker since we got a delivery through
-            session._watch_overload_since = 0.0
-
-            # Include suppressed count if any events were dropped
-            suppressed = session._watch_suppressed
-            session._watch_suppressed = 0
+        if return_early:
+            if should_disable:
+                # Emit exactly one "watch disabled, falling back to notify_on_complete"
+                # summary event so the agent/user sees why things went quiet.
+                self.completion_queue.put({
+                    "session_id": session.id,
+                    "session_key": session.session_key,
+                    "command": session.command,
+                    "type": "watch_disabled",
+                    "suppressed": session._watch_suppressed,
+                    "platform": session.watcher_platform,
+                    "chat_id": session.watcher_chat_id,
+                    "user_id": session.watcher_user_id,
+                    "user_name": session.watcher_user_name,
+                    "thread_id": session.watcher_thread_id,
+                    "message": (
+                        f"Watch patterns disabled for process {session.id} — "
+                        f"{WATCH_STRIKE_LIMIT} consecutive rate-limit windows triggered "
+                        f"(min spacing {WATCH_MIN_INTERVAL_SECONDS}s). "
+                        f"Falling back to notify_on_complete semantics; you'll get "
+                        f"exactly one notification when the process exits."
+                    ),
+                })
+            return

        # Trim matched output to a reasonable size
        output = "\n".join(matched_lines[:20])
        if len(output) > 2000:
            output = output[:2000] + "\n...(truncated)"

+        # Global circuit breaker — across all sessions (secondary safety net).
+        if not self._global_watch_admit(now):
+            return
+
        self.completion_queue.put({
            "session_id": session.id,
            "session_key": session.session_key,
@@ -249,6 +312,93 @@ class ProcessRegistry:
            "thread_id": session.watcher_thread_id,
        })

+    def _global_watch_admit(self, now: float) -> bool:
+        """Return True if this watch_match event is allowed through the global breaker.
+
+        Semantics:
+        - If the breaker is tripped and still cooling down, drop the event and count it.
+        - Otherwise, restart the fixed window if it has elapsed and check the global cap.
+        - If the cap is exceeded, trip the breaker for WATCH_GLOBAL_COOLDOWN_SECONDS
+          and emit ONE "overflow tripped" summary event instead of individual matches.
+        - On the first call after the cooldown ends, reset the counters and emit a
+          release summary reporting how many events were suppressed.
+        """
+        with self._global_watch_lock:
+            # Handle cooldown expiry first so we can emit the release summary.
+            if self._global_watch_tripped_until and now >= self._global_watch_tripped_until:
+                suppressed = self._global_watch_suppressed_during_trip
+                self._global_watch_tripped_until = 0.0
+                self._global_watch_suppressed_during_trip = 0
+                self._global_watch_window_start = now
+                self._global_watch_window_hits = 0
+                if suppressed > 0:
+                    # Queue a summary event outside the lock (below).
+                    release_msg = {
+                        "session_id": "",
+                        "session_key": "",
+                        "command": "",
+                        "type": "watch_overflow_released",
+                        "suppressed": suppressed,
+                        "message": (
+                            f"Watch-pattern notifications resumed. "
+                            f"{suppressed} match event(s) were suppressed during the flood."
+                        ),
+                        "platform": "",
+                        "chat_id": "",
+                        "user_id": "",
+                        "user_name": "",
+                        "thread_id": "",
+                    }
+                else:
+                    release_msg = None
+            else:
+                release_msg = None
+
+            # Still in cooldown — drop and count.
+            if self._global_watch_tripped_until and now < self._global_watch_tripped_until:
+                self._global_watch_suppressed_during_trip += 1
+                admit = False
+                trip_now = None
+            else:
+                # Start a fresh window once the current one has elapsed.
+                if now - self._global_watch_window_start >= WATCH_GLOBAL_WINDOW_SECONDS:
+                    self._global_watch_window_start = now
+                    self._global_watch_window_hits = 0
+
+                if self._global_watch_window_hits >= WATCH_GLOBAL_MAX_PER_WINDOW:
+                    # Trip the breaker.
+                    self._global_watch_tripped_until = now + WATCH_GLOBAL_COOLDOWN_SECONDS
+                    self._global_watch_suppressed_during_trip += 1
+                    trip_now = now
+                    admit = False
+                else:
+                    self._global_watch_window_hits += 1
+                    trip_now = None
+                    admit = True
+
+        # Queue summary events outside the lock.
+        if release_msg is not None:
+            self.completion_queue.put(release_msg)
+        if trip_now is not None:
+            self.completion_queue.put({
+                "session_id": "",
+                "session_key": "",
+                "command": "",
+                "type": "watch_overflow_tripped",
+                "message": (
+                    f"Watch-pattern overflow: >{WATCH_GLOBAL_MAX_PER_WINDOW} "
+                    f"notifications in {WATCH_GLOBAL_WINDOW_SECONDS}s across all processes. "
+                    f"Suppressing further watch_match events for "
+                    f"{WATCH_GLOBAL_COOLDOWN_SECONDS}s."
+                ),
+                "platform": "",
+                "chat_id": "",
+                "user_id": "",
+                "user_name": "",
+                "thread_id": "",
+            })
+        return admit
+
    @staticmethod
    def _is_host_pid_alive(pid: Optional[int]) -> bool:
        """Best-effort liveness check for host-visible PIDs."""
@@ -1388,6 +1388,33 @@ def _foreground_background_guidance(command: str) -> str | None:
    return None


+def _resolve_notification_flag_conflict(
+    *,
+    notify_on_complete: bool,
+    watch_patterns,
+    background: bool,
+) -> tuple:
+    """Decide what to do when both notify_on_complete and watch_patterns are set.
+
+    These flags produce duplicate, delayed notifications when combined — one
+    notification per watch-pattern match AND one on process exit, with async
+    delivery that can spam the user long after the process ends. When both are
+    set, we drop watch_patterns in favor of notify_on_complete (the more useful
+    "let me know when it's done" signal) and return a human-readable note.
+
+    Returns:
+        (watch_patterns_to_use, conflict_note). conflict_note is "" when there
+        is no conflict.
+    """
+    if background and notify_on_complete and watch_patterns:
+        note = (
+            "watch_patterns ignored because notify_on_complete=True; "
+            "these two flags produce duplicate notifications when combined"
+        )
+        return None, note
+    return watch_patterns, ""
+
+
 def terminal_tool(
    command: str,
    background: bool = False,
@@ -1410,8 +1437,8 @@ def terminal_tool(
        force: If True, skip dangerous command check (use after user confirms)
        workdir: Working directory for this command (optional, uses session cwd if not set)
        pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
-        notify_on_complete: If True and background=True, auto-notify the agent when the process exits
-        watch_patterns: List of strings to watch for in background output; fires a notification on first match per pattern. Use ONLY for mid-process signals (errors, readiness markers) that appear before exit. For end-of-run markers use notify_on_complete instead — stacking both produces duplicate, delayed notifications.
+        notify_on_complete: If True and background=True, you'll be notified exactly once when the process exits. The right choice for almost every long task. MUTUALLY EXCLUSIVE with watch_patterns.
+        watch_patterns: List of strings to watch for in background output. HARD rate limit: 1 notification per 15s per process. After 3 strike windows in a row, watch_patterns is disabled and the session is auto-promoted to notify_on_complete. Use ONLY for rare, one-shot mid-process signals on long-lived processes (server readiness, migration-done markers). NEVER use in loops/batch jobs — error patterns there will hit the strike limit and get disabled. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both.

    Returns:
        str: JSON string with output, exit_code, and error fields
@@ -1701,6 +1728,22 @@ def terminal_tool(
                        proc_session.watcher_user_name = _gw_user_name
                        proc_session.watcher_thread_id = _gw_thread_id

+                # Mutual exclusion: if both notify_on_complete and watch_patterns
+                # are set, drop watch_patterns. The combination produces duplicate
+                # notifications (one per match + one on exit) that deliver
+                # asynchronously and can spam the user long after the process ends.
+                # notify_on_complete is the more useful signal for "let me know
+                # when the task finishes"; watch_patterns should be reserved for
+                # standalone mid-process signals on long-lived processes.
+                watch_patterns, conflict_note = _resolve_notification_flag_conflict(
+                    notify_on_complete=bool(notify_on_complete),
+                    watch_patterns=watch_patterns,
+                    background=bool(background),
+                )
+                if conflict_note:
+                    logger.warning("background proc %s: %s", proc_session.id, conflict_note)
+                    result_data["watch_patterns_ignored"] = conflict_note
+
                # Mark for agent notification on completion
                if notify_on_complete and background:
                    proc_session.notify_on_complete = True
@@ -2039,13 +2082,13 @@ TERMINAL_SCHEMA = {
            },
            "notify_on_complete": {
                "type": "boolean",
-                "description": "When true (and background=true), you'll be automatically notified when the process finishes — no polling needed. Use this for tasks that take a while (tests, builds, deployments) so you can keep working on other things in the meantime.",
+                "description": "When true (and background=true), you'll be automatically notified exactly once when the process finishes. **This is the right choice for almost every long-running task** — tests, builds, deployments, multi-item batch jobs, anything that takes over a minute and has a defined end. Use this and keep working on other things; the system notifies you on exit. MUTUALLY EXCLUSIVE with watch_patterns — when both are set, watch_patterns is dropped.",
                "default": False
            },
            "watch_patterns": {
                "type": "array",
                "items": {"type": "string"},
-                "description": "Strings to watch for in background process output. Fires a notification the first time each pattern matches a line of output. **Use ONLY for mid-process signals** you want to react to before the process exits — errors, readiness markers, intermediate step markers (e.g. [\"ERROR\", \"Traceback\", \"listening on port\"]). Do NOT use for end-of-run markers (summary headers, 'DONE', 'PASS' printed right before exit) — use `notify_on_complete` for that instead. Stacking end-of-run patterns on top of `notify_on_complete` produces duplicate, delayed notifications that arrive after you've already moved on, since delivery is asynchronous and continues after the process exits."
+                "description": "Strings to watch for in background process output. HARD RATE LIMIT: at most 1 notification per 15 seconds per process — matches arriving inside the cooldown are dropped. After 3 consecutive 15-second windows with dropped matches, watch_patterns is automatically disabled for that process and promoted to notify_on_complete behavior (one notification on exit, no more mid-process spam). USE ONLY for truly rare, one-shot mid-process signals on LONG-LIVED processes that will never exit on their own — e.g. ['Application startup complete'] on a server so you know when to hit its endpoint, or ['migration done'] on a daemon. DO NOT use for: (1) end-of-run markers like 'DONE'/'PASS' — use notify_on_complete instead; (2) error patterns like 'ERROR'/'Traceback' in loops or multi-item batch jobs — they fire on every iteration and you'll hit the strike limit fast; (3) anything you'd ever combine with notify_on_complete. When in doubt, choose notify_on_complete. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both."
            }
        },
        "required": ["command"]
@@ -202,6 +202,18 @@ TOOLSETS = {
        "includes": []
    },

+    "discord": {
+        "description": "Discord read and participate tools (fetch messages, search members, create threads)",
+        "tools": ["discord"],
+        "includes": [],
+    },
+
+    "discord_admin": {
+        "description": "Discord server management (list channels/roles, pin messages, assign roles)",
+        "tools": ["discord_admin"],
+        "includes": [],
+    },
+
    "feishu_doc": {
        "description": "Read Feishu/Lark document content",
        "tools": ["feishu_doc_read"],
@@ -326,8 +338,8 @@ TOOLSETS = {
    "hermes-discord": {
        "description": "Discord bot toolset - full access (terminal has safety checks via dangerous command approval)",
        "tools": _HERMES_CORE_TOOLS + [
-            # Discord server introspection & management (gated on DISCORD_BOT_TOKEN via check_fn)
-            "discord_server",
+            "discord",
+            "discord_admin",
        ],
        "includes": []
    },
@@ -388,7 +400,13 @@ TOOLSETS = {

    "hermes-feishu": {
        "description": "Feishu/Lark bot toolset - enterprise messaging via Feishu/Lark (full access)",
-        "tools": _HERMES_CORE_TOOLS,
+        "tools": _HERMES_CORE_TOOLS + [
+            "feishu_doc_read",
+            "feishu_drive_list_comments",
+            "feishu_drive_list_comment_replies",
+            "feishu_drive_reply_comment",
+            "feishu_drive_add_comment",
+        ],
        "includes": []
    },

@@ -560,17 +560,55 @@ def resolve_skin() -> dict:


 def _resolve_model() -> str:
-    env = os.environ.get("HERMES_MODEL", "")
+    env = (
+        os.environ.get("HERMES_MODEL", "")
+        or os.environ.get("HERMES_INFERENCE_MODEL", "")
+    ).strip()
    if env:
        return env
    m = _load_cfg().get("model", "")
    if isinstance(m, dict):
-        return m.get("default", "")
+        return str(m.get("default", "") or "").strip()
    if isinstance(m, str) and m:
-        return m
+        return m.strip()
    return "anthropic/claude-sonnet-4"


+def _resolve_startup_runtime() -> tuple[str, str | None]:
+    model = _resolve_model()
+    explicit_provider = os.environ.get("HERMES_TUI_PROVIDER", "").strip()
+    if explicit_provider:
+        return model, explicit_provider
+
+    explicit_model = (
+        os.environ.get("HERMES_MODEL", "")
+        or os.environ.get("HERMES_INFERENCE_MODEL", "")
+    ).strip()
+    if not explicit_model:
+        return model, None
+
+    try:
+        from hermes_cli.models import detect_static_provider_for_model
+
+        cfg = _load_cfg().get("model") or {}
+        current_provider = (
+            (
+                str(cfg.get("provider") or "").strip().lower()
+                if isinstance(cfg, dict)
+                else ""
+            )
+            or os.environ.get("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+            or "auto"
+        )
+        detected = detect_static_provider_for_model(explicit_model, current_provider)
+        if detected:
+            provider, detected_model = detected
+            return detected_model, provider
+    except Exception:
+        pass
+    return model, None
+
+
 def _write_config_key(key_path: str, value):
    cfg = _load_cfg()
    current = cfg
@@ -736,12 +774,15 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
        _emit("session.info", sid, _session_info(agent))

    os.environ["HERMES_MODEL"] = result.new_model
+    os.environ["HERMES_INFERENCE_MODEL"] = result.new_model
    # Keep the process-level provider env var in sync with the user's explicit
    # choice so any ambient re-resolution (credential pool refresh, compressor
    # rebuild, aux clients) resolves to the new provider instead of the
    # original one persisted in config or env.
    if result.target_provider:
        os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider
+        if os.environ.get("HERMES_TUI_PROVIDER"):
+            os.environ["HERMES_TUI_PROVIDER"] = result.target_provider
    if persist_global:
        _persist_model_switch(result)
    return {"value": result.new_model, "warning": result.warning_message or ""}
@@ -1277,9 +1318,13 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):

    cfg = _load_cfg()
    system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip()
-    runtime = resolve_runtime_provider(requested=None)
+    model, requested_provider = _resolve_startup_runtime()
+    runtime = resolve_runtime_provider(
+        requested=requested_provider,
+        target_model=model or None,
+    )
    return AIAgent(
-        model=_resolve_model(),
+        model=model,
        provider=runtime.get("provider"),
        base_url=runtime.get("base_url"),
        api_key=runtime.get("api_key"),
@@ -2789,6 +2834,23 @@ def _(rid, params: dict) -> dict:
        _write_config_key("display.tui_statusbar", nv)
        return _ok(rid, {"key": key, "value": nv})

+    if key == "mouse":
+        raw = str(value or "").strip().lower()
+        display = _load_cfg().get("display") if isinstance(_load_cfg().get("display"), dict) else {}
+        current = bool(display.get("tui_mouse", True))
+
+        if raw in ("", "toggle"):
+            nv = not current
+        elif raw == "on":
+            nv = True
+        elif raw == "off":
+            nv = False
+        else:
+            return _err(rid, 4002, f"unknown mouse value: {value}")
+
+        _write_config_key("display.tui_mouse", nv)
+        return _ok(rid, {"key": key, "value": "on" if nv else "off"})
+
    if key in ("prompt", "personality", "skin"):
        try:
            cfg = _load_cfg()
@@ -2917,6 +2979,10 @@ def _(rid, params: dict) -> dict:
            display.get("tui_statusbar", "top") if isinstance(display, dict) else "top"
        )
        return _ok(rid, {"value": _coerce_statusbar(raw)})
+    if key == "mouse":
+        display = _load_cfg().get("display")
+        on = display.get("tui_mouse", True) if isinstance(display, dict) else True
+        return _ok(rid, {"value": "on" if on else "off"})
    if key == "mtime":
        cfg_path = _hermes_home / "config.yaml"
        try:
@@ -53,7 +53,11 @@ export function AlternateScreen(t0: Props) {
      }

      writeRaw(
-        ENTER_ALT_SCREEN + ERASE_SCROLLBACK + ERASE_SCREEN + CURSOR_HOME + (mouseTracking ? ENABLE_MOUSE_TRACKING : '')
+        ENTER_ALT_SCREEN +
+          ERASE_SCROLLBACK +
+          ERASE_SCREEN +
+          CURSOR_HOME +
+          (mouseTracking ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
      )
      ink?.setAltScreenActive(true, mouseTracking)

@@ -323,27 +323,39 @@ const measureTextNode = function (
  widthMode: LayoutMeasureMode
 ): { width: number; height: number } {
  const elem = node.nodeName !== '#text' ? (node as DOMElement) : node.parentNode
+
  if (elem && elem.nodeName === 'ink-text') {
    let cache = elem._textMeasureCache
+
    if (!cache) {
      cache = { gen: 0, entries: new Map() }
      elem._textMeasureCache = cache
    }
+
    const key = `${width}|${widthMode}`
    const hit = cache.entries.get(key)
+
    if (hit && hit._gen === cache.gen) {
      return hit.result
    }
+
    const result = computeTextMeasure(node, width, widthMode)
+
    // Enforce cap with FIFO eviction to avoid unbounded growth during
    // pathological frames where yoga probes many widths.
    if (cache.entries.size >= MEASURE_CACHE_CAP) {
      const firstKey = cache.entries.keys().next().value
-      cache.entries.delete(firstKey)
+
+      if (firstKey !== undefined) {
+        cache.entries.delete(firstKey)
+      }
    }
+
    cache.entries.set(key, { _gen: cache.gen, result })
+
    return result
  }
+
  return computeTextMeasure(node, width, widthMode)
 }

@@ -475,6 +487,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => {
    for (const child of node.childNodes) {
      clearYogaNodeReferences(child)
    }
+
    node._textMeasureCache = undefined
  }

@@ -1121,6 +1121,23 @@ export default class Ink {
      this.repaint()
    }
  }
+
+  /**
+   * Toggle mouse tracking at runtime while the alt screen is active.
+   * Writes the appropriate DEC reset/set sequences so the terminal
+   * (and ConPTY on Windows WSL2) reflects the change immediately.
+   */
+  setAltScreenMouseTracking(enabled: boolean): void {
+    if (this.altScreenMouseTracking === enabled) {
+      return
+    }
+
+    this.altScreenMouseTracking = enabled
+
+    if (this.altScreenActive) {
+      this.options.stdout.write(enabled ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
+    }
+  }
  get isAltScreenActive(): boolean {
    return this.altScreenActive
  }
@@ -1,6 +1,6 @@
-import { type AnsiCode, ansiCodesToString, diffAnsiCodes } from '@alcalzone/ansi-tokenize'
+import { ansiCodesToString, diffAnsiCodes, type AnsiCode } from '@alcalzone/ansi-tokenize'

-import { type Point, type Rectangle, type Size, unionRect } from './layout/geometry.js'
+import { unionRect, type Point, type Rectangle, type Size } from './layout/geometry.js'
 import { BEL, ESC, SEP } from './termio/ansi.js'
 import * as warn from './warn.js'

@@ -436,6 +436,13 @@ export type Screen = Size & {
   */
  noSelect: Uint8Array

+  /**
+   * Per-cell written bitmap. A written plain space and never-written padding
+   * share the same packed cell value, so selection needs this side channel to
+   * preserve code indentation without selecting blank UI margins.
+   */
+  written: Uint8Array
+
  /**
   * Per-ROW soft-wrap continuation marker. softWrap[r]=N>0 means row r
   * is a word-wrap continuation of row r-1 (the `\n` before it was
@@ -475,6 +482,14 @@ export function isEmptyCellAt(screen: Screen, x: number, y: number): boolean {
  return isEmptyCellByIndex(screen, y * screen.width + x)
 }

+export function isWrittenCellAt(screen: Screen, x: number, y: number): boolean {
+  if (x < 0 || y < 0 || x >= screen.width || y >= screen.height) {
+    return false
+  }
+
+  return screen.written[y * screen.width + x] === 1
+}
+
 /**
 * Check if a Cell (view object) represents an empty cell.
 */
@@ -533,6 +548,7 @@ export function createScreen(
    emptyStyleId: styles.none,
    damage: undefined,
    noSelect: new Uint8Array(size),
+    written: new Uint8Array(size),
    softWrap: new Int32Array(height)
  }
 }
@@ -566,6 +582,7 @@ export function resetScreen(screen: Screen, width: number, height: number): void
    screen.cells = new Int32Array(buf)
    screen.cells64 = new BigInt64Array(buf)
    screen.noSelect = new Uint8Array(size)
+    screen.written = new Uint8Array(size)
  }

  if (screen.softWrap.length < height) {
@@ -575,6 +592,7 @@ export function resetScreen(screen: Screen, width: number, height: number): void
  // Reset all cells — single fill call, no loop
  screen.cells64.fill(EMPTY_CELL_VALUE, 0, size)
  screen.noSelect.fill(0, 0, size)
+  screen.written.fill(0, 0, size)
  screen.softWrap.fill(0, 0, height)

  // Update dimensions
@@ -770,6 +788,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
      if ((cells[spacerCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
        cells[spacerCI] = EMPTY_CHAR_INDEX
        cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+        screen.written[y * screen.width + spacerX] = 0
      }
    }
  }
@@ -787,6 +806,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
      if ((cells[wideCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
        cells[wideCI] = EMPTY_CHAR_INDEX
        cells[wideCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+        screen.written[y * screen.width + x - 1] = 0
        clearedWideX = x - 1
      }
    }
@@ -795,6 +815,7 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
  // Pack cell data into cells array
  cells[ci] = internCharString(screen, cell.char)
  cells[ci + 1] = packWord1(cell.styleId, internHyperlink(screen, cell.hyperlink), cell.width)
+  screen.written[y * screen.width + x] = 1

  // Track damage - expand bounds in place instead of allocating new objects
  // Include the main cell position and any cleared orphan cells
@@ -841,11 +862,13 @@ export function setCellAt(screen: Screen, x: number, y: number, cell: Cell): voi
        if (spacerX + 1 < screen.width && (cells[orphanCI + 1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
          cells[orphanCI] = EMPTY_CHAR_INDEX
          cells[orphanCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+          screen.written[y * screen.width + spacerX + 1] = 0
        }
      }

      cells[spacerCI] = SPACER_CHAR_INDEX
      cells[spacerCI + 1] = packWord1(screen.emptyStyleId, 0, CellWidth.SpacerTail)
+      screen.written[y * screen.width + spacerX] = 1

      // Expand damage to include SpacerTail so diff() scans it
      const d = screen.damage
@@ -929,6 +952,8 @@ export function blitRegion(
  const dstCells = dst.cells
  const srcNoSel = src.noSelect
  const dstNoSel = dst.noSelect
+  const srcWritten = src.written
+  const dstWritten = dst.written

  // softWrap is per-row — copy the row range regardless of stride/width.
  // Partial-width blits still carry the row's wrap provenance since the
@@ -947,6 +972,7 @@ export function blitRegion(
    const nsStart = regionY * src.width
    const nsLen = (maxY - regionY) * src.width
    dstNoSel.set(srcNoSel.subarray(nsStart, nsStart + nsLen), nsStart)
+    dstWritten.set(srcWritten.subarray(nsStart, nsStart + nsLen), nsStart)
  } else {
    // Per-row copy for partial-width or mismatched-stride regions
    let srcRowCI = regionY * srcStride + (regionX << 1)
@@ -957,6 +983,7 @@ export function blitRegion(
    for (let y = regionY; y < maxY; y++) {
      dstCells.set(srcCells.subarray(srcRowCI, srcRowCI + rowBytes), dstRowCI)
      dstNoSel.set(srcNoSel.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
+      dstWritten.set(srcWritten.subarray(srcRowNS, srcRowNS + rowLen), dstRowNS)
      srcRowCI += srcStride
      dstRowCI += dstStride
      srcRowNS += src.width
@@ -989,6 +1016,7 @@ export function blitRegion(
      if ((srcCells[srcLastCI + 1]! & WIDTH_MASK) === CellWidth.Wide) {
        dstCells[dstSpacerCI] = SPACER_CHAR_INDEX
        dstCells[dstSpacerCI + 1] = packWord1(dst.emptyStyleId, 0, CellWidth.SpacerTail)
+        dstWritten[y * dst.width + maxX] = 1
        wroteSpacerOutsideRegion = true
      }

@@ -1030,6 +1058,7 @@ export function clearRegion(

  const cells = screen.cells
  const cells64 = screen.cells64
+  const written = screen.written
  const screenWidth = screen.width
  const rowBase = startY * screenWidth
  let damageMinX = startX
@@ -1040,6 +1069,7 @@ export function clearRegion(
  if (startX === 0 && maxX === screenWidth) {
    // Full-width: single fill, no boundary checks needed
    cells64.fill(EMPTY_CELL_VALUE, rowBase, rowBase + (maxY - startY) * screenWidth)
+    written.fill(0, rowBase, rowBase + (maxY - startY) * screenWidth)
  } else {
    // Partial-width: single loop handles boundary cleanup and fill per row.
    const stride = screenWidth << 1 // 2 Int32s per cell
@@ -1062,6 +1092,7 @@ export function clearRegion(
          if ((cells[prevW1]! & WIDTH_MASK) === CellWidth.Wide) {
            cells[prevW1 - 1] = EMPTY_CHAR_INDEX
            cells[prevW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+            written[y * screenWidth + startX - 1] = 0
            damageMinX = startX - 1
          }
        }
@@ -1078,12 +1109,14 @@ export function clearRegion(
          if ((cells[nextW1]! & WIDTH_MASK) === CellWidth.SpacerTail) {
            cells[nextW1 - 1] = EMPTY_CHAR_INDEX
            cells[nextW1] = packWord1(screen.emptyStyleId, 0, CellWidth.Narrow)
+            written[y * screenWidth + maxX] = 0
            damageMaxX = maxX + 1
          }
        }
      }

      cells64.fill(EMPTY_CELL_VALUE, fillStart, fillStart + rowLen)
+      written.fill(0, fillStart, fillStart + rowLen)
      leftEdge += stride
      rightEdge += stride
      fillStart += screenWidth
@@ -1120,12 +1153,14 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
  const w = screen.width
  const cells64 = screen.cells64
  const noSel = screen.noSelect
+  const written = screen.written
  const sw = screen.softWrap
  const absN = Math.abs(n)

  if (absN > bottom - top) {
    cells64.fill(EMPTY_CELL_VALUE, top * w, (bottom + 1) * w)
    noSel.fill(0, top * w, (bottom + 1) * w)
+    written.fill(0, top * w, (bottom + 1) * w)
    sw.fill(0, top, bottom + 1)

    return
@@ -1135,17 +1170,21 @@ export function shiftRows(screen: Screen, top: number, bottom: number, n: number
    // SU: row top+n..bottom → top..bottom-n; clear bottom-n+1..bottom
    cells64.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
    noSel.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
+    written.copyWithin(top * w, (top + n) * w, (bottom + 1) * w)
    sw.copyWithin(top, top + n, bottom + 1)
    cells64.fill(EMPTY_CELL_VALUE, (bottom - n + 1) * w, (bottom + 1) * w)
    noSel.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
+    written.fill(0, (bottom - n + 1) * w, (bottom + 1) * w)
    sw.fill(0, bottom - n + 1, bottom + 1)
  } else {
    // SD: row top..bottom+n → top-n..bottom; clear top..top-n-1
    cells64.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
    noSel.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
+    written.copyWithin((top - n) * w, top * w, (bottom + n + 1) * w)
    sw.copyWithin(top - n, top, bottom + n + 1)
    cells64.fill(EMPTY_CELL_VALUE, top * w, (top - n) * w)
    noSel.fill(0, top * w, (top - n) * w)
+    written.fill(0, top * w, (top - n) * w)
    sw.fill(0, top, top - n)
  }
 }
@@ -0,0 +1,82 @@
+import { describe, expect, it } from 'vitest'
+
+import { cellAt, CellWidth, CharPool, createScreen, HyperlinkPool, setCellAt, StylePool } from './screen.js'
+import {
+  applySelectionOverlay,
+  createSelectionState,
+  getSelectedText,
+  startSelection,
+  updateSelection
+} from './selection.js'
+
+// Fixture: a createScreen(10, 3, …) screen with "h" and "i" written at x=2 and x=3 on row y=1.
+const screenWithText = () => {
+  const styles = new StylePool()
+  const screen = createScreen(10, 3, styles, new CharPool(), new HyperlinkPool())
+  const plain = { hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow }
+  Array.from('hi').forEach((char, offset) => setCellAt(screen, 2 + offset, 1, { char, ...plain }))
+
+  return { screen, styles }
+}
+
+describe('selection whitespace handling', () => {
+  // Start a selection at (fromCol, fromRow) and extend it to (toCol, toRow);
+  // every case below needs exactly this startSelection/updateSelection pair.
+  const drag = (fromCol: number, fromRow: number, toCol: number, toRow: number) => {
+    const selection = createSelectionState()
+
+    startSelection(selection, fromCol, fromRow)
+    updateSelection(selection, toCol, toRow)
+
+    return selection
+  }
+
+  it('does not copy whitespace-only selections', () => {
+    const { screen } = screenWithText()
+
+    // The fixture writes nothing to row 0, so this drag covers no written cell.
+    expect(getSelectedText(drag(0, 0, 9, 0), screen)).toBe('')
+  })
+
+  it('trims outer drag padding while preserving selected content', () => {
+    const { screen } = screenWithText()
+
+    // On row 1 the fixture writes only x=2..3; the rest of the drag is padding.
+    expect(getSelectedText(drag(0, 1, 9, 1), screen)).toBe('hi')
+  })
+
+  it('preserves selected indentation when spaces are rendered content', () => {
+    const styles = new StylePool()
+    const screen = createScreen(10, 1, styles, new CharPool(), new HyperlinkPool())
+
+    // The two leading spaces are written through setCellAt like any other
+    // character, so they are expected to survive the copy.
+    Array.from('  x').forEach((char, col) => {
+      setCellAt(screen, col, 0, { char, hyperlink: undefined, styleId: screen.emptyStyleId, width: CellWidth.Narrow })
+    })
+
+    expect(getSelectedText(drag(0, 0, 9, 0), screen)).toBe('  x')
+  })
+
+  it('clamps copied selection bounds to screen width', () => {
+    const { screen } = screenWithText()
+
+    // x=99 is far past the last column of the createScreen(10, 3, …) fixture.
+    expect(getSelectedText(drag(0, 1, 99, 1), screen)).toBe('hi')
+  })
+
+  it('does not paint selection background on leading/trailing empty cells or empty rows', () => {
+    const { screen, styles } = screenWithText()
+    const styleAt = (col: number, row: number) => cellAt(screen, col, row)?.styleId
+
+    applySelectionOverlay(screen, drag(0, 0, 9, 2), styles)
+
+    // Only the written cell at (2, 1) may change style; rows 0 and 2 and the
+    // cells on either side of "hi" must still carry the empty style.
+    expect(styleAt(0, 0)).toBe(screen.emptyStyleId)
+    expect(styleAt(0, 1)).toBe(screen.emptyStyleId)
+    expect(styleAt(2, 1)).not.toBe(screen.emptyStyleId)
+    expect(styleAt(4, 1)).toBe(screen.emptyStyleId)
+    expect(styleAt(0, 2)).toBe(screen.emptyStyleId)
+  })
+})
@@ -12,7 +12,7 @@

 import { clamp } from './layout/geometry.js'
 import type { Screen, StylePool } from './screen.js'
-import { cellAt, cellAtIndex, CellWidth, setCellStyleId } from './screen.js'
+import { cellAt, cellAtIndex, CellWidth, isWrittenCellAt, setCellStyleId } from './screen.js'

 type Point = { col: number; row: number }

@@ -842,6 +842,43 @@ export function isCellSelected(s: SelectionState, col: number, row: number): boo
  return true
 }

+/** Whether (col, row) is a written, selectable cell rather than a spacer placeholder. */
+function selectableCell(screen: Screen, row: number, col: number): boolean {
+  const probe = cellAt(screen, col, row)
+
+  if (screen.noSelect[row * screen.width + col] === 1 || !isWrittenCellAt(screen, col, row) || !probe) {
+    return false
+  }
+
+  // SpacerTail / SpacerHead cells are never treated as content of their own.
+  return probe.width !== CellWidth.SpacerTail && probe.width !== CellWidth.SpacerHead
+}
+
+/** Narrow the inclusive column span [start, end] on `row` to its outermost
+ *  cells that pass selectableCell; null when no column in the span passes. */
+function selectionContentBounds(
+  screen: Screen,
+  row: number,
+  start: number,
+  end: number
+): { first: number; last: number } | null {
+  const skippable = (col: number): boolean => !selectableCell(screen, row, col)
+  let lo = start
+  let hi = end
+
+  // Advance past leading cells that are not selectable.
+  while (lo <= hi && skippable(lo)) {
+    lo += 1
+  }
+
+  // Retreat past trailing ones; never entered once the span is exhausted.
+  while (hi >= lo && skippable(hi)) {
+    hi -= 1
+  }
+
+  return lo > end ? null : { first: lo, last: hi }
+}
+
 /** Extract text from one screen row. When the next row is a soft-wrap
 *  continuation (screen.softWrap[row+1]>0), clamp to that content-end
 *  column and skip the trailing trim so the word-separator space survives
@@ -890,6 +927,21 @@ function joinRows(lines: string[], text: string, sw: boolean | undefined): void
  }
 }

+/** Drop whitespace-only rows from both ends; interior blank rows survive. */
+function trimEmptyEdgeRows(lines: string[]): string[] {
+  let head = 0
+  let tail = lines.length - 1
+
+  while (head <= tail && lines[head]!.trim() === '') {
+    head += 1
+  }
+  while (tail >= head && lines[tail]!.trim() === '') {
+    tail -= 1
+  }
+
+  return lines.slice(head, tail + 1)
+}
+
 /**
 * Extract text from the screen buffer within the selection range.
 * Rows are joined with newlines unless the screen's softWrap bitmap
@@ -917,16 +969,18 @@ export function getSelectedText(s: SelectionState, screen: Screen): string {
  }

  for (let row = start.row; row <= end.row; row++) {
-    const rowStart = row === start.row ? start.col : 0
-    const rowEnd = row === end.row ? end.col : screen.width - 1
-    joinRows(lines, extractRowText(screen, row, rowStart, rowEnd), sw[row]! > 0)
+    const rowStart = Math.max(0, row === start.row ? start.col : 0)
+    const rowEnd = Math.min(row === end.row ? end.col : screen.width - 1, screen.width - 1)
+    const bounds = selectionContentBounds(screen, row, rowStart, rowEnd)
+
+    joinRows(lines, bounds ? extractRowText(screen, row, bounds.first, bounds.last) : '', sw[row]! > 0)
  }

  for (let i = 0; i < s.scrolledOffBelow.length; i++) {
    joinRows(lines, s.scrolledOffBelow[i]!, s.scrolledOffBelowSW[i])
  }

-  return lines.join('\n')
+  return trimEmptyEdgeRows(lines).join('\n')
 }

 /**
@@ -1051,9 +1105,14 @@ export function applySelectionOverlay(screen: Screen, selection: SelectionState,
  for (let row = start.row; row <= end.row && row < screen.height; row++) {
    const colStart = row === start.row ? start.col : 0
    const colEnd = row === end.row ? Math.min(end.col, width - 1) : width - 1
+    const bounds = selectionContentBounds(screen, row, colStart, colEnd)
    const rowOff = row * width

-    for (let col = colStart; col <= colEnd; col++) {
+    if (!bounds) {
+      continue
+    }
+
+    for (let col = bounds.first; col <= bounds.last; col++) {
      const idx = rowOff + col

      // Skip noSelect cells — gutters stay visually unchanged so it's
@@ -9,18 +9,21 @@ describe('shouldEmitClipboardSequence', () => {
  })

  it('keeps OSC enabled for remote or plain local terminals', () => {
-    expect(shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
-      true
-    )
+    expect(
+      shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)
+    ).toBe(true)
    expect(shouldEmitClipboardSequence({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
  })

  it('honors explicit env override', () => {
-    expect(shouldEmitClipboardSequence({ HERMES_TUI_CLIPBOARD_OSC52: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(
-      true
-    )
-    expect(shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(
-      false
-    )
+    expect(
+      shouldEmitClipboardSequence({
+        HERMES_TUI_CLIPBOARD_OSC52: '1',
+        TMUX: '/tmp/tmux-1/default,1,0'
+      } as NodeJS.ProcessEnv)
+    ).toBe(true)
+    expect(
+      shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
+    ).toBe(false)
  })
 })
@@ -226,7 +226,10 @@ describe('createGatewayEventHandler', () => {
    const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
    const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'

-    onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
+    onEvent({
+      payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
+      type: 'tool.complete'
+    } as any)
    onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)

    expect(appended).toHaveLength(1)
@@ -17,6 +17,14 @@ describe('createSlashHandler', () => {
    expect(getOverlayState().picker).toBe(true)
  })

+  it('treats /provider as a local /model alias', () => {
+    const ctx = buildCtx()
+    // "Local" here means the model picker opens without any gateway request being issued.
+    expect(createSlashHandler(ctx)('/provider')).toBe(true)
+    expect(getOverlayState().modelPicker).toBe(true)
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+  })
+
  it('opens the skills hub locally for bare /skills', () => {
    const ctx = buildCtx()

@@ -118,9 +126,7 @@ describe('createSlashHandler', () => {
    const ctx = buildCtx()
    createSlashHandler(ctx)('/details tools blink')
    expect(getUiState().sections.tools).toBeUndefined()
-    expect(ctx.transcript.sys).toHaveBeenCalledWith(
-      'usage: /details <section> [hidden|collapsed|expanded|reset]'
-    )
+    expect(ctx.transcript.sys).toHaveBeenCalledWith('usage: /details <section> [hidden|collapsed|expanded|reset]')
  })

  it('shows tool enable usage when names are missing', () => {
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest'

-import { isSectionName, parseDetailsMode, resolveSections, sectionMode, SECTION_NAMES } from '../domain/details.js'
+import { isSectionName, parseDetailsMode, resolveSections, SECTION_NAMES, sectionMode } from '../domain/details.js'

 describe('parseDetailsMode', () => {
  it('accepts the canonical modes case-insensitively', () => {
@@ -31,6 +31,28 @@ describe('platform action modifier', () => {
  })
 })

+describe('isCopyShortcut', () => {
+  it('keeps Ctrl+C as the local non-macOS copy chord', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+    // The empty env object stands in for a local session: no SSH_CONNECTION is set.
+    expect(isCopyShortcut({ ctrl: true, meta: false, super: false }, 'c', {})).toBe(true)
+  })
+
+  it('accepts client Cmd+C over SSH even when running on Linux', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+    const env = { SSH_CONNECTION: '1 2 3 4' } as NodeJS.ProcessEnv
+    // With SSH_CONNECTION set, both the super and the meta modifier must count as copy.
+    expect(isCopyShortcut({ ctrl: false, meta: false, super: true }, 'c', env)).toBe(true)
+    expect(isCopyShortcut({ ctrl: false, meta: true, super: false }, 'c', env)).toBe(true)
+  })
+
+  it('does not treat local Linux Alt+C as copy', async () => {
+    const { isCopyShortcut } = await importPlatform('linux')
+    // Same meta+c chord as the SSH case, but with an empty env it must be rejected.
+    expect(isCopyShortcut({ ctrl: false, meta: true, super: false }, 'c', {})).toBe(false)
+  })
+})
+
 describe('isVoiceToggleKey', () => {
  it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => {
    const { isVoiceToggleKey } = await importPlatform('darwin')
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}`