fix(tools): normalize numeric entries and clear stale no_mcp in _save_platform_tools

YAML parses bare numeric toolset names (e.g. 12306:) as int, causing TypeError in sorted() since the read path normalizes to str but the save path did not. The no_mcp sentinel was preserved in existing entries even when the user re-enabled MCP servers, causing MCP to stay silently disabled.
fix(nix): use --rebuild in fix-lockfiles to bypass cached FOD store paths (#15444)
2026-04-25 06:23:17 +05:30 · 2026-04-25 06:14:32 +05:30 · 2026-04-24 17:21:38 -07:00 · 2026-04-24 16:45:27 -07:00 · 2026-04-24 16:45:27 -07:00 · 2026-04-24 16:45:27 -07:00
159 changed files with 12151 additions and 2560 deletions
@@ -240,6 +240,19 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

+### TUI in the Dashboard (`hermes dashboard` → `/chat`)
+
+The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
+
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
+- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
+- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
+
+**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
+
+**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
+
 ---

 ## Adding New Tools
@@ -986,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
 # ---------------------------------------------------------------------------


+def _is_bedrock_model_id(model: str) -> bool:
+    """Detect AWS Bedrock model IDs that use dots as namespace separators.
+
+    Bedrock model IDs come in two forms:
+    - Bare:    ``anthropic.claude-opus-4-7``
+    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
+
+    In both cases the dots separate namespace components, not version
+    numbers, and must be preserved verbatim for the Bedrock API.
+
+    Matching is case-insensitive and purely prefix-based.  Only the
+    ``anthropic.`` provider namespace is recognised for bare IDs.
+
+    Returns True when ``model`` starts with one of the known prefixes,
+    False otherwise (including for the empty string).
+    """
+    bedrock_prefixes = (
+        # Inference-profile geography prefixes.  ``apac.``, ``au.`` and
+        # ``us-gov.`` are listed explicitly because the shorter ``ap.`` and
+        # ``us.`` entries do not match them.
+        "global.", "us.", "us-gov.", "eu.", "ap.", "apac.", "au.", "jp.",
+        # Bare Bedrock model IDs: provider.model-family
+        "anthropic.",
+    )
+    # str.startswith accepts a tuple, so one call covers every prefix.
+    return model.lower().startswith(bedrock_prefixes)
+
+
 def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

@@ -993,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
+    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
+      regional inference profiles (``us.anthropic.claude-*``) whose dots
+      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
+        # Bedrock model IDs use dots as namespace separators
+        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
+        # These must not be converted to hyphens.  See issue #12295.
+        if _is_bedrock_model_id(model):
+            return model
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
        model = model.replace(".", "-")
@@ -1993,6 +1993,39 @@ def resolve_provider_client(
                       "directly supported", provider)
        return None, None

+    elif pconfig.auth_type == "aws_sdk":
+        # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
+        # boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
+        try:
+            from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
+            from agent.anthropic_adapter import build_anthropic_bedrock_client
+        except ImportError:
+            logger.warning("resolve_provider_client: bedrock requested but "
+                           "boto3 or anthropic SDK not installed")
+            return None, None
+
+        if not has_aws_credentials():
+            logger.debug("resolve_provider_client: bedrock requested but "
+                         "no AWS credentials found")
+            return None, None
+
+        region = resolve_bedrock_region()
+        default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
+        final_model = _normalize_resolved_model(model or default_model, provider)
+        try:
+            real_client = build_anthropic_bedrock_client(region)
+        except ImportError as exc:
+            logger.warning("resolve_provider_client: cannot create Bedrock "
+                           "client: %s", exc)
+            return None, None
+        client = AnthropicAuxiliaryClient(
+            real_client, final_model, api_key="aws-sdk",
+            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
+        )
+        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # OAuth providers — route through their specific try functions
        if provider == "nous":
@@ -87,6 +87,114 @@ def reset_client_cache():
    _bedrock_control_client_cache.clear()


+def invalidate_runtime_client(region: str) -> bool:
+    """Drop the cached ``bedrock-runtime`` client that belongs to ``region``.
+
+    This is the single-region analogue of :func:`reset_client_cache`. The
+    converse call wrappers use it to throw away a client whose HTTP
+    connection has gone stale, so that the following call has to build a
+    new client (and with it a new connection pool) rather than reuse a dead
+    socket.
+
+    Returns True when an entry for ``region`` was present and has been
+    removed, False when nothing was cached for that region.
+    """
+    try:
+        del _bedrock_runtime_client_cache[region]
+    except KeyError:
+        # Nothing cached for this region — report that no eviction happened.
+        return False
+    return True
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection
+# ---------------------------------------------------------------------------
+#
+# boto3 caches its HTTPS connection pool inside the client object. When a
+# pooled connection is killed out from under us (NAT timeout, VPN flap,
+# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
+# one of a handful of low-level exceptions — most commonly
+# ``botocore.exceptions.ConnectionClosedError`` or
+# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
+# ``assert`` in a couple of paths (connection pool state checks, chunked
+# response readers) which bubbles up as a bare ``AssertionError`` with an
+# empty ``str(exc)``.
+#
+# In all of these cases the client is the problem, not the request: retrying
+# with the same cached client reproduces the failure until the process
+# restarts. The fix is to evict the region's cached client so the next
+# attempt builds a new one.
+
+_STALE_LIB_MODULE_PREFIXES = (
+    "urllib3.",
+    "botocore.",
+    "boto3.",
+)
+
+
+def _traceback_frames_modules(exc: BaseException):
+    """Generate the module name of every frame recorded in ``exc``'s traceback.
+
+    Frames are visited by following ``tb_next`` from ``exc.__traceback__``.
+    Each yielded value is the frame's global ``__name__``; a missing or
+    falsy name is reported as the empty string.  Nothing is yielded when the
+    exception carries no traceback.
+    """
+    node = getattr(exc, "__traceback__", None)
+    while node is not None:
+        yield node.tb_frame.f_globals.get("__name__", "") or ""
+        node = node.tb_next
+
+
+def is_stale_connection_error(exc: BaseException) -> bool:
+    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
+
+    Three families of exception are recognised:
+
+      * Instances of ``botocore.exceptions.ConnectionError`` or
+        ``botocore.exceptions.HTTPClientError`` (including subclasses).
+      * Instances of ``urllib3.exceptions.ProtocolError``,
+        ``NewConnectionError`` or ``ConnectionError``.
+      * ``AssertionError`` whose traceback contains at least one frame from
+        a module matching ``_STALE_LIB_MODULE_PREFIXES`` (urllib3, botocore,
+        boto3 submodules).  These are treated as library-internal invariant
+        failures that swapping the client can recover from.
+
+    An ``AssertionError`` with no library frame anywhere in its traceback
+    (plain application or test assertions) is not matched.  Note that the
+    check looks at every frame of the traceback, not only the raising one.
+
+    Both libraries are imported lazily; if either import fails, the
+    corresponding family is simply skipped.
+    """
+    transport_errors: tuple = ()
+
+    # botocore's transport-level exception bases.
+    # NOTE(review): in current botocore, ConnectionClosedError and
+    # ReadTimeoutError appear to derive from HTTPClientError, and
+    # ConnectionError itself looks like an HTTPClientError subclass — confirm
+    # against the installed botocore before relying on either base alone.
+    try:
+        from botocore.exceptions import (
+            ConnectionError as BotoConnectionError,
+            HTTPClientError,
+        )
+    except ImportError:  # pragma: no cover — botocore always present with boto3
+        pass
+    else:
+        transport_errors += (BotoConnectionError, HTTPClientError)
+
+    # urllib3: low-level transport failures.
+    try:
+        from urllib3.exceptions import (
+            ProtocolError,
+            NewConnectionError,
+            ConnectionError as Urllib3ConnectionError,
+        )
+    except ImportError:  # pragma: no cover
+        pass
+    else:
+        transport_errors += (ProtocolError, NewConnectionError, Urllib3ConnectionError)
+
+    # isinstance() against an empty tuple is simply False, so no guard is
+    # needed when neither library could be imported.
+    if isinstance(exc, transport_errors):
+        return True
+
+    # Everything else is only interesting when it is an AssertionError that
+    # passed through urllib3 / botocore / boto3 code.
+    if not isinstance(exc, AssertionError):
+        return False
+    return any(
+        module.startswith(_STALE_LIB_MODULE_PREFIXES)
+        for module in _traceback_frames_modules(exc)
+    )
+
+
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -787,7 +895,17 @@ def call_converse(
        guardrail_config=guardrail_config,
    )

-    response = client.converse(**kwargs)
+    try:
+        response = client.converse(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse(region=%s, model=%s): "
+                "%s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_response(response)


@@ -819,7 +937,17 @@ def call_converse_stream(
        guardrail_config=guardrail_config,
    )

-    response = client.converse_stream(**kwargs)
+    try:
+        response = client.converse_stream(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse_stream(region=%s, "
+                "model=%s): %s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_stream_events(response)


@@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 logger = logging.getLogger(__name__)


+# Matches Codex/Harmony tool-call serialization that occasionally leaks into
+# assistant-message content when the model fails to emit a structured
+# ``function_call`` item.  Accepts the common forms:
+#
+#   to=functions.exec_command
+#   assistant to=functions.exec_command
+#   <|channel|>commentary to=functions.exec_command
+#
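+# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
+# Harmony channel prefix varies by degeneration mode and is deliberately not
+# part of the pattern.  Matching is case-insensitive so that differently-cased
+# spellings of the marker itself (e.g. ``TO=Functions.foo``) are also caught.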
+_TOOL_CALL_LEAK_PATTERN = re.compile(
+    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
+    re.IGNORECASE,
+)
+
+
 # ---------------------------------------------------------------------------
 # Multimodal content helpers
 # ---------------------------------------------------------------------------
@@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        if isinstance(out_text, str):
            final_text = out_text.strip()

+    # ── Tool-call leak recovery ──────────────────────────────────
+    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
+    # what should be a structured `function_call` item as plain assistant
+    # text using the Harmony/Codex serialization (``to=functions.foo
+    # {json}`` or ``assistant to=functions.foo {json}``). The model
+    # intended to call a tool, but the intent never made it into
+    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
+    # is empty here. If we pass this through, the parent sees a
+    # confident-looking summary with no audit trail (empty ``tool_trace``)
+    # and no tools actually ran — the Taiwan-embassy-email incident.
+    #
+    # Detection: leaked tokens always contain ``to=functions.<name>`` and
+    # the assistant message has no real tool calls. Treat it as incomplete
+    # so the existing Codex-incomplete continuation path (3 retries,
+    # handled in run_agent.py) gets a chance to re-elicit a proper
+    # ``function_call`` item. The existing loop already handles message
+    # append, dedup, and retry budget.
+    leaked_tool_call_text = False
+    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
+        leaked_tool_call_text = True
+        logger.warning(
+            "Codex response contains leaked tool-call text in assistant content "
+            "(no structured function_call items). Treating as incomplete so the "
+            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
+            final_text[:300],
+        )
+        # Clear the text so downstream code doesn't surface the garbage as
+        # a summary. The encrypted reasoning items (if any) are preserved
+        # so the model keeps its chain-of-thought on the retry.
+        final_text = ""
+
    assistant_message = SimpleNamespace(
        content=final_text,
        tool_calls=tool_calls,
@@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

    if tool_calls:
        finish_reason = "tool_calls"
+    elif leaked_tool_call_text:
+        finish_reason = "incomplete"
    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif reasoning_items_raw and not final_text:
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_summary_error = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
            self._summary_model_fallen_back = False
+            self._last_summary_error = None
            return self._with_summary_prefix(summary)
        except RuntimeError:
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
@@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
@@ -31,6 +31,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+import inspect
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -312,7 +313,39 @@ class MemoryManager:
                )
        return "\n\n".join(parts)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    @staticmethod
+    def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
+        """Return how to pass metadata to a provider's memory-write hook.
+
+        Inspects the signature of ``provider.on_memory_write`` and returns:
+
+        - ``"keyword"``: call with ``metadata=...``.  Chosen when the hook
+          accepts ``**kwargs``, declares a ``metadata`` parameter that can be
+          passed by keyword, or its signature cannot be introspected.
+        - ``"positional"``: pass metadata as a fourth positional argument.
+          Chosen when the hook has at least four positional parameters.
+        - ``"legacy"``: call with ``(action, target, content)`` only.
+        """
+        try:
+            signature = inspect.signature(provider.on_memory_write)
+        except (TypeError, ValueError):
+            # Not introspectable (e.g. some builtins/C callables): assume the
+            # current hook contract, which takes ``metadata`` by keyword.
+            return "keyword"
+
+        params = signature.parameters
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values()):
+            return "keyword"
+
+        # A ``metadata`` parameter only helps the keyword call when it can
+        # actually be bound by name; positional-only / ``*metadata`` cannot.
+        metadata_param = params.get("metadata")
+        if metadata_param is not None and metadata_param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            return "keyword"
+
+        # Count only parameters that can receive a positional argument.
+        # Keyword-only parameters must not be counted: a hook such as
+        # ``(action, target, content, *, source=None)`` would otherwise be
+        # called with four positionals and fail with TypeError.
+        positional = [
+            p for p in params.values()
+            if p.kind in (
+                inspect.Parameter.POSITIONAL_ONLY,
+                inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            )
+        ]
+        if len(positional) >= 4:
+            return "positional"
+        return "legacy"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
@@ -321,7 +354,15 @@ class MemoryManager:
            if provider.name == "builtin":
                continue
            try:
-                provider.on_memory_write(action, target, content)
+                metadata_mode = self._provider_memory_write_metadata_mode(provider)
+                if metadata_mode == "keyword":
+                    provider.on_memory_write(
+                        action, target, content, metadata=dict(metadata or {})
+                    )
+                elif metadata_mode == "positional":
+                    provider.on_memory_write(action, target, content, dict(metadata or {}))
+                else:
+                    provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content) — mirror built-in memory writes
+  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
 """

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
          should all have ``env_var`` set and this method stays no-op).
        """

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Called when the built-in memory tool writes an entry.

        action: 'add', 'replace', or 'remove'
        target: 'memory' or 'user'
        content: the entry content
+        metadata: structured provenance for the write, when available. Common
+          keys include ``write_origin``, ``execution_context``, ``session_id``,
+          ``parent_session_id``, ``platform``, and ``tool_name``.

        Use to mirror built-in memory writes to your backend.
        """
@@ -1199,6 +1199,7 @@ def get_model_context_length(
    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
+    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1237,6 +1238,26 @@ def get_model_context_length(
            else:
                return cached

+    # 1b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py that reflects
+    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
+    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
+    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
+    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
+    # fail the /models probe (Bedrock doesn't expose that shape), and fall
+    # back to the 128K default before reaching the original step 4b branch.
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution
+
    # 2. Active endpoint metadata for truly custom/unknown endpoints.
    # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
    # /models endpoint may report a provider-imposed limit (e.g. Copilot
@@ -1282,19 +1303,7 @@ def get_model_context_length(
        if ctx:
            return ctx

-    # 4b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)

    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
@@ -7,11 +7,15 @@ can invoke skills via /skill-name commands.
 import json
 import logging
 import re
-import subprocess
 from pathlib import Path
 from typing import Any, Dict, Optional

 from hermes_constants import display_hermes_home
+from agent.skill_preprocessing import (
+    expand_inline_shell as _expand_inline_shell,
+    load_skills_config as _load_skills_config,
+    substitute_template_vars as _substitute_template_vars,
+)

 logger = logging.getLogger(__name__)

@@ -20,111 +24,6 @@ _skill_commands: Dict[str, Dict[str, Any]] = {}
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-
-
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -143,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        else:
            normalized = raw_identifier.lstrip("/")

-        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
+        loaded_skill = json.loads(
+            skill_view(normalized, task_id=task_id, preprocess=False)
+        )
    except Exception:
        return None

@@ -0,0 +1,131 @@
+"""Shared SKILL.md preprocessing helpers."""
+
+import logging
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only -- no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def load_skills_config() -> dict:
+    """Return the ``skills`` mapping from the loaded config, or ``{}``."""
+    section = None
+    try:
+        # Imported lazily inside the guard so an import failure also
+        # degrades to the empty-dict fallback.
+        from hermes_cli.config import load_config
+
+        section = (load_config() or {}).get("skills")
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return section if isinstance(section, dict) else {}
+
+
+def substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Expand ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in ``content``.
+
+    A token is rewritten only when a value is available for it; otherwise
+    the original ``${...}`` text is kept so the author can spot it.
+    """
+    if not content:
+        return content
+
+    # An empty string marks "no value available" for that token.
+    values = {
+        "HERMES_SKILL_DIR": str(skill_dir) if skill_dir else "",
+        "HERMES_SESSION_ID": str(session_id) if session_id else "",
+    }
+
+    def _expand(hit: re.Match) -> str:
+        # Fall back to the matched text when no value is available.
+        return values[hit.group(1)] or hit.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_expand, content)
+
+
+def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Run one snippet via ``bash -c``; return stdout, or stderr if stdout is empty.
+
+    Output loses trailing newlines and is capped at _INLINE_SHELL_MAX_OUTPUT
+    characters.  Failures to run return an ``[inline-shell ...]`` marker string.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),  # never below one second
+            check=False,
+        )
+    except subprocess.TimeoutExpired as expired:  # .timeout is the clamped limit enforced
+        return f"[inline-shell timeout after {expired.timeout}s: {command}]"
+    except FileNotFoundError:
+        return "[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
+    return output
+
+
+def expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Substitute each !`cmd` snippet in ``content`` with that command's output.
+
+    Every snippet is executed through ``run_inline_shell`` with ``skill_dir``
+    passed as its working directory and ``timeout`` forwarded unchanged.
+    """
+    # Cheap substring test first: skip the regex pass when no marker exists.
+    if "!`" not in content:
+        return content
+
+    def _render(hit: re.Match) -> str:
+        snippet = hit.group(1).strip()
+        # A whitespace-only snippet expands to nothing rather than running bash.
+        return run_inline_shell(snippet, skill_dir, timeout) if snippet else ""
+
+    return _INLINE_SHELL_RE.sub(_render, content)
+
+
+def preprocess_skill_content(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None = None,
+    skills_cfg: dict | None = None,
+) -> str:
+    """Run the enabled SKILL.md preprocessing passes over ``content``."""
+    if not content:
+        return content
+    # Defaults: template_vars on, inline_shell off, inline_shell_timeout 10.
+    settings = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
+    if settings.get("template_vars", True):
+        content = substitute_template_vars(content, skill_dir, session_id)
+    if not settings.get("inline_shell", False):
+        return content
+    seconds = int(settings.get("inline_shell_timeout", 10) or 10)
+    return expand_inline_shell(content, skill_dir, seconds)
@@ -951,13 +951,9 @@ class BatchRunner:
                    root_logger.setLevel(original_level)
        
        # Aggregate all batch statistics and update checkpoint
-        all_completed_prompts = list(completed_prompts_set)
        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
-        
+
        for batch_result in results:
-            # Add newly completed prompts
-            all_completed_prompts.extend(batch_result.get("completed_prompts", []))
-            
            # Aggregate tool stats
            for tool_name, stats in batch_result.get("tool_stats", {}).items():
                if tool_name not in total_tool_stats:
@@ -977,7 +973,7 @@ class BatchRunner:
        
        # Save final checkpoint (best-effort; incremental writes already happened)
        try:
-            checkpoint_data["completed_prompts"] = all_completed_prompts
+            checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
            self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
        except Exception as ckpt_err:
            print(f"âš ï¸  Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -5374,29 +5374,26 @@ class HermesCLI:
        _cprint(f"  ✓ Model switched: {result.new_model}")
        _cprint(f"    Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry
+        # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or self.base_url or "",
+            api_key=result.api_key or self.api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            _cprint(f"    Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                _cprint(f"    Context: {mi.context_window:,} tokens")
            if mi.max_output:
                _cprint(f"    Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                _cprint(f"    Cost: {mi.format_cost()}")
            _cprint(f"    Capabilities: {mi.format_capabilities()}")
-        else:
-            # Fallback to old context length lookup
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or self.base_url,
-                    api_key=result.api_key or self.api_key,
-                    provider=result.target_provider,
-                )
-                _cprint(f"    Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -6165,6 +6162,8 @@ class HermesCLI:
            self._handle_skin_command(cmd_original)
        elif canonical == "voice":
            self._handle_voice_command(cmd_original)
+        elif canonical == "busy":
+            self._handle_busy_command(cmd_original)
        else:
            # Check for user-defined quick commands (bypass agent loop, no LLM call)
            base_cmd = cmd_lower.split()[0]
@@ -6901,6 +6900,36 @@ class HermesCLI:
        else:
            _cprint(f"  {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")

+    def _handle_busy_command(self, cmd: str):
+        """Handle /busy — show or change what Enter does while Hermes is working.
+
+        Usage:
+            /busy, /busy status   Show current busy input mode
+            /busy queue           Queue input for the next turn instead of interrupting
+            /busy interrupt       Interrupt the current run on Enter (default)
+        """
+        usage_line = f"  {_DIM}Usage: /busy [queue|interrupt|status]{_RST}"
+        tokens = cmd.strip().split(maxsplit=1)
+        choice = tokens[1].strip().lower() if len(tokens) > 1 else "status"
+        if choice == "status":
+            effect = "queues for next turn" if self.busy_input_mode == "queue" else "interrupts current run"
+            _cprint(f"  {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
+            _cprint(f"  {_DIM}Enter while busy: {effect}{_RST}")
+            _cprint(usage_line)
+            return
+        if choice not in ("queue", "interrupt"):
+            _cprint(f"  {_DIM}(._.) Unknown argument: {choice}{_RST}")
+            _cprint(usage_line)
+            return
+
+        self.busy_input_mode = choice
+        if not save_config_value("display.busy_input_mode", choice):
+            _cprint(f"  {_ACCENT}✓ Busy input mode set to '{choice}' (session only){_RST}")
+            return
+        behavior = "Enter will queue follow-up input while Hermes is busy." if choice == "queue" else "Enter will interrupt the current run while Hermes is busy."
+        _cprint(f"  {_ACCENT}✓ Busy input mode set to '{choice}' (saved to config){_RST}")
+        _cprint(f"  {_DIM}{behavior}{_RST}")
+
    def _handle_fast_command(self, cmd: str):
        """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
        if not self._fast_command_available():
@@ -6979,51 +7008,52 @@ class HermesCLI:
                focus_topic = parts[1].strip()

        original_count = len(self.conversation_history)
-        try:
-            from agent.model_metadata import estimate_messages_tokens_rough
-            from agent.manual_compression_feedback import summarize_manual_compression
-            original_history = list(self.conversation_history)
-            approx_tokens = estimate_messages_tokens_rough(original_history)
-            if focus_topic:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
-                      f"focus: \"{focus_topic}\"...")
-            else:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+        with self._busy_command("Compressing context..."):
+            try:
+                from agent.model_metadata import estimate_messages_tokens_rough
+                from agent.manual_compression_feedback import summarize_manual_compression
+                original_history = list(self.conversation_history)
+                approx_tokens = estimate_messages_tokens_rough(original_history)
+                if focus_topic:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                          f"focus: \"{focus_topic}\"...")
+                else:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

-            compressed, _ = self.agent._compress_context(
-                original_history,
-                self.agent._cached_system_prompt or "",
-                approx_tokens=approx_tokens,
-                focus_topic=focus_topic or None,
-            )
-            self.conversation_history = compressed
-            # _compress_context ends the old session and creates a new child
-            # session on the agent (run_agent.py::_compress_context). Sync the
-            # CLI's session_id so /status, /resume, exit summary, and title
-            # generation all point at the live continuation session, not the
-            # ended parent. Without this, subsequent end_session() calls target
-            # the already-closed parent and the child is orphaned.
-            if (
-                getattr(self.agent, "session_id", None)
-                and self.agent.session_id != self.session_id
-            ):
-                self.session_id = self.agent.session_id
-                self._pending_title = None
-            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
-            summary = summarize_manual_compression(
-                original_history,
-                self.conversation_history,
-                approx_tokens,
-                new_tokens,
-            )
-            icon = "🗜️" if summary["noop"] else "✅"
-            print(f"  {icon} {summary['headline']}")
-            print(f"     {summary['token_line']}")
-            if summary["note"]:
-                print(f"     {summary['note']}")
+                compressed, _ = self.agent._compress_context(
+                    original_history,
+                    self.agent._cached_system_prompt or "",
+                    approx_tokens=approx_tokens,
+                    focus_topic=focus_topic or None,
+                )
+                self.conversation_history = compressed
+                # _compress_context ends the old session and creates a new child
+                # session on the agent (run_agent.py::_compress_context). Sync the
+                # CLI's session_id so /status, /resume, exit summary, and title
+                # generation all point at the live continuation session, not the
+                # ended parent. Without this, subsequent end_session() calls target
+                # the already-closed parent and the child is orphaned.
+                if (
+                    getattr(self.agent, "session_id", None)
+                    and self.agent.session_id != self.session_id
+                ):
+                    self.session_id = self.agent.session_id
+                    self._pending_title = None
+                new_tokens = estimate_messages_tokens_rough(self.conversation_history)
+                summary = summarize_manual_compression(
+                    original_history,
+                    self.conversation_history,
+                    approx_tokens,
+                    new_tokens,
+                )
+                icon = "🗜️" if summary["noop"] else "✅"
+                print(f"  {icon} {summary['headline']}")
+                print(f"     {summary['token_line']}")
+                if summary["note"]:
+                    print(f"     {summary['note']}")

-        except Exception as e:
-            print(f"  ❌ Compression failed: {e}")
+            except Exception as e:
+                print(f"  ❌ Compression failed: {e}")

    def _handle_debug_command(self):
        """Handle /debug — upload debug report + logs and print paste URLs."""
@@ -9525,9 +9555,20 @@ class HermesCLI:
        
        @kb.add('c-d')
        def handle_ctrl_d(event):
-            """Handle Ctrl+D - exit."""
-            self._should_exit = True
-            event.app.exit()
+            """Ctrl+D: delete char under cursor (standard readline behaviour).
+            Only exit when the input is empty — same as bash/zsh. Pending
+            attached images count as input and block the EOF-exit so the
+            user doesn't lose them silently.
+            """
+            buf = event.app.current_buffer
+            if buf.text:
+                buf.delete()
+            elif self._attached_images:
+                # Empty text but pending attachments — no-op, don't exit.
+                return
+            else:
+                self._should_exit = True
+                event.app.exit()

        _modal_prompt_active = Condition(
            lambda: bool(self._secret_state or self._sudo_state)
@@ -135,7 +135,7 @@ class SessionResetPolicy:
            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
-            notify=notify if notify is not None else True,
+            notify=_coerce_bool(notify, True),
            notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
        )

@@ -178,7 +178,7 @@ class PlatformConfig:
            home_channel = HomeChannel.from_dict(data["home_channel"])
        
        return cls(
-            enabled=data.get("enabled", False),
+            enabled=_coerce_bool(data.get("enabled"), False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
@@ -435,7 +435,7 @@ class GatewayConfig:
            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
-            always_log_local=data.get("always_log_local", True),
+            always_log_local=_coerce_bool(data.get("always_log_local"), True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    gac = telegram_cfg["group_allowed_chats"]
+                    if isinstance(gac, list):
+                        gac = ",".join(str(v) for v in gac)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):

        If the client disconnects mid-stream, ``agent.interrupt()`` is
        called so the agent stops issuing upstream LLM calls, then the
-        asyncio task is cancelled.  When ``store=True`` the full response
-        is persisted to the ResponseStore in a ``finally`` block so GET
-        /v1/responses/{id} and ``previous_response_id`` chaining work the
-        same as the batch path.
+        asyncio task is cancelled.  When ``store=True`` an initial
+        ``in_progress`` snapshot is persisted immediately after
+        ``response.created`` and disconnects update it to an
+        ``incomplete`` snapshot so GET /v1/responses/{id} and
+        ``previous_response_id`` chaining still have something to
+        recover from.
        """
        import queue as _q

@@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
        final_response_text = ""
        agent_error: Optional[str] = None
        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        terminal_snapshot_persisted = False
+
+        def _persist_response_snapshot(
+            response_env: Dict[str, Any],
+            *,
+            conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
+        ) -> None:
+            if not store:
+                return
+            if conversation_history_snapshot is None:
+                conversation_history_snapshot = list(conversation_history)
+                conversation_history_snapshot.append({"role": "user", "content": user_message})
+            self._response_store.put(response_id, {
+                "response": response_env,
+                "conversation_history": conversation_history_snapshot,
+                "instructions": instructions,
+                "session_id": session_id,
+            })
+            if conversation:
+                self._response_store.set_conversation(conversation, response_id)
+
+        def _persist_incomplete_if_needed() -> None:
+            """Persist an ``incomplete`` snapshot if no terminal one was written.
+
+            Called from both the client-disconnect (``ConnectionResetError``)
+            and server-cancellation (``asyncio.CancelledError``) paths so
+            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
+            working after abrupt stream termination.
+            """
+            if not store or terminal_snapshot_persisted:
+                return
+            incomplete_text = "".join(final_text_parts) or final_response_text
+            incomplete_items: List[Dict[str, Any]] = list(emitted_items)
+            if incomplete_text:
+                incomplete_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": incomplete_text}],
+                })
+            incomplete_env = _envelope("incomplete")
+            incomplete_env["output"] = incomplete_items
+            incomplete_env["usage"] = {
+                "input_tokens": usage.get("input_tokens", 0),
+                "output_tokens": usage.get("output_tokens", 0),
+                "total_tokens": usage.get("total_tokens", 0),
+            }
+            incomplete_history = list(conversation_history)
+            incomplete_history.append({"role": "user", "content": user_message})
+            if incomplete_text:
+                incomplete_history.append({"role": "assistant", "content": incomplete_text})
+            _persist_response_snapshot(
+                incomplete_env,
+                conversation_history_snapshot=incomplete_history,
+            )

        try:
            # response.created — initial envelope, status=in_progress
@@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "type": "response.created",
                "response": created_env,
            })
+            _persist_response_snapshot(created_env)
            last_activity = time.monotonic()

            async def _open_message_item() -> None:
@@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                _failed_history = list(conversation_history)
+                _failed_history.append({"role": "user", "content": user_message})
+                if final_response_text or agent_error:
+                    _failed_history.append({
+                        "role": "assistant",
+                        "content": final_response_text or agent_error,
+                    })
+                _persist_response_snapshot(
+                    failed_env,
+                    conversation_history_snapshot=_failed_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.failed", {
                    "type": "response.failed",
                    "response": failed_env,
@@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                full_history = list(conversation_history)
+                full_history.append({"role": "user", "content": user_message})
+                if isinstance(result, dict) and result.get("messages"):
+                    full_history.extend(result["messages"])
+                else:
+                    full_history.append({"role": "assistant", "content": final_response_text})
+                _persist_response_snapshot(
+                    completed_env,
+                    conversation_history_snapshot=full_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.completed", {
                    "type": "response.completed",
                    "response": completed_env,
                })

-                # Persist for future chaining / GET retrieval, mirroring
-                # the batch path behavior.
-                if store:
-                    full_history = list(conversation_history)
-                    full_history.append({"role": "user", "content": user_message})
-                    if isinstance(result, dict) and result.get("messages"):
-                        full_history.extend(result["messages"])
-                    else:
-                        full_history.append({"role": "assistant", "content": final_response_text})
-                    self._response_store.put(response_id, {
-                        "response": completed_env,
-                        "conversation_history": full_history,
-                        "instructions": instructions,
-                        "session_id": session_id,
-                    })
-                    if conversation:
-                        self._response_store.set_conversation(conversation, response_id)
-
        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+        except asyncio.CancelledError:
+            # Server-side cancellation (e.g. shutdown, request timeout) —
+            # persist an incomplete snapshot so GET /v1/responses/{id} and
+            # previous_response_id chaining still work, then re-raise so the
+            # runtime's cancellation semantics are respected.
+            _persist_incomplete_if_needed()
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE task cancelled")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
+            raise

        return response

@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
    return None


-def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
+def _split_host_port(value: str) -> tuple[str, int | None]:
+    """Split a URL, ``host[:port]`` or ``[v6][:port]`` into (lower-cased host, port)."""
+    raw = str(value or "").strip()
+    if "://" in raw:
+        # urlsplit()/.port raise ValueError on malformed input; treat that as "no host".
+        try:
+            parsed = urlsplit(raw)
+            return (parsed.hostname or "").lower().rstrip("."), parsed.port
+        except ValueError:
+            return "", None
+    if raw.startswith("[") and "]" in raw:
+        host, _, rest = raw[1:].partition("]")
+        has_port = rest.startswith(":") and rest[1:].isdigit()
+        return host.lower().rstrip("."), int(rest[1:]) if has_port else None
+    host, sep, maybe_port = raw.rpartition(":")
+    if sep and ":" not in host and maybe_port.isdigit():
+        return host.lower().rstrip("."), int(maybe_port)
+    return raw.lower().strip("[]").rstrip("."), None
+
+
+def _no_proxy_entries() -> list[str]:
+    """Return the non-blank, stripped comma-separated items of NO_PROXY then no_proxy."""
+    # Both spellings are read; duplicates are kept, in variable order.
+    joined = ",".join(os.environ.get(name, "") for name in ("NO_PROXY", "no_proxy"))
+    trimmed = (piece.strip() for piece in joined.split(","))
+    return [piece for piece in trimmed if piece]
+
+
+def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
+    """Return True when one NO_PROXY ``entry`` applies to ``host`` (and ``port``).
+
+    ``*`` matches everything; an entry carrying a port only matches that same
+    port.  Entries parsing as an IP network or address are compared as addresses,
+    anything else as a domain name (exact, subdomain or ``*.`` wildcard suffix).
+    """
+    def _as_address(text: str):
+        try:
+            return ipaddress.ip_address(text)
+        except ValueError:
+            return None  # not an IP literal
+
+    token = str(entry or "").strip().lower()
+    if not token:
+        return False
+    if token == "*":
+        return True
+
+    token_host, token_port = _split_host_port(token)
+    # A port-qualified entry needs the target to carry the identical port.
+    if (token_port is not None and token_port != port) or not token_host:
+        return False
+
+    try:
+        network = ipaddress.ip_network(token_host, strict=False)
+    except ValueError:
+        network = None
+    if network is not None:
+        target = _as_address(host)
+        return target is not None and target in network
+
+    token_ip = _as_address(token_host)
+    if token_ip is not None:
+        return _as_address(host) == token_ip
+    if token_host.startswith("*."):
+        return host.endswith(token_host[1:])
+    bare = token_host[1:] if token_host.startswith(".") else token_host
+    return host == bare or host.endswith(f".{bare}")
+
+
+def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
+    """Report whether any target host is covered by NO_PROXY/no_proxy.
+
+    ``target_hosts`` may be one host string or a collection of them.  Entries
+    may be exact hosts, domain or ``*.`` suffixes, IP literals, CIDR ranges,
+    ``host:port`` pairs, or ``*``.  Returns False when either side is empty.
+    """
+    rules = _no_proxy_entries()
+    if not (rules and target_hosts):
+        return False
+    # A lone string is one host, not an iterable of characters.
+    targets = [target_hosts] if isinstance(target_hosts, str) else list(target_hosts)
+
+    def _is_exempt(target: object) -> bool:
+        host, port = _split_host_port(str(target))
+        if not host:
+            return False
+        return any(_no_proxy_entry_matches(rule, host, port) for rule in rules)
+
+    return any(_is_exempt(target) for target in targets)
+
+
+def resolve_proxy_url(
+    platform_env_var: str | None = None,
+    *,
+    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
+) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
      1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
      2. macOS system proxy via ``scutil --proxy`` (auto-detect)

-    Returns *None* if no proxy is found.
+    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
+    of ``target_hosts``.
    """
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+    detected = normalize_proxy_url(_detect_macos_system_proxy())
+    if detected and should_bypass_proxy(target_hosts):
+        return None
+    return detected


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:

 class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
+    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH

    def __init__(self, config: PlatformConfig):
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Text sending
    # ------------------------------------------------------------------

+    @staticmethod
+    def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
+        """Base split minus "(n/m)" suffixes; iMessage bubbles read naturally without them."""
+        counter = re.compile(r"\s*\(\d+/\d+\)$")
+        pieces = BasePlatformAdapter.truncate_message(content, max_length)
+        return [counter.sub("", piece) for piece in pieces]
+
    async def send(
        self,
        chat_id: str,
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        text = strip_markdown(content or "")
+        text = self.format_message(content)
        if not text:
            return SendResult(success=False, error="BlueBubbles send requires text")
-        chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
+        # Split on paragraph breaks first (double newlines) so each thought
+        # becomes its own iMessage bubble, then truncate any that are still
+        # too long.
+        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
+        chunks: List[str] = []
+        for para in (paragraphs or [text]):
+            if len(para) <= self.MAX_MESSAGE_LENGTH:
+                chunks.append(para)
+            else:
+                chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
        last = SendResult(success=True)
        for chunk in chunks:
            guid = await self._resolve_chat_guid(chat_id)
@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

+            proxy_targets = ["api.telegram.org", *fallback_ips]
+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


-def _resolve_proxy_url() -> str | None:
+def _resolve_proxy_url(target_hosts=None) -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url("TELEGRAM_PROXY")
+    return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
-        proxy_url = _resolve_proxy_url()
+        proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
        if proxy_url and "proxy" not in transport_kwargs:
            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
@@ -298,50 +298,16 @@ from gateway.restart import (
 )


-def _normalize_whatsapp_identifier(value: str) -> str:
-    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
-    return (
-        str(value or "")
-        .strip()
-        .replace("+", "", 1)
-        .split(":", 1)[0]
-        .split("@", 1)[0]
-    )
+from gateway.whatsapp_identity import (
+    canonical_whatsapp_identifier as _canonical_whatsapp_identifier,  # noqa: F401
+    expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
+    normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
+)


-def _expand_whatsapp_auth_aliases(identifier: str) -> set:
-    """Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
-    normalized = _normalize_whatsapp_identifier(identifier)
-    if not normalized:
-        return set()
-
-    session_dir = _hermes_home / "whatsapp" / "session"
-    resolved = set()
-    queue = [normalized]
-
-    while queue:
-        current = queue.pop(0)
-        if not current or current in resolved:
-            continue
-
-        resolved.add(current)
-        for suffix in ("", "_reverse"):
-            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
-            if not mapping_path.exists():
-                continue
-            try:
-                mapped = _normalize_whatsapp_identifier(
-                    json.loads(mapping_path.read_text(encoding="utf-8"))
-                )
-            except Exception:
-                continue
-            if mapped and mapped not in resolved:
-                queue.append(mapped)
-
-    return resolved
-
 logger = logging.getLogger(__name__)

+
 # Sentinel placed into _running_agents immediately when a session starts
 # processing, *before* any await.  Prevents a second message for the same
 # session from bypassing the "already running" guard during the async gap
@@ -3037,6 +3003,7 @@ class GatewayRunner:
            Platform.QQBOT: "QQ_ALLOWED_USERS",
        }
        platform_group_env_map = {
+            Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
            Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
        }
        platform_allow_all_map = {
@@ -3093,7 +3060,7 @@ class GatewayRunner:
        # Check platform-specific and global allowlists
        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
        group_allowlist = ""
-        if source.chat_type == "group":
+        if source.chat_type in {"group", "forum"}:
            group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

@@ -3102,7 +3069,7 @@ class GatewayRunner:
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")

        # Some platforms authorize group traffic by chat ID rather than sender ID.
-        if group_allowlist and source.chat_type == "group" and source.chat_id:
+        if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
            allowed_group_ids = {
                chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
            }
@@ -3624,6 +3591,10 @@ class GatewayRunner:
                    if self._queue_during_drain_enabled()
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
+            if self._busy_input_mode == "queue":
+                logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
+                self._queue_or_replace_pending_event(_quick_key, event)
+                return None
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
@@ -5688,9 +5659,17 @@ class GatewayRunner:
                        lines = [f"Model switched to `{result.new_model}`"]
                        lines.append(f"Provider: {plabel}")
                        mi = result.model_info
+                        from hermes_cli.model_switch import resolve_display_context_length
+                        ctx = resolve_display_context_length(
+                            result.new_model,
+                            result.target_provider,
+                            base_url=result.base_url or current_base_url or "",
+                            api_key=result.api_key or current_api_key or "",
+                            model_info=mi,
+                        )
+                        if ctx:
+                            lines.append(f"Context: {ctx:,} tokens")
                        if mi:
-                            if mi.context_window:
-                                lines.append(f"Context: {mi.context_window:,} tokens")
                            if mi.max_output:
                                lines.append(f"Max output: {mi.max_output:,} tokens")
                            if mi.has_cost_data():
@@ -5824,28 +5803,25 @@ class GatewayRunner:
        lines = [f"Model switched to `{result.new_model}`"]
        lines.append(f"Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry.
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or current_base_url or "",
+            api_key=result.api_key or current_api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            lines.append(f"Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                lines.append(f"Context: {mi.context_window:,} tokens")
            if mi.max_output:
                lines.append(f"Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                lines.append(f"Cost: {mi.format_cost()}")
            lines.append(f"Capabilities: {mi.format_capabilities()}")
-        else:
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or current_base_url,
-                    api_key=result.api_key or current_api_key,
-                    provider=result.target_provider,
-                )
-                lines.append(f"Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -7257,13 +7233,19 @@ class GatewayRunner:
                logger.debug("Failed to list titled sessions: %s", e)
                return f"Could not list sessions: {e}"

-        # Resolve the name to a session ID
+        # Resolve the name to a session ID.
        target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return (
                f"No session found matching '**{name}**'.\n"
                "Use `/resume` with no arguments to see available sessions."
            )
+        # Compression creates child continuations that hold the live transcript.
+        # Follow that chain so gateway /resume matches CLI behavior (#15000).
+        try:
+            target_id = self._session_db.resolve_resume_session_id(target_id)
+        except Exception as e:
+            logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)

        # Check if already on that session
        current_entry = self.session_store.get_or_create_session(source)
@@ -60,6 +60,10 @@ from .config import (
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
 )
+from .whatsapp_identity import (
+    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,
+)


@dataclass
@@ -281,6 +285,18 @@ def build_session_context_prompt(
            "Do not promise to perform these actions. If the user asks, explain "
            "that you can only read messages sent directly to you and respond."
        )
+    elif context.source.platform == Platform.BLUEBUBBLES:
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are responding via iMessage. "
+            "Keep responses short and conversational — think texts, not essays. "
+            "Structure longer replies as separate short thoughts, each separated "
+            "by a blank line (double newline). Each block between blank lines "
+            "will be delivered as its own iMessage bubble, so write accordingly: "
+            "one idea per bubble, 1–3 sentences each. "
+            "If the user needs a detailed answer, give the short version first "
+            "and offer to elaborate."
+        )

    # Connected platforms
    platforms_list = ["local (files on this machine)"]
@@ -518,15 +534,24 @@ def build_session_key(
    """
    platform = source.platform.value
    if source.chat_type == "dm":
-        if source.chat_id:
+        dm_chat_id = source.chat_id
+        if source.platform == Platform.WHATSAPP:
+            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
+
+        if dm_chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{source.chat_id}"
+                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{dm_chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
+    if participant_id and source.platform == Platform.WHATSAPP:
+        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
+        # single group member gets two isolated per-user sessions when the
+        # bridge reshuffles alias forms.
+        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
@@ -0,0 +1,135 @@
+"""Shared helpers for canonicalising WhatsApp sender identity.
+
+WhatsApp's bridge can surface the same human under two different JID shapes
+within a single conversation:
+
+- LID form: ``999999999999999@lid``
+- Phone form: ``15551234567@s.whatsapp.net``
+
+Both the authorisation path (:mod:`gateway.run`) and the session-key path
+(:mod:`gateway.session`) need to collapse these aliases to a single stable
+identity. This module is the single source of truth for that resolution so
+the two paths can never drift apart.
+
+Public helpers:
+
+- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
+  down to the bare numeric identifier.
+- :func:`canonical_whatsapp_identifier` — walk the bridge's
+  ``lid-mapping-*.json`` files and return a stable canonical identity
+  across phone/LID variants.
+- :func:`expand_whatsapp_aliases` — return the full alias set for an
+  identifier. Used by authorisation code that needs to match any known
+  form of a sender against an allow-list.
+
+Plugins that need per-sender behaviour on WhatsApp (role-based routing,
+per-contact authorisation, policy gating in a gateway hook) should use
+``canonical_whatsapp_identifier`` so their bookkeeping lines up with
+Hermes' own session keys.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Set
+
+from hermes_constants import get_hermes_home
+
+
+def normalize_whatsapp_identifier(value: str) -> str:
+    """Reduce a WhatsApp JID/LID string to its bare identifier.
+
+    Handles every identifier shape the WhatsApp bridge may emit:
+    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
+    ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``.
+    All of these come back as ``"60123456789"``, which is suitable for
+    equality comparisons.
+
+    Processing order: surrounding whitespace is stripped, the first ``+`` is
+    removed, then everything from the first ``:`` (device suffix) and from
+    the first ``@`` (server part) onwards is discarded. Falsy input
+    (``None``, ``""``) yields ``""``.
+
+    Useful for plugins that want to match sender IDs against
+    user-supplied config (phone numbers in ``config.yaml``) without
+    worrying about which variant the bridge happens to deliver.
+    """
+    text = str(value or "").strip()
+    without_plus = text.replace("+", "", 1)
+    before_device, _, _ = without_plus.partition(":")
+    bare_identifier, _, _ = before_device.partition("@")
+    return bare_identifier
+
+
+def expand_whatsapp_aliases(identifier: str) -> Set[str]:
+    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.
+
+    Starting from ``identifier``, follows
+    ``$HERMES_HOME/whatsapp/session/lid-mapping-<id>.json`` and
+    ``lid-mapping-<id>_reverse.json`` transitively and returns every
+    identifier reached. The result always includes the normalized input
+    itself, so callers can safely ``in`` check against the return value
+    without a separate fallback branch.
+
+    The lookup is best-effort: mapping files that are missing, unreadable,
+    not valid JSON, or whose payload is not a plain string/integer are
+    skipped.
+
+    Returns an empty set if ``identifier`` normalizes to empty.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return set()
+
+    session_dir = get_hermes_home() / "whatsapp" / "session"
+    resolved: Set[str] = set()
+    # Visit order does not affect the resulting set, so a LIFO stack avoids
+    # the O(n) cost of ``list.pop(0)``.
+    pending = [normalized]
+
+    while pending:
+        current = pending.pop()
+        if not current or current in resolved:
+            continue
+
+        resolved.add(current)
+        for suffix in ("", "_reverse"):
+            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
+            if not mapping_path.exists():
+                continue
+            try:
+                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
+            except Exception:
+                # Best-effort: an unreadable or corrupt mapping file must
+                # never break alias resolution.
+                continue
+            # Only scalar identifiers are meaningful. str() of a list, dict
+            # or bool payload would otherwise be normalized into a bogus
+            # alias (e.g. "[1]") that a shortest-alias selection could pick.
+            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
+                continue
+            mapped = normalize_whatsapp_identifier(payload)
+            if mapped and mapped not in resolved:
+                pending.append(mapped)
+
+    return resolved
+
+
+def canonical_whatsapp_identifier(identifier: str) -> str:
+    """Return a stable WhatsApp sender identity across phone-JID/LID variants.
+
+    The same person may be surfaced either under a phone-format JID
+    (``60123456789@s.whatsapp.net``) or under a LID (``1234567890@lid``).
+    That holds for a DM ``chat_id`` *and* for the ``participant_id`` of a
+    member inside a group chat — both represent a user identity, and the
+    bridge may flip between the two for the same human.
+
+    The identifier is normalized, expanded to its full alias set via
+    :func:`expand_whatsapp_aliases` (which walks the bridge's
+    ``whatsapp/session/lid-mapping-*.json`` files transitively), and the
+    shortest alias is chosen; aliases of equal length are ordered
+    lexicographically so the choice is deterministic.
+    :func:`gateway.session.build_session_key` uses this for both WhatsApp
+    DM chat_ids and WhatsApp group participant_ids, so callers get the
+    same session-key identity Hermes itself uses.
+
+    Plugins that need per-sender behaviour (role-based routing,
+    authorisation, per-contact policy) should use this so their
+    bookkeeping lines up with Hermes' session bookkeeping even when
+    the bridge reshuffles aliases.
+
+    Returns an empty string if ``identifier`` normalizes to empty. If no
+    mapping files exist yet (fresh bridge install), returns the
+    normalized input unchanged.
+    """
+    base = normalize_whatsapp_identifier(identifier)
+    if base:
+        # The alias set always contains `base` itself, so with no
+        # lid-mapping files present min() simply hands `base` back.
+        return min(
+            expand_whatsapp_aliases(base),
+            key=lambda alias: (len(alias), alias),
+        )
+    return ""
@@ -743,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:

    try:
        raw = json.loads(auth_file.read_text())
-    except Exception:
+    except Exception as exc:
+        corrupt_path = auth_file.with_suffix(".json.corrupt")
+        try:
+            import shutil
+            shutil.copy2(auth_file, corrupt_path)
+        except Exception:
+            pass
+        logger.warning(
+            "auth: failed to parse %s (%s) — starting with empty store. "
+            "Corrupt file preserved at %s",
+            auth_file, exc, corrupt_path,
+        )
        return {"version": AUTH_STORE_VERSION, "providers": {}}

    if isinstance(raw, dict) and (
@@ -126,6 +126,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="[name]"),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
+    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
+               cli_only=True, args_hint="[queue|interrupt|status]",
+               subcommands=("queue", "interrupt", "status")),

    # Tools & Skills
    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -6715,9 +6715,15 @@ def cmd_dashboard(args):
    try:
        import fastapi  # noqa: F401
        import uvicorn  # noqa: F401
-    except ImportError:
-        print("Web UI dependencies not installed.")
-        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
+    except ImportError as e:
+        print("Web UI dependencies not installed (need fastapi + uvicorn).")
+        print(
+            f"Re-install the package into this interpreter so metadata updates apply:\n"
+            f"  cd {PROJECT_ROOT}\n"
+            f"  {sys.executable} -m pip install -e .\n"
+            "If `pip` is missing in this venv, use:  uv pip install -e ."
+        )
+        print(f"Import error: {e}")
        sys.exit(1)

    if "HERMES_WEB_DIST" not in os.environ:
@@ -6726,11 +6732,13 @@ def cmd_dashboard(args):

    from hermes_cli.web_server import start_server

+    embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
    start_server(
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
        allow_public=getattr(args, "insecure", False),
+        embedded_chat=embedded_chat,
    )


@@ -8916,6 +8924,14 @@ Examples:
        action="store_true",
        help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
    )
+    dashboard_parser.add_argument(
+        "--tui",
+        action="store_true",
+        help=(
+            "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
+            "Alternatively set HERMES_DASHBOARD_TUI=1."
+        ),
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -527,6 +527,42 @@ def _resolve_alias_fallback(
    return None


+def resolve_display_context_length(
+    model: str,
+    provider: str,
+    base_url: str = "",
+    api_key: str = "",
+    model_info: Optional[ModelInfo] = None,
+) -> Optional[int]:
+    """Pick the context length that /model output should display.
+
+    models.dev reports per-vendor context (e.g. gpt-5.5 = 1.05M on openai)
+    but provider-enforced limits can be lower (e.g. Codex OAuth caps the
+    same slug at 272k). ``agent.model_metadata.get_model_context_length``
+    is treated as the authoritative, provider-aware source.
+
+    Resolution order:
+
+    1. The provider-aware resolver, when it imports, runs without raising,
+       and returns a truthy value.
+    2. ``model_info.context_window``, when ``model_info`` is given and the
+       attribute is truthy.
+    3. ``None``.
+
+    Any exception from step 1 (including a failed import) is swallowed so
+    the display path never breaks a model switch.
+    """
+    provider_aware: Optional[int] = None
+    try:
+        from agent.model_metadata import get_model_context_length
+        looked_up = get_model_context_length(
+            model,
+            base_url=base_url or "",
+            api_key=api_key or "",
+            provider=provider or None,
+        )
+        if looked_up:
+            provider_aware = int(looked_up)
+    except Exception:
+        provider_aware = None
+    if provider_aware is not None:
+        return provider_aware
+
+    # Fall back to the raw models.dev entry only when the resolver gave nothing.
+    if model_info is None or not model_info.context_window:
+        return None
+    return int(model_info.context_window)
+
+
 # ---------------------------------------------------------------------------
 # Core model-switching pipeline
 # ---------------------------------------------------------------------------
@@ -42,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openrouter/elephant-alpha",       "free"),
-    ("openai/gpt-5.4",                  ""),
+    ("openai/gpt-5.5",                  ""),
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
    ("xiaomi/mimo-v2.5",                 ""),
@@ -65,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking",  ""),
-    ("openai/gpt-5.4-pro",              ""),
+    ("openai/gpt-5.5-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
 ]

@@ -120,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
-        "openai/gpt-5.4",
+        "openai/gpt-5.5",
        "openai/gpt-5.4-mini",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
@@ -139,7 +139,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "arcee-ai/trinity-large-thinking",
-        "openai/gpt-5.4-pro",
+        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-nano",
    ],
    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
@@ -0,0 +1,229 @@
+"""PTY bridge for `hermes dashboard` chat tab.
+
+Wraps a child process behind a pseudo-terminal so its ANSI output can be
+streamed to a browser-side terminal emulator (xterm.js) and typed
+keystrokes can be fed back in.  The only caller today is the
+``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
+
+Design constraints:
+
+* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
+  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
+  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
+  install/platform message so the dashboard can render a banner instead of
+  crashing.
+* **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
+  which is a pure-Python wrapper around the OS calls.  The browser talks
+  to the same ``hermes --tui`` binary it would launch from the CLI, so
+  every TUI feature (slash popover, model picker, tool rows, markdown,
+  skin engine, clarify/sudo/approval prompts) ships automatically.
+* **Byte-safe I/O.**  Reads and writes go through the PTY master fd
+  directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
+  streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
+  mid-read.
+"""
+
+from __future__ import annotations
+
+import errno
+import os
+import select
+import signal
+import struct
+import sys
+import time
+from typing import Optional, Sequence
+
+# fcntl and termios exist only on Unix.  Import them defensively so that
+# importing this module on native Windows does not fail with ImportError
+# before PtyBridge.spawn() gets the chance to raise PtyUnavailableError.
+try:
+    import fcntl
+    import termios
+except ImportError:  # pragma: no cover - native Windows
+    fcntl = None  # type: ignore[assignment]
+    termios = None  # type: ignore[assignment]
+
+# ptyprocess is optional; _PTY_AVAILABLE records whether a PTY can be
+# spawned here (package importable and not a "win*" platform).
+try:
+    import ptyprocess  # type: ignore
+    _PTY_AVAILABLE = not sys.platform.startswith("win")
+except ImportError:  # pragma: no cover - dev env without ptyprocess
+    ptyprocess = None  # type: ignore
+    _PTY_AVAILABLE = False
+
+
+__all__ = ["PtyBridge", "PtyUnavailableError"]
+
+
+class PtyUnavailableError(RuntimeError):
+    """Raised when a PTY cannot be created on this platform.
+
+    Today this means native Windows (no ConPTY bindings) or a dev
+    environment missing the ``ptyprocess`` dependency.  It is raised by
+    :meth:`PtyBridge.spawn` with a user-readable message; the dashboard
+    surfaces that message to the user as a chat-tab banner.
+    """
+
+
+class PtyBridge:
+    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
+
+    Not thread-safe.  A single bridge is owned by the WebSocket handler
+    that spawned it; the reader runs in an executor thread while writes
+    happen on the event-loop thread.  Both sides are OK because the
+    kernel PTY is the actual synchronization point — we never call
+    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
+    ``os.write`` on the master fd, which is safe.
+    """
+
+    # ``struct winsize`` fields are unsigned shorts: a value above 65535
+    # makes ``struct.pack("H", ...)`` raise ``struct.error``.
+    _MAX_DIMENSION = 0xFFFF
+
+    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
+        self._proc = proc
+        self._fd: int = proc.fd
+        self._closed = False
+
+    @classmethod
+    def _clamp_dimension(cls, value: int) -> int:
+        """Clamp a row/column count into the 1..65535 range winsize accepts."""
+        return max(1, min(int(value), cls._MAX_DIMENSION))
+
+    # -- lifecycle --------------------------------------------------------
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """True if a PTY can be spawned on this platform."""
+        return bool(_PTY_AVAILABLE)
+
+    @classmethod
+    def spawn(
+        cls,
+        argv: Sequence[str],
+        *,
+        cwd: Optional[str] = None,
+        env: Optional[dict] = None,
+        cols: int = 80,
+        rows: int = 24,
+    ) -> "PtyBridge":
+        """Spawn ``argv`` behind a new PTY and return a bridge.
+
+        ``cols``/``rows`` set the initial terminal size and are clamped to
+        1..65535 (the range ``struct winsize`` can represent).
+
+        Raises :class:`PtyUnavailableError` if the platform can't host a
+        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
+        ordinary exec failures (missing binary, bad cwd, etc.).
+        """
+        if not _PTY_AVAILABLE:
+            if sys.platform.startswith("win"):
+                raise PtyUnavailableError(
+                    "Pseudo-terminals are unavailable on this platform. "
+                    "Hermes Agent supports Windows only via WSL."
+                )
+            if ptyprocess is None:
+                raise PtyUnavailableError(
+                    "The `ptyprocess` package is missing. "
+                    "Install with: pip install ptyprocess "
+                    "(or pip install -e '.[pty]')."
+                )
+            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
+        # Let caller-supplied env fully override inheritance; if they pass
+        # None we inherit the server's env (same semantics as subprocess).
+        spawn_env = os.environ.copy() if env is None else env
+        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
+            list(argv),
+            cwd=cwd,
+            env=spawn_env,
+            dimensions=(cls._clamp_dimension(rows), cls._clamp_dimension(cols)),
+        )
+        return cls(proc)
+
+    @property
+    def pid(self) -> int:
+        """Process id of the spawned child."""
+        return int(self._proc.pid)
+
+    def is_alive(self) -> bool:
+        """True while the bridge is open and the child reports alive."""
+        if self._closed:
+            return False
+        try:
+            return bool(self._proc.isalive())
+        except Exception:
+            return False
+
+    # -- I/O --------------------------------------------------------------
+
+    def read(self, timeout: float = 0.2) -> Optional[bytes]:
+        """Read up to 64 KiB of raw bytes from the PTY master.
+
+        Returns:
+            * non-empty bytes — one or more bytes of child output
+            * empty bytes (``b""``) — no data available within ``timeout``
+            * None — child has exited and the master fd is at EOF
+
+        Never blocks longer than ``timeout`` seconds.  Safe to call after
+        :meth:`close`; returns ``None`` in that case.
+        """
+        if self._closed:
+            return None
+        try:
+            readable, _, _ = select.select([self._fd], [], [], timeout)
+        except (OSError, ValueError):
+            return None
+        if not readable:
+            return b""
+        try:
+            data = os.read(self._fd, 65536)
+        except OSError as exc:
+            # EIO on Linux = slave side closed.  EBADF = already closed.
+            if exc.errno in (errno.EIO, errno.EBADF):
+                return None
+            raise
+        if not data:
+            return None
+        return data
+
+    def write(self, data: bytes) -> None:
+        """Write raw bytes to the PTY master (i.e. the child's stdin).
+
+        A no-op after :meth:`close` or for empty ``data``.  EIO/EBADF/EPIPE
+        abandon the remaining bytes; other ``OSError`` values propagate.
+        """
+        if self._closed or not data:
+            return
+        # os.write can return a short write under load; loop until drained.
+        view = memoryview(data)
+        while view:
+            try:
+                n = os.write(self._fd, view)
+            except OSError as exc:
+                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
+                    return
+                raise
+            if n <= 0:
+                return
+            view = view[n:]
+
+    def resize(self, cols: int, rows: int) -> None:
+        """Forward a terminal resize to the child via ``TIOCSWINSZ``.
+
+        ``cols`` and ``rows`` are clamped to 1..65535 so an out-of-range
+        request cannot raise ``struct.error``.  ``OSError`` from the ioctl
+        is ignored; the call is a no-op after :meth:`close`.
+        """
+        if self._closed:
+            return
+        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
+        winsize = struct.pack(
+            "HHHH",
+            self._clamp_dimension(rows),
+            self._clamp_dimension(cols),
+            0,
+            0,
+        )
+        try:
+            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
+        except OSError:
+            pass
+
+    # -- teardown ---------------------------------------------------------
+
+    def close(self) -> None:
+        """Terminate the child and close fds.
+
+        Signals escalate SIGHUP → SIGTERM → SIGKILL, waiting up to 0.5s
+        after each one for the child to exit.
+
+        Idempotent.  Reaping the child is important so we don't leak
+        zombies across the lifetime of the dashboard process.
+        """
+        if self._closed:
+            return
+        self._closed = True
+
+        # SIGHUP is the conventional "your terminal went away" signal.
+        # We escalate if the child ignores it.
+        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
+            if not self._proc.isalive():
+                break
+            try:
+                self._proc.kill(sig)
+            except Exception:
+                pass
+            deadline = time.monotonic() + 0.5
+            while self._proc.isalive() and time.monotonic() < deadline:
+                time.sleep(0.02)
+
+        try:
+            self._proc.close(force=True)
+        except Exception:
+            pass
+
+    # Context-manager sugar — handy in tests and ad-hoc scripts.
+    def __enter__(self) -> "PtyBridge":
+        return self
+
+    def __exit__(self, *_exc) -> None:
+        self.close()
@@ -368,13 +368,9 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Spotify Web API",
-                "tag": "PKCE OAuth — run `hermes auth spotify` after this",
-                "env_vars": [
-                    {"key": "HERMES_SPOTIFY_CLIENT_ID", "prompt": "Spotify app client_id",
-                     "url": "https://developer.spotify.com/dashboard"},
-                    {"key": "HERMES_SPOTIFY_REDIRECT_URI", "prompt": "Redirect URI (must be allow-listed in your Spotify app)",
-                     "default": "http://127.0.0.1:43827/spotify/callback"},
-                ],
+                "tag": "PKCE OAuth — opens the setup wizard",
+                "env_vars": [],
+                "post_setup": "spotify",
            },
        ],
    },
@@ -478,6 +474,35 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    kittentts install timed out (>5min)")
            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")

+    elif post_setup_key == "spotify":
+        # Run the full `hermes auth spotify` flow — if the user has no
+        # client_id yet, this drops them into the interactive wizard
+        # (opens the Spotify dashboard, prompts for client_id, persists
+        # to ~/.hermes/.env), then continues straight into PKCE. If they
+        # already have an app, it skips the wizard and just does OAuth.
+        from types import SimpleNamespace
+        try:
+            from hermes_cli.auth import login_spotify_command
+        except Exception as exc:
+            _print_warning(f"    Could not load Spotify auth: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+            return
+        _print_info("    Starting Spotify login...")
+        try:
+            login_spotify_command(SimpleNamespace(
+                client_id=None, redirect_uri=None, scope=None,
+                no_browser=False, timeout=None,
+            ))
+            _print_success("    Spotify authenticated")
+        except SystemExit as exc:
+            # User aborted the wizard, or OAuth failed — don't fail the
+            # toolset enable; they can retry with `hermes auth spotify`.
+            _print_warning(f"    Spotify login did not complete: {exc}")
+            _print_info("    Run later: hermes auth spotify")
+        except Exception as exc:
+            _print_warning(f"    Spotify login failed: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -692,6 +717,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
+    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -699,6 +725,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
+    if "no_mcp" not in enabled_toolset_keys:
+        preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -49,7 +49,7 @@ from hermes_cli.config import (
 from gateway.status import get_running_pid, read_runtime_status

 try:
-    from fastapi import FastAPI, HTTPException, Request
+    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
@@ -73,6 +73,10 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 _SESSION_TOKEN = secrets.token_urlsafe(32)
 _SESSION_HEADER_NAME = "X-Hermes-Session-Token"

+# In-browser Chat tab (/chat, /api/pty, …).  Off unless ``hermes dashboard --tui``
+# or HERMES_DASHBOARD_TUI=1.  Set from :func:`start_server`.
+_DASHBOARD_EMBEDDED_CHAT_ENABLED = False
+
 # Simple rate limiter for the reveal endpoint
 _reveal_timestamps: List[float] = []
 _REVEAL_MAX_PER_WINDOW = 5
@@ -283,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
    "display.busy_input_mode": {
        "type": "select",
        "description": "Input behavior while agent is running",
-        "options": ["queue", "interrupt", "block"],
+        "options": ["interrupt", "queue"],
    },
    "memory.provider": {
        "type": "select",
@@ -1529,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
        with urllib.request.urlopen(req, timeout=20) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Token exchange failed: {e}"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = f"Token exchange failed: {e}"
        return {"ok": False, "status": "error", "message": sess["error_message"]}

    access_token = result.get("access_token", "")
    refresh_token = result.get("refresh_token", "")
    expires_in = int(result.get("expires_in") or 3600)
    if not access_token:
-        sess["status"] = "error"
-        sess["error_message"] = "No access token returned"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = "No access token returned"
        return {"ok": False, "status": "error", "message": sess["error_message"]}

    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
    try:
        _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Save failed: {e}"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = f"Save failed: {e}"
        return {"ok": False, "status": "error", "message": sess["error_message"]}
-    sess["status"] = "approved"
+    with _oauth_sessions_lock:
+        sess["status"] = "approved"
    _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
    return {"ok": True, "status": "approved"}

@@ -2263,6 +2271,329 @@ async def get_usage_analytics(days: int = 30):
        db.close()


+# ---------------------------------------------------------------------------
+# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
+#
+# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
+# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
+# WebSocket.  The browser renders the ANSI through xterm.js (see
+# web/src/pages/ChatPage.tsx).
+#
+# Auth: ``?token=<session_token>`` query param (browsers can't set
+# Authorization on the WS upgrade).  Same ephemeral ``_SESSION_TOKEN`` as
+# REST.  Localhost-only — we defensively reject non-loopback clients even
+# though uvicorn binds to 127.0.0.1.
+# ---------------------------------------------------------------------------
+
+import re
+import asyncio
+
+from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+
+_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
+_PTY_READ_CHUNK_TIMEOUT = 0.2
+_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Starlette's TestClient reports the peer as "testclient"; treat it as
+# loopback so tests don't need to rewrite request scope.
+_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
+
+# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
+# and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
+# the chat tab generates on mount; an entry is evicted as soon as its last
+# subscriber drops (publishers are never stored here, only /api/events sockets).
+_event_channels: dict[str, set] = {}
+_event_lock = asyncio.Lock()
+
+
+def _resolve_chat_argv(
+    resume: Optional[str] = None,
+    sidecar_url: Optional[str] = None,
+) -> tuple[list[str], Optional[str], Optional[dict]]:
+    """Work out the ``(argv, cwd, env)`` triple used to spawn the chat PTY.
+
+    The command is whatever ``_make_tui_argv`` produces for the ``ui-tui``
+    project directory with ``tui_dev=False``.  Tests monkeypatch this
+    function to substitute a tiny fake command (``cat``,
+    ``sh -c 'printf …'``) so nothing has to build Node or the TUI bundle.
+
+    Optional settings travel through the environment rather than argv:
+
+    * ``resume`` becomes ``HERMES_TUI_RESUME`` — the same mechanism
+      ``hermes_cli.main._launch_tui`` uses on the CLI path.  Appending
+      ``--resume <id>`` to argv doesn't work because ``ui-tui`` does not
+      parse its argv.
+    * ``sidecar_url`` becomes ``HERMES_TUI_SIDECAR_URL`` so the spawned
+      ``tui_gateway.entry`` can mirror dispatcher emits to the dashboard's
+      ``/api/pub`` endpoint (see :func:`pub_ws`).
+
+    ``env`` is ``None`` when neither option is set; otherwise it is a copy
+    of ``os.environ`` with the overrides applied.  ``cwd`` is ``None`` when
+    ``_make_tui_argv`` returns a falsy working directory.
+    """
+    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
+
+    command, workdir = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
+
+    # Only the options that were actually supplied end up in the child env.
+    overrides = {
+        name: value
+        for name, value in (
+            ("HERMES_TUI_RESUME", resume),
+            ("HERMES_TUI_SIDECAR_URL", sidecar_url),
+        )
+        if value
+    }
+    child_env: Optional[dict] = {**os.environ, **overrides} if overrides else None
+
+    resolved_cwd = str(workdir) if workdir else None
+    return list(command), resolved_cwd, child_env
+
+
+def _build_sidecar_url(channel: str) -> Optional[str]:
+    """Build the ``ws://…/api/pub`` URL the PTY child publishes events to.
+
+    Reads ``bound_host`` / ``bound_port`` from ``app.state`` and returns
+    ``None`` when either is missing or falsy (server not bound yet).  The
+    session token and ``channel`` are carried as URL-encoded query params.
+    Bare IPv6 literals are wrapped in brackets.
+
+    NOTE(review): a wildcard bind host (``0.0.0.0`` / ``::``) is used
+    verbatim as the connect target — confirm that is intended.
+    """
+    state = app.state
+    host = getattr(state, "bound_host", None)
+    port = getattr(state, "bound_port", None)
+
+    if not (host and port):
+        return None
+
+    is_bare_ipv6 = ":" in host and not host.startswith("[")
+    authority = f"[{host}]:{port}" if is_bare_ipv6 else f"{host}:{port}"
+    query = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
+
+    return f"ws://{authority}/api/pub?{query}"
+
+
+async def _broadcast_event(channel: str, payload: str) -> None:
+    """Deliver one publisher frame to each current subscriber of `channel`.
+
+    The subscriber set is snapshotted under ``_event_lock`` and the sends
+    happen after the lock is released.  A failed send is ignored.
+    """
+    async with _event_lock:
+        recipients = tuple(_event_channels.get(channel, ()))
+
+    for recipient in recipients:
+        try:
+            await recipient.send_text(payload)
+        except Exception:
+            # Subscriber went away mid-send; its own /api/events handler
+            # removes it from the registry in its finally clause.
+            continue
+
+
+def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
+    """Return the ``channel`` query param if it is a valid id, else ``None``.
+
+    Despite the name, no close code is returned; callers decide how to
+    close the socket when they get ``None``.
+
+    ``fullmatch`` is used instead of ``match`` because ``$`` also matches
+    just before a trailing newline, so ``match`` would accept ``"abc\\n"``.
+    """
+    channel = ws.query_params.get("channel", "")
+
+    return channel if _VALID_CHANNEL_RE.fullmatch(channel) else None
+
+
+@app.websocket("/api/pty")
+async def pty_ws(ws: WebSocket) -> None:
+    """Bridge one browser WebSocket to a freshly spawned chat PTY.
+
+    The upgrade is rejected before ``accept`` when the embedded chat is
+    disabled (4403), the ``token`` query param does not match the session
+    token (4401), or the peer reports a non-loopback host (4403).  After
+    accept, the command from :func:`_resolve_chat_argv` is spawned and bytes
+    are pumped in both directions until either side goes away.
+
+    A frame consisting solely of ``ESC[RESIZE:<cols>;<rows>]`` is applied as
+    a window resize and never written to the PTY.  When the child exits, the
+    socket is closed with code 1000 so the browser learns the session ended.
+    """
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    # --- auth + loopback check (before accept so we can close cleanly) ---
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    await ws.accept()
+
+    # --- spawn PTY ------------------------------------------------------
+    resume = ws.query_params.get("resume") or None
+    channel = _channel_or_close_code(ws)
+    sidecar_url = _build_sidecar_url(channel) if channel else None
+
+    try:
+        argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
+    except SystemExit as exc:
+        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+    try:
+        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
+    except PtyUnavailableError as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+    except (FileNotFoundError, OSError) as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+    loop = asyncio.get_running_loop()
+
+    # --- reader task: PTY master → WebSocket ----------------------------
+    async def pump_pty_to_ws() -> None:
+        while True:
+            chunk = await loop.run_in_executor(
+                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
+            )
+            if chunk is None:  # EOF
+                break
+            if not chunk:  # no data this tick; yield control and retry
+                await asyncio.sleep(0)
+                continue
+            try:
+                await ws.send_bytes(chunk)
+            except Exception:
+                return
+        # The child exited.  Close the socket so the browser learns the
+        # session ended and the writer loop below is released from
+        # ``ws.receive()`` instead of idling until the tab is closed.
+        try:
+            await ws.close(code=1000)
+        except Exception:
+            pass
+
+    reader_task = asyncio.create_task(pump_pty_to_ws())
+
+    # --- writer loop: WebSocket → PTY master ----------------------------
+    try:
+        while True:
+            msg = await ws.receive()
+            msg_type = msg.get("type")
+            if msg_type == "websocket.disconnect":
+                break
+            raw = msg.get("bytes")
+            if raw is None:
+                text = msg.get("text")
+                raw = text.encode("utf-8") if isinstance(text, str) else b""
+            if not raw:
+                continue
+
+            # Resize escape is consumed locally, never written to the PTY.
+            match = _RESIZE_RE.match(raw)
+            if match and match.end() == len(raw):
+                try:
+                    # winsize fields are unsigned shorts; cap client input.
+                    cols = min(int(match.group(1)), 0xFFFF)
+                    rows = min(int(match.group(2)), 0xFFFF)
+                except ValueError:
+                    # Digit run too long for int() (str-conversion limit).
+                    continue
+                bridge.resize(cols=cols, rows=rows)
+                continue
+
+            bridge.write(raw)
+    except WebSocketDisconnect:
+        pass
+    finally:
+        reader_task.cancel()
+        try:
+            await reader_task
+        except (asyncio.CancelledError, Exception):
+            pass
+        bridge.close()
+
+
+# ---------------------------------------------------------------------------
+# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
+#
+# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
+# dashboard can render structured metadata (model badge, tool-call sidebar,
+# slash launcher, session info) alongside the xterm.js terminal that PTY
+# already paints. Both transports bind to the same session id when one is
+# active, so a tool.start emitted by the agent fans out to both sinks.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/ws")
+async def gateway_ws(ws: WebSocket) -> None:
+    """Authenticate the socket, then hand it to ``tui_gateway.ws.handle_ws``.
+
+    Close codes used before the hand-off: 4403 when the embedded chat is
+    disabled or the peer is not a loopback host, 4401 on a token mismatch.
+    """
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    supplied = ws.query_params.get("token", "")
+    if not hmac.compare_digest(supplied.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    peer = ws.client
+    peer_host = peer.host if peer else ""
+    if peer_host and peer_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    # Imported lazily, only once a connection has passed the checks above.
+    from tui_gateway.ws import handle_ws
+
+    await handle_ws(ws)
+
+
+# ---------------------------------------------------------------------------
+# /api/pub + /api/events — chat-tab event broadcast.
+#
+# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
+# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
+# dispatcher emit through it.  The dashboard fans those frames out to any
+# subscriber that opened /api/events on the same channel id.  This is what
+# gives the React sidebar its tool-call feed without breaking the PTY
+# child's stdio handshake with Ink.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/pub")
+async def pub_ws(ws: WebSocket) -> None:
+    """Publisher side of the chat-tab event broadcast.
+
+    After the feature-flag, token and loopback checks (4403 / 4401 / 4403)
+    and channel validation (4400), every text frame received is fanned out
+    to the channel's subscribers via :func:`_broadcast_event` until the
+    publisher disconnects.
+    """
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    supplied = ws.query_params.get("token", "")
+    if not hmac.compare_digest(supplied.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    peer = ws.client
+    peer_host = peer.host if peer else ""
+    if peer_host and peer_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    topic = _channel_or_close_code(ws)
+    if not topic:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    try:
+        while True:
+            frame = await ws.receive_text()
+            await _broadcast_event(topic, frame)
+    except WebSocketDisconnect:
+        pass
+
+
+@app.websocket("/api/events")
+async def events_ws(ws: WebSocket) -> None:
+    """Subscriber side of the chat-tab event broadcast.
+
+    After the feature-flag, token and loopback checks (4403 / 4401 / 4403)
+    and channel validation (4400), the socket is registered in
+    ``_event_channels`` for its channel and held open until the client
+    disconnects.  On exit it is unregistered, and the channel entry is
+    removed once no subscribers remain.
+    """
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    supplied = ws.query_params.get("token", "")
+    if not hmac.compare_digest(supplied.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    peer = ws.client
+    peer_host = peer.host if peer else ""
+    if peer_host and peer_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    topic = _channel_or_close_code(ws)
+    if not topic:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    async with _event_lock:
+        _event_channels.setdefault(topic, set()).add(ws)
+
+    try:
+        # Subscribers don't speak — receiving simply parks this handler
+        # until the browser drops the connection.
+        while True:
+            await ws.receive_text()
+    except WebSocketDisconnect:
+        pass
+    finally:
+        async with _event_lock:
+            listeners = _event_channels.get(topic)
+
+            if listeners is not None:
+                listeners.discard(ws)
+
+                # Drop the channel entry when this was its last subscriber.
+                if not listeners:
+                    _event_channels.pop(topic, None)
+
+
 def mount_spa(application: FastAPI):
    """Mount the built SPA. Falls back to index.html for client-side routing.

@@ -2284,8 +2615,10 @@ def mount_spa(application: FastAPI):
    def _serve_index():
        """Return index.html with the session token injected."""
        html = _index_path.read_text()
+        chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
        token_script = (
-            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
+            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
+            f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
        )
        html = html.replace("</head>", f"{token_script}</head>", 1)
        return HTMLResponse(
@@ -2798,10 +3131,15 @@ def start_server(
    port: int = 9119,
    open_browser: bool = True,
    allow_public: bool = False,
+    *,
+    embedded_chat: bool = False,
 ):
    """Start the web UI server."""
    import uvicorn

+    global _DASHBOARD_EMBEDDED_CHAT_ENABLED
+    _DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
+
    _LOCALHOST = ("127.0.0.1", "localhost", "::1")
    if host not in _LOCALHOST and not allow_public:
        raise SystemExit(
@@ -2817,7 +3155,10 @@ def start_server(

    # Record the bound host so host_header_middleware can validate incoming
    # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
+    # bound_port is also stashed so /api/pty can build the back-WS URL the
+    # PTY child uses to publish events to the dashboard sidebar.
    app.state.bound_host = host
+    app.state.bound_port = port

    if open_browser:
        import webbrowser
@@ -464,9 +464,9 @@ def _coerce_number(value: str, integer_only: bool = False):
        f = float(value)
    except (ValueError, OverflowError):
        return value
-    # Guard against inf/nan before int() conversion
+    # Guard against inf/nan — not JSON-serializable, keep original string
    if f != f or f == float("inf") or f == float("-inf"):
-        return f
+        return value
    # If it looks like an integer (no fractional part), return int
    if f == int(f):
        return int(f)
@@ -156,7 +156,7 @@
      for entry in "''${ENTRIES[@]}"; do
        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
-        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
        STATUS=$?
        if [ "$STATUS" -eq 0 ]; then
          echo "    ok"
@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
+    hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -78,6 +78,16 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
 feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
+google = [
+  # Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
+  # Sheets, Docs).  Declared here so packagers (Nix, Homebrew) ship them with
+  # the [all] extra and users don't hit runtime `pip install` paths that fail
+  # in environments without pip (e.g. Nix-managed Python).
+  "google-api-python-client>=2.100,<3",
+  "google-auth-oauthlib>=1.0,<2",
+  "google-auth-httplib2>=0.2,<1",
+]
+# `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
 web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
@@ -109,6 +119,7 @@ all = [
  "hermes-agent[voice]",
  "hermes-agent[dingtalk]",
  "hermes-agent[feishu]",
+  "hermes-agent[google]",
  "hermes-agent[mistral]",
  "hermes-agent[bedrock]",
  "hermes-agent[web]",
@@ -502,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
    return found


+def _escape_invalid_chars_in_json_strings(raw: str) -> str:
+    """Return *raw* with literal control chars inside JSON strings escaped.
+
+    Scans the text once while tracking whether the cursor is inside a
+    double-quoted string.  Inside a string, any character below U+0020 is
+    rewritten as ``\\uXXXX``; a backslash and the character after it are
+    copied through untouched so existing escapes (including ``\\"``) are
+    preserved.  Everything outside strings is copied verbatim.
+
+    Ported from #12093 — used by ``_repair_tool_call_arguments`` as a
+    fallback when ``json.loads(strict=False)`` alone is not enough.
+    """
+    pieces: list[str] = []
+    inside_string = False
+    chars = iter(raw)
+    for ch in chars:
+        if not inside_string:
+            # Structural JSON: copy as-is, watching for an opening quote.
+            inside_string = ch == '"'
+            pieces.append(ch)
+            continue
+        if ch == "\\":
+            # Existing escape pair: keep both characters untouched.  A lone
+            # trailing backslash has no partner and is copied on its own.
+            pieces.append(ch)
+            partner = next(chars, None)
+            if partner is not None:
+                pieces.append(partner)
+            continue
+        if ch == '"':
+            inside_string = False
+        pieces.append(f"\\u{ord(ch):04x}" if ord(ch) < 0x20 else ch)
+    return "".join(pieces)
+
+
 def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Attempt to repair malformed tool_call argument JSON.

@@ -523,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

+    # Repair pass 0: llama.cpp backends sometimes emit literal control
+    # characters (tabs, newlines) inside JSON string values. json.loads
+    # with strict=False accepts these and lets us re-serialise the
+    # result into wire-valid JSON without any string surgery. This is
+    # the most common local-model repair case (#12068).
+    try:
+        parsed = json.loads(raw_stripped, strict=False)
+        reserialised = json.dumps(parsed, separators=(",", ":"))
+        if reserialised != raw_stripped:
+            logger.warning(
+                "Repaired unescaped control chars in tool_call arguments for %s",
+                tool_name,
+            )
+        return reserialised
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
    # Attempt common JSON repairs
    fixed = raw_stripped
    # 1. Strip trailing commas before } or ]
@@ -557,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    except json.JSONDecodeError:
        pass

+    # Repair pass 4: escape unescaped control chars inside JSON strings,
+    # then retry. Catches cases where strict=False alone fails because
+    # other malformations are present too.
+    try:
+        escaped = _escape_invalid_chars_in_json_strings(fixed)
+        if escaped != fixed:
+            json.loads(escaped)
+            logger.warning(
+                "Repaired control-char-laced tool_call arguments for %s: %s → %s",
+                tool_name, raw_stripped[:80], escaped[:80],
+            )
+            return escaped
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
    # Last resort: replace with empty object so the API request doesn't
    # crash the entire session.
    logger.warning(
@@ -740,6 +814,11 @@ class AIAgent:
    for AI models that support function calling.
    """

+    _TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER = (
+        "[hermes-agent: tool call arguments were corrupted in this session and "
+        "have been dropped to keep the conversation alive. See issue #15236.]"
+    )
+
    @property
    def base_url(self) -> str:
        return self._base_url
@@ -1437,6 +1516,8 @@ class AIAgent:
        
        # Track conversation messages for session logging
        self._session_messages: List[Dict[str, Any]] = []
+        self._memory_write_origin = "assistant_tool"
+        self._memory_write_context = "foreground"
        
        # Cached system prompt -- built once per session, only rebuilt on compression
        self._cached_system_prompt: Optional[str] = None
@@ -2231,6 +2312,34 @@ class AIAgent:
            except Exception:
                logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Show *message* to the user as a warning.
+
+        The message is printed via ``_vprint`` (forced, prefixed with
+        ``log_prefix``) and, if a ``status_callback`` is set, forwarded to it
+        with the ``"warn"`` kind.  Intended for degraded side paths where the
+        main turn can continue but the user should know something failed.
+        Neither channel is allowed to raise.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            pass
+
+        if not self.status_callback:
+            return
+        try:
+            self.status_callback("warn", message)
+        except Exception:
+            logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Emit a short user-visible warning that auxiliary *task* failed.
+
+        The detail text comes from ``_summarize_api_error``; if that raises,
+        ``str(exc)`` is used, and if the result is empty the exception class
+        name is used.  Details longer than 220 characters are cut to 217
+        (trailing whitespace stripped) plus ``...``.
+        """
+        try:
+            summary = self._summarize_api_error(exc)
+        except Exception:
+            summary = str(exc)
+
+        if not summary:
+            summary = exc.__class__.__name__
+        summary = summary.strip()
+
+        too_long = len(summary) > 220
+        if too_long:
+            summary = summary[:217].rstrip() + "..."
+
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {summary}")
+
    def _current_main_runtime(self) -> Dict[str, str]:
        """Return the live main runtime for session-scoped auxiliary routing."""
        return {
@@ -3047,7 +3156,10 @@ class AIAgent:
                        quiet_mode=True,
                        platform=self.platform,
                        provider=self.provider,
+                        parent_session_id=self.session_id,
                    )
+                    review_agent._memory_write_origin = "background_review"
+                    review_agent._memory_write_context = "background_review"
                    review_agent._memory_store = self._memory_store
                    review_agent._memory_enabled = self._memory_enabled
                    review_agent._user_profile_enabled = self._user_profile_enabled
@@ -3081,7 +3193,8 @@ class AIAgent:
                            pass

            except Exception as e:
-                logger.debug("Background memory/skill review failed: %s", e)
+                logger.warning("Background memory/skill review failed: %s", e)
+                self._emit_auxiliary_failure("background review", e)
            finally:
                # Close all resources (httpx client, subprocesses, etc.) so
                # GC doesn't try to clean them up on a dead asyncio event
@@ -3095,6 +3208,32 @@ class AIAgent:
        t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
        t.start()

+    def _build_memory_write_metadata(
+        self,
+        *,
+        write_origin: Optional[str] = None,
+        execution_context: Optional[str] = None,
+        task_id: Optional[str] = None,
+        tool_call_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Assemble provenance metadata for external memory-provider mirrors.
+
+        ``write_origin`` and ``execution_context`` fall back to the agent's
+        ``_memory_write_origin`` / ``_memory_write_context`` attributes (and
+        then to ``"assistant_tool"`` / ``"foreground"``).  ``platform`` falls
+        back to the ``HERMES_SESSION_SOURCE`` env var, then ``"cli"``.
+        ``task_id`` and ``tool_call_id`` are included only when truthy.
+        Entries whose value is ``None`` or ``""`` are dropped from the result.
+        """
+        origin = write_origin or getattr(self, "_memory_write_origin", "assistant_tool")
+        context = execution_context or getattr(self, "_memory_write_context", "foreground")
+
+        fields = [
+            ("write_origin", origin),
+            ("execution_context", context),
+            ("session_id", self.session_id or ""),
+            ("parent_session_id", self._parent_session_id or ""),
+            ("platform", self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli")),
+            ("tool_name", "memory"),
+        ]
+        if task_id:
+            fields.append(("task_id", task_id))
+        if tool_call_id:
+            fields.append(("tool_call_id", tool_call_id))
+
+        return {key: value for key, value in fields if value not in (None, "")}
+
    def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
        """Rewrite the current-turn user message before persistence/return.

@@ -4023,6 +4162,49 @@ class AIAgent:
        except Exception:
            pass

+    def _sync_external_memory_for_turn(
+        self,
+        *,
+        original_user_message: Any,
+        final_response: Any,
+        interrupted: bool,
+    ) -> None:
+        """Mirror a completed turn into external memory providers.
+
+        Called at the end of ``run_conversation`` with the cleaned user
+        message (``original_user_message``) and the finalised assistant
+        response.  The external memory backend gets both ``sync_all`` (to
+        persist the exchange) and ``queue_prefetch_all`` (to start
+        warming context for the next turn) in one shot.
+
+        Uses ``original_user_message`` rather than ``user_message``
+        because the latter may carry injected skill content that bloats
+        or breaks provider queries.
+
+        Interrupted turns are skipped entirely (#15218).  A partial
+        assistant output, an aborted tool chain, or a mid-stream reset
+        is not durable conversational truth — mirroring it into an
+        external memory backend pollutes future recall with state the
+        user never saw completed.  The prefetch is gated on the same
+        flag: the user's next message is almost certainly a retry of
+        the same intent, and a prefetch keyed on the interrupted turn
+        would fire against stale context.
+
+        Nothing happens either when there is no memory manager, or when
+        the response or the user message is empty.
+
+        Provider errors never propagate: external memory is strictly
+        best-effort, so a misconfigured or offline backend must not block
+        the user from seeing their response.  Failures are logged at debug
+        level with the traceback instead of being dropped without a trace.
+        """
+        if interrupted:
+            return
+        if not (self._memory_manager and final_response and original_user_message):
+            return
+        try:
+            self._memory_manager.sync_all(original_user_message, final_response)
+            self._memory_manager.queue_prefetch_all(original_user_message)
+        except Exception:
+            logger.debug("External memory sync failed for turn", exc_info=True)
+
    def release_clients(self) -> None:
        """Release LLM client resources WITHOUT tearing down session tool state.

@@ -5432,6 +5614,26 @@ class AIAgent:
            self._try_refresh_anthropic_client_credentials()
        return self._anthropic_client.messages.create(**api_kwargs)

+    def _rebuild_anthropic_client(self) -> None:
+        """Replace ``self._anthropic_client`` with a freshly built client.
+
+        Bedrock-hosted Anthropic models are rebuilt through the Bedrock
+        builder (region from ``_bedrock_region``, default ``us-east-1``);
+        every other provider goes through ``build_anthropic_client`` with
+        the stored API key, optional base URL and the provider timeout.
+        """
+        if getattr(self, "provider", None) != "bedrock":
+            from agent.anthropic_adapter import build_anthropic_client
+            self._anthropic_client = build_anthropic_client(
+                self._anthropic_api_key,
+                getattr(self, "_anthropic_base_url", None),
+                timeout=get_provider_request_timeout(self.provider, self.model),
+            )
+            return
+        from agent.anthropic_adapter import build_anthropic_bedrock_client
+        bedrock_region = getattr(self, "_bedrock_region", None) or "us-east-1"
+        self._anthropic_client = build_anthropic_bedrock_client(bedrock_region)
+
    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
@@ -5467,12 +5669,21 @@ class AIAgent:
                    # bedrock responses like chat_completions responses.
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
+                        invalidate_runtime_client,
+                        is_stale_connection_error,
                        normalize_converse_response,
                    )
                    region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                    api_kwargs.pop("__bedrock_converse__", None)
                    client = _get_bedrock_runtime_client(region)
-                    raw_response = client.converse(**api_kwargs)
+                    try:
+                        raw_response = client.converse(**api_kwargs)
+                    except Exception as _bedrock_exc:
+                        # Evict the cached client on stale-connection failures
+                        # so the outer retry loop builds a fresh client/pool.
+                        if is_stale_connection_error(_bedrock_exc):
+                            invalidate_runtime_client(region)
+                        raise
                    result["response"] = normalize_converse_response(raw_response)
                else:
                    request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
@@ -5530,14 +5741,8 @@ class AIAgent:
                )
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        rc = request_client_holder.get("client")
                        if rc is not None:
@@ -5562,14 +5767,8 @@ class AIAgent:
                # seed future retries.
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        request_client = request_client_holder.get("client")
                        if request_client is not None:
@@ -5725,12 +5924,21 @@ class AIAgent:
                try:
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
+                        invalidate_runtime_client,
+                        is_stale_connection_error,
                        stream_converse_with_callbacks,
                    )
                    region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                    api_kwargs.pop("__bedrock_converse__", None)
                    client = _get_bedrock_runtime_client(region)
-                    raw_response = client.converse_stream(**api_kwargs)
+                    try:
+                        raw_response = client.converse_stream(**api_kwargs)
+                    except Exception as _bedrock_exc:
+                        # Evict the cached client on stale-connection failures
+                        # so the outer retry loop builds a fresh client/pool.
+                        if is_stale_connection_error(_bedrock_exc):
+                            invalidate_runtime_client(region)
+                        raise

                    def _on_text(text):
                        _fire_first()
@@ -5982,11 +6190,25 @@ class AIAgent:
                for idx in sorted(tool_calls_acc):
                    tc = tool_calls_acc[idx]
                    arguments = tc["function"]["arguments"]
+                    tool_name = tc["function"]["name"] or "?"
                    if arguments and arguments.strip():
                        try:
                            json.loads(arguments)
                        except json.JSONDecodeError:
-                            has_truncated_tool_args = True
+                            # Attempt repair before flagging as truncated.
+                            # Models like GLM-5.1 via Ollama produce trailing
+                            # commas, unclosed brackets, Python None, etc.
+                            # Without repair, these hit the truncation handler
+                            # and kill the session.  _repair_tool_call_arguments
+                            # returns "{}" for unrepairable args, which is far
+                            # better than a crashed session.
+                            repaired = _repair_tool_call_arguments(arguments, tool_name)
+                            if repaired != "{}":
+                                # Successfully repaired — use the fixed args
+                                arguments = repaired
+                            else:
+                                # Unrepairable — flag for truncation handling
+                                has_truncated_tool_args = True
                    mock_tool_calls.append(SimpleNamespace(
                        id=tc["id"],
                        type=tc["type"],
@@ -6410,14 +6632,8 @@ class AIAgent:
            if self._interrupt_requested:
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        request_client = request_client_holder.get("client")
                        if request_client is not None:
@@ -7409,6 +7625,12 @@ class AIAgent:
            raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
            if raw_reasoning_content is not None:
                msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
+            elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning():
+                # DeepSeek thinking mode requires reasoning_content on every
+                # assistant tool-call message. Without it, replaying the
+                # persisted message causes HTTP 400. Include empty string
+                # as a defensive compatibility fallback (refs #15250).
+                msg["reasoning_content"] = ""

        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            # Pass reasoning_details back unmodified so providers (OpenRouter,
@@ -7484,6 +7706,35 @@ class AIAgent:

        return msg

+    def _needs_kimi_tool_reasoning(self) -> bool:
+        """Tell whether the active endpoint is Kimi / Moonshot.
+
+        True for the ``kimi-coding`` / ``kimi-coding-cn`` providers or a
+        ``base_url`` on a Kimi / Moonshot host; those reject (HTTP 400)
+        replayed assistant tool-call turns lacking ``reasoning_content``.
+        """
+        if self.provider in {"kimi-coding", "kimi-coding-cn"}:
+            return True
+        kimi_hosts = ("api.kimi.com", "moonshot.ai", "moonshot.cn")
+        return any(
+            base_url_host_matches(self.base_url, host) for host in kimi_hosts
+        )
+
+    def _needs_deepseek_tool_reasoning(self) -> bool:
+        """Tell whether the active provider, model or endpoint is DeepSeek.
+
+        DeepSeek thinking mode answers HTTP 400 when a replayed assistant
+        tool-call turn lacks ``reasoning_content`` (#15250), so callers use
+        this check to decide whether an empty value must be echoed.
+        """
+        provider_name = (self.provider or "").lower()
+        model_name = (self.model or "").lower()
+        if provider_name == "deepseek":
+            return True
+        if "deepseek" in model_name:
+            return True
+        return base_url_host_matches(self.base_url, "api.deepseek.com")
+
    def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
        """Copy provider-facing reasoning fields onto an API replay message."""
        if source_msg.get("role") != "assistant":
@@ -7499,13 +7750,14 @@ class AIAgent:
            api_msg["reasoning_content"] = normalized_reasoning
            return

-        kimi_requires_reasoning = (
-            self.provider in {"kimi-coding", "kimi-coding-cn"}
-            or base_url_host_matches(self.base_url, "api.kimi.com")
-            or base_url_host_matches(self.base_url, "moonshot.ai")
-            or base_url_host_matches(self.base_url, "moonshot.cn")
-        )
-        if kimi_requires_reasoning and source_msg.get("tool_calls"):
+        # Providers that require an echoed reasoning_content on every
+        # assistant tool-call turn. Detection logic lives in the per-provider
+        # helpers so both the creation path (_build_assistant_message) and
+        # this replay path stay in sync.
+        if source_msg.get("tool_calls") and (
+            self._needs_kimi_tool_reasoning()
+            or self._needs_deepseek_tool_reasoning()
+        ):
            api_msg["reasoning_content"] = ""

    @staticmethod
@@ -7536,6 +7788,115 @@ class AIAgent:
        ]
        return api_msg

+    @staticmethod
+    def _sanitize_tool_call_arguments(
+        messages: list,
+        *,
+        logger=None,
+        session_id: str = None,
+    ) -> int:
+        """Repair corrupted assistant tool-call argument JSON in-place; return how many were repaired."""
+        log = logger or logging.getLogger(__name__)
+        if not isinstance(messages, list):
+            return 0
+
+        repaired = 0  # counts only arguments that failed json.loads below
+        marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
+
+        def _prepend_marker(tool_msg: dict) -> None:  # tag an existing tool reply
+            existing = tool_msg.get("content")
+            if isinstance(existing, str):
+                if not existing:
+                    tool_msg["content"] = marker
+                elif not existing.startswith(marker):  # don't stack duplicate markers
+                    tool_msg["content"] = f"{marker}\n{existing}"
+                return
+            if existing is None:
+                tool_msg["content"] = marker
+                return
+            try:  # non-string content is serialised so the marker can be prefixed
+                existing_text = json.dumps(existing)
+            except TypeError:
+                existing_text = str(existing)
+            tool_msg["content"] = f"{marker}\n{existing_text}"
+
+        message_index = 0  # manual index: ``messages`` can grow via insert() below
+        while message_index < len(messages):
+            msg = messages[message_index]
+            if not isinstance(msg, dict) or msg.get("role") != "assistant":
+                message_index += 1
+                continue
+
+            tool_calls = msg.get("tool_calls")
+            if not isinstance(tool_calls, list) or not tool_calls:
+                message_index += 1
+                continue
+
+            insert_at = message_index + 1  # position for synthesised tool replies
+            for tool_call in tool_calls:
+                if not isinstance(tool_call, dict):
+                    continue
+                function = tool_call.get("function")
+                if not isinstance(function, dict):
+                    continue
+
+                arguments = function.get("arguments")
+                if arguments is None or arguments == "":  # missing -> empty JSON object
+                    function["arguments"] = "{}"
+                    continue
+                if isinstance(arguments, str) and not arguments.strip():
+                    function["arguments"] = "{}"
+                    continue
+                if not isinstance(arguments, str):  # non-string payloads are left as-is
+                    continue
+
+                try:
+                    json.loads(arguments)
+                except json.JSONDecodeError:  # unparseable: log, reset, flag the reply
+                    tool_call_id = tool_call.get("id")
+                    function_name = function.get("name", "?")
+                    preview = arguments[:80]  # only a short prefix is logged
+                    log.warning(
+                        "Corrupted tool_call arguments repaired before request "
+                        "(session=%s, message_index=%s, tool_call_id=%s, function=%s, preview=%r)",
+                        session_id or "-",
+                        message_index,
+                        tool_call_id or "-",
+                        function_name,
+                        preview,
+                    )
+                    function["arguments"] = "{}"
+
+                    existing_tool_msg = None
+                    scan_index = message_index + 1
+                    while scan_index < len(messages):  # scan the run of tool replies
+                        candidate = messages[scan_index]
+                        if not isinstance(candidate, dict) or candidate.get("role") != "tool":
+                            break  # run of tool messages ended without a match
+                        if candidate.get("tool_call_id") == tool_call_id:
+                            existing_tool_msg = candidate
+                            break
+                        scan_index += 1
+
+                    if existing_tool_msg is None:  # no paired reply: synthesise one
+                        messages.insert(
+                            insert_at,
+                            {
+                                "role": "tool",
+                                "tool_call_id": tool_call_id,
+                                "content": marker,
+                            },
+                        )
+                        insert_at += 1  # next synthetic reply goes after this one
+                    else:
+                        _prepend_marker(existing_tool_msg)
+
+                    repaired += 1
+
+            message_index += 1
+
+        return repaired
+
    def _should_sanitize_tool_calls(self) -> bool:
        """Determine if tool_calls need sanitization for strict APIs.

@@ -7633,6 +7994,7 @@ class AIAgent:
                _flush_temperature = _fixed_temp
            else:
                _flush_temperature = 0.3
+            aux_error = None
            try:
                response = _call_llm(
                    task="flush_memories",
@@ -7642,14 +8004,19 @@ class AIAgent:
                    max_tokens=5120,
                    # timeout resolved from auxiliary.flush_memories.timeout config
                )
-            except RuntimeError:
+            except Exception as e:
+                aux_error = e
                _aux_available = False
                response = None

            if not _aux_available and self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
-                codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+                _ct_flush = self._get_transport()
+                if _ct_flush is not None:
+                    codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
+                elif not codex_kwargs.get("tools"):
+                    codex_kwargs["tools"] = [memory_tool_def]
                if _flush_temperature is not None:
                    codex_kwargs["temperature"] = _flush_temperature
                else:
@@ -7681,11 +8048,37 @@ class AIAgent:
                    **api_kwargs, timeout=_get_task_timeout("flush_memories")
                )

+            if aux_error is not None:
+                logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
+                self._emit_auxiliary_failure("memory flush", aux_error)
+
+            def _openai_tool_calls(resp):
+                if resp is not None and hasattr(resp, "choices") and resp.choices:
+                    msg = getattr(resp.choices[0], "message", None)
+                    calls = getattr(msg, "tool_calls", None)
+                    if calls:
+                        return calls
+                return []
+
+            def _codex_output_tool_calls(resp):
+                calls = []
+                for item in getattr(resp, "output", []) or []:
+                    if getattr(item, "type", None) == "function_call":
+                        calls.append(SimpleNamespace(
+                            id=getattr(item, "call_id", None),
+                            type="function",
+                            function=SimpleNamespace(
+                                name=getattr(item, "name", ""),
+                                arguments=getattr(item, "arguments", "{}"),
+                            ),
+                        ))
+                return calls
+
            # Extract tool calls from the response, handling all API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not _aux_available:
                _ct_flush = self._get_transport()
-                _cnr_flush = _ct_flush.normalize_response(response)
+                _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
                if _cnr_flush and _cnr_flush.tool_calls:
                    tool_calls = [
                        SimpleNamespace(
@@ -7693,6 +8086,8 @@ class AIAgent:
                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                        ) for tc in _cnr_flush.tool_calls
                    ]
+                else:
+                    tool_calls = _codex_output_tool_calls(response)
            elif self.api_mode == "anthropic_messages" and not _aux_available:
                _tfn = self._get_transport()
                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
@@ -7705,15 +8100,16 @@ class AIAgent:
                    ]
            elif self.api_mode in ("chat_completions", "bedrock_converse"):
                # chat_completions / bedrock — normalize through transport
-                _flush_result = self._get_transport().normalize_response(response)
-                if _flush_result.tool_calls:
+                _tfn = self._get_transport()
+                _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
+                if _flush_result and _flush_result.tool_calls:
                    tool_calls = _flush_result.tool_calls
+                else:
+                    tool_calls = _openai_tool_calls(response)
            elif _aux_available and hasattr(response, "choices") and response.choices:
                # Auxiliary client returned OpenAI-shaped response while main
                # api_mode is codex/anthropic — extract tool_calls from .choices
-                _aux_msg = response.choices[0].message
-                if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                    tool_calls = _aux_msg.tool_calls
+                tool_calls = _openai_tool_calls(response)

            for tc in tool_calls:
                if tc.function.name == "memory":
@@ -7728,12 +8124,27 @@ class AIAgent:
                            old_text=args.get("old_text"),
                            store=self._memory_store,
                        )
+                        if self._memory_manager and args.get("action") in ("add", "replace"):
+                            try:
+                                self._memory_manager.on_memory_write(
+                                    args.get("action", ""),
+                                    flush_target,
+                                    args.get("content", ""),
+                                    metadata=self._build_memory_write_metadata(
+                                        write_origin="memory_flush",
+                                        execution_context="flush_memories",
+                                    ),
+                                )
+                            except Exception:
+                                pass
                        if not self.quiet_mode:
                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
                    except Exception as e:
-                        logger.debug("Memory flush tool call failed: %s", e)
+                        logger.warning("Memory flush tool call failed: %s", e)
+                        self._emit_auxiliary_failure("memory flush tool", e)
        except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
+            logger.warning("Memory flush API call failed: %s", e)
+            self._emit_auxiliary_failure("memory flush", e)
        finally:
            # Strip flush artifacts: remove everything from the flush message onward.
            # Use sentinel marker instead of identity check for robustness.
@@ -7779,6 +8190,15 @@ class AIAgent:
            # focus_topic — fall back to calling without it.
            compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+        summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+        if summary_error:
+            if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                self._last_compression_summary_warning = summary_error
+                self._emit_warning(
+                    f"⚠ Compression summary failed: {summary_error}. "
+                    "Inserted a fallback context marker."
+                )
+
        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
            compressed.append({"role": "user", "content": todo_snapshot})
@@ -7948,6 +8368,10 @@ class AIAgent:
                        function_args.get("action", ""),
                        target,
                        function_args.get("content", ""),
+                        metadata=self._build_memory_write_metadata(
+                            task_id=effective_task_id,
+                            tool_call_id=tool_call_id,
+                        ),
                    )
                except Exception:
                    pass
@@ -8459,6 +8883,10 @@ class AIAgent:
                            function_args.get("action", ""),
                            target,
                            function_args.get("content", ""),
+                            metadata=self._build_memory_write_metadata(
+                                task_id=effective_task_id,
+                                tool_call_id=getattr(tool_call, "id", None),
+                            ),
                        )
                    except Exception:
                        pass
@@ -8703,6 +9131,7 @@ class AIAgent:
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
+                self._copy_reasoning_content_for_api(msg, api_msg)
                for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
                    api_msg.pop(internal_field, None)
                if _needs_sanitize:
@@ -9333,6 +9762,19 @@ class AIAgent:
            # Note: Reasoning is embedded in content via <think> tags for trajectory storage.
            # However, providers like Moonshot AI require a separate 'reasoning_content' field
            # on assistant messages with tool_calls. We handle both cases here.
+            request_logger = getattr(self, "logger", None) or logging.getLogger(__name__)
+            repaired_tool_calls = self._sanitize_tool_call_arguments(
+                messages,
+                logger=request_logger,
+                session_id=self.session_id,
+            )
+            if repaired_tool_calls > 0:
+                request_logger.info(
+                    "Sanitized %s corrupted tool_call arguments before request (session=%s)",
+                    repaired_tool_calls,
+                    self.session_id or "-",
+                )
+
            api_messages = []
            for idx, msg in enumerate(messages):
                api_msg = msg.copy()
@@ -12162,14 +12604,11 @@ class AIAgent:
            self._iters_since_skill = 0

        # External memory provider: sync the completed turn + queue next prefetch.
-        # Use original_user_message (clean input) — user_message may contain
-        # injected skill content that bloats / breaks provider queries.
-        if self._memory_manager and final_response and original_user_message:
-            try:
-                self._memory_manager.sync_all(original_user_message, final_response)
-                self._memory_manager.queue_prefetch_all(original_user_message)
-            except Exception:
-                pass
+        self._sync_external_memory_for_turn(
+            original_user_message=original_user_message,
+            final_response=final_response,
+            interrupted=interrupted,
+        )

        # Background memory/skill review — runs AFTER the response is delivered
        # so it never competes with the user's task for model attention.
@@ -48,6 +48,9 @@ AUTHOR_MAP = {
    "jefferson@heimdallstrategy.com": "Mind-Dragon",
    "130918800+devorun@users.noreply.github.com": "devorun",
    "maks.mir@yahoo.com": "say8hi",
+    "web3blind@users.noreply.github.com": "web3blind",
+    "julia@alexland.us": "alexg0bot",
+    "1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
    # contributors (from noreply pattern)
    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -59,14 +62,19 @@ AUTHOR_MAP = {
    "keifergu@tencent.com": "keifergu",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
    "abner.the.foreman@agentmail.to": "Abnertheforeman",
+    "thomasgeorgevii09@gmail.com": "tochukwuada",
    "harryykyle1@gmail.com": "hharry11",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
    "keira.voss94@gmail.com": "keiravoss94",
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
+    "simbamax99@gmail.com": "simbam99",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
+    "cyprian@ironin.pl": "iRonin",
    "valdi.jorge@gmail.com": "jvcl",
+    "q19dcp@gmail.com": "aj-nt",
+    "ebukau84@gmail.com": "UgwujaGeorge",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
    "oussama.redcode@gmail.com": "mavrickdeveloper",
@@ -106,6 +114,7 @@ AUTHOR_MAP = {
    "30841158+n-WN@users.noreply.github.com": "n-WN",
    "tsuijinglei@gmail.com": "hiddenpuppy",
    "jerome@clawwork.ai": "HiddenPuppy",
+    "jerome.benoit@sap.com": "jerome-benoit",
    "wysie@users.noreply.github.com": "Wysie",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
@@ -200,6 +209,9 @@ AUTHOR_MAP = {
    "1434494126@qq.com": "5park1e",
    "158153005+5park1e@users.noreply.github.com": "5park1e",
    "innocarpe@gmail.com": "innocarpe",
+    "noreply@ked.com": "qike-ms",
+    "andrekurait@gmail.com": "AndreKurait",
+    "bsgdigital@users.noreply.github.com": "bsgdigital",
    "numman.ali@gmail.com": "nummanali",
    "rohithsaimidigudla@gmail.com": "whitehatjr1001",
    "0xNyk@users.noreply.github.com": "0xNyk",
@@ -490,6 +502,7 @@ AUTHOR_MAP = {
    "zhangxicen@example.com": "zhangxicen",
    "codex@openai.invalid": "teknium1",
    "screenmachine@gmail.com": "teknium1",
+    "chenzeshi@live.com": "chen1749144759",
 }


@@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(

 ### Model architecture

+<!-- ascii-guard-ignore -->
 ```
 SAM Architecture:
 ┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
@@ -144,6 +145,7 @@ SAM Architecture:
   Image Embeddings      Prompt Embeddings         Masks + IoU
   (computed once)       (per prompt)             predictions
 ```
+<!-- ascii-guard-ignore-end -->

 ### Model variants

@@ -0,0 +1,42 @@
+"""Resolve HERMES_HOME for standalone skill scripts.
+
+Skill scripts may run outside the Hermes process (e.g. system Python,
+nix env, CI) where ``hermes_constants`` is not importable.  This module
+provides the same ``get_hermes_home()`` and ``display_hermes_home()``
+contracts as ``hermes_constants`` without requiring it on ``sys.path``.
+
+When ``hermes_constants`` IS available it is used directly so that any
+future enhancements (profile resolution, Docker detection, etc.) are
+picked up automatically.  The fallback path replicates the core logic
+from ``hermes_constants.py`` using only the stdlib.
+
+All scripts under ``google-workspace/scripts/`` should import from here
+instead of duplicating the ``HERMES_HOME = Path(os.getenv(...))`` pattern.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+try:
+    from hermes_constants import display_hermes_home as display_hermes_home
+    from hermes_constants import get_hermes_home as get_hermes_home
+except ImportError:  # also covers ModuleNotFoundError, its subclass
+
+    def get_hermes_home() -> Path:
+        """Locate the Hermes home directory (stdlib-only fallback).
+
+        ``$HERMES_HOME`` (stripped) when non-empty, otherwise ``~/.hermes``."""
+        override = os.environ.get("HERMES_HOME", "").strip()
+        return Path.home() / ".hermes" if not override else Path(override)
+
+    def display_hermes_home() -> str:
+        """Render the Hermes home for display, ``~/``-shortened when it lies
+        under the user's home directory; otherwise return it unchanged."""
+        hermes_dir = get_hermes_home()
+        try:
+            inside_home = hermes_dir.relative_to(Path.home())
+        except ValueError:
+            return str(hermes_dir)
+        return "~/" + str(inside_home)
@@ -31,7 +31,14 @@ from datetime import datetime, timedelta, timezone
 from email.mime.text import MIMEText
 from pathlib import Path

-HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)
+
+from _hermes_home import get_hermes_home
+
+HERMES_HOME = get_hermes_home()
 TOKEN_PATH = HERMES_HOME / "google_token.json"
 CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"

@@ -10,9 +10,12 @@ import sys
 from datetime import datetime, timezone
 from pathlib import Path

+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)

-def get_hermes_home() -> Path:
-    return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+from _hermes_home import get_hermes_home


 def get_token_path() -> Path:
@@ -21,6 +21,8 @@ Agent workflow:
  6. Run --check to verify. Done.
 """

+from __future__ import annotations  # lazy annotations: lets PEP 604 `X | None` hints parse on Python 3.9
+
 import argparse
 import json
 import os
@@ -28,13 +30,12 @@ import subprocess
 import sys
 from pathlib import Path

-try:
-    from hermes_constants import display_hermes_home, get_hermes_home
-except ModuleNotFoundError:
-    HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4]
-    if HERMES_AGENT_ROOT.exists():
-        sys.path.insert(0, str(HERMES_AGENT_ROOT))
-    from hermes_constants import display_hermes_home, get_hermes_home
+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)
+
+from _hermes_home import display_hermes_home, get_hermes_home

 HERMES_HOME = get_hermes_home()
 TOKEN_PATH = HERMES_HOME / "google_token.json"
@@ -111,7 +112,11 @@ def install_deps():
        return True
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Failed to install dependencies: {e}")
-        print(f"Try manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
+        print(
+            "On environments without pip (e.g. Nix), install the optional extra instead:"
+        )
+        print("  pip install 'hermes-agent[google]'")
+        print(f"Or manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
        return False


@@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target

 This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.

+<!-- ascii-guard-ignore -->
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │                    RESEARCH PAPER PIPELINE                  │
@@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
 │                                                             │
 └─────────────────────────────────────────────────────────────┘
 ```
+<!-- ascii-guard-ignore-end -->

 ---

@@ -1230,3 +1230,210 @@ class TestEmptyTextBlockFix:
        from agent.bedrock_adapter import _convert_content_to_converse
        blocks = _convert_content_to_converse("Hello")
        assert blocks[0]["text"] == "Hello"
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection and per-region client invalidation
+# ---------------------------------------------------------------------------
+
+class TestInvalidateRuntimeClient:
+    """Per-region eviction used to discard dead/stale bedrock-runtime clients."""
+
+    def test_evicts_only_the_target_region(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            invalidate_runtime_client,
+            reset_client_cache,
+        )
+        reset_client_cache()
+        _bedrock_runtime_client_cache["us-east-1"] = "dead-client"
+        _bedrock_runtime_client_cache["us-west-2"] = "live-client"
+
+        evicted = invalidate_runtime_client("us-east-1")
+
+        assert evicted is True
+        assert "us-east-1" not in _bedrock_runtime_client_cache
+        assert _bedrock_runtime_client_cache["us-west-2"] == "live-client"
+
+    def test_returns_false_when_region_not_cached(self):
+        from agent.bedrock_adapter import invalidate_runtime_client, reset_client_cache
+        reset_client_cache()
+        assert invalidate_runtime_client("eu-west-1") is False
+
+
+class TestIsStaleConnectionError:
+    """Classifier that decides whether an exception warrants client eviction."""
+
+    def test_detects_botocore_connection_closed_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import ConnectionClosedError
+        exc = ConnectionClosedError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_botocore_endpoint_connection_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import EndpointConnectionError
+        exc = EndpointConnectionError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_botocore_read_timeout(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import ReadTimeoutError
+        exc = ReadTimeoutError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_urllib3_protocol_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from urllib3.exceptions import ProtocolError
+        exc = ProtocolError("Connection broken")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_library_internal_assertion_error(self):
+        """A bare AssertionError raised from inside urllib3/botocore signals
+        a corrupted connection-pool invariant and should trigger eviction."""
+        from agent.bedrock_adapter import is_stale_connection_error
+
+        # Fabricate an AssertionError whose traceback's last frame belongs
+        # to a module named "urllib3.connectionpool". We do this by exec'ing
+        # a tiny `assert False` under a fake globals dict — the resulting
+        # frame's ``f_globals["__name__"]`` is what the classifier inspects.
+        fake_globals = {"__name__": "urllib3.connectionpool"}
+        try:
+            exec("def _boom():\n    assert False\n_boom()", fake_globals)
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is True
+        else:
+            pytest.fail("AssertionError not raised")
+
+    def test_detects_botocore_internal_assertion_error(self):
+        """Same as above but for a frame inside the botocore namespace."""
+        from agent.bedrock_adapter import is_stale_connection_error
+        fake_globals = {"__name__": "botocore.httpsession"}
+        try:
+            exec("def _boom():\n    assert False\n_boom()", fake_globals)
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is True
+        else:
+            pytest.fail("AssertionError not raised")
+
+    def test_ignores_application_assertion_error(self):
+        """AssertionError from application code (not urllib3/botocore) should
+        NOT be classified as stale — those are real test/code bugs."""
+        from agent.bedrock_adapter import is_stale_connection_error
+        try:
+            assert False, "test-only"  # noqa: B011
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is False
+
+    def test_ignores_unrelated_exceptions(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        assert is_stale_connection_error(ValueError("bad input")) is False
+        assert is_stale_connection_error(KeyError("missing")) is False
+
+
+class TestCallConverseInvalidatesOnStaleError:
+    """call_converse / call_converse_stream evict the cached client when the
+    boto3 call raises a stale-connection error — so the next invocation
+    reconnects instead of reusing the dead socket."""
+
+    def test_converse_evicts_client_on_stale_error(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ConnectionClosedError
+
+        reset_client_cache()
+        dead_client = MagicMock()
+        dead_client.converse.side_effect = ConnectionClosedError(
+            endpoint_url="https://bedrock.example",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = dead_client
+
+        with pytest.raises(ConnectionClosedError):
+            call_converse(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert "us-east-1" not in _bedrock_runtime_client_cache, (
+            "stale client should have been evicted so the retry reconnects"
+        )
+
+    def test_converse_stream_evicts_client_on_stale_error(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse_stream,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ConnectionClosedError
+
+        reset_client_cache()
+        dead_client = MagicMock()
+        dead_client.converse_stream.side_effect = ConnectionClosedError(
+            endpoint_url="https://bedrock.example",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = dead_client
+
+        with pytest.raises(ConnectionClosedError):
+            call_converse_stream(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert "us-east-1" not in _bedrock_runtime_client_cache
+
+    def test_converse_does_not_evict_on_non_stale_error(self):
+        """Non-stale errors (e.g. ValidationException) leave the client cache alone."""
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ClientError
+
+        reset_client_cache()
+        live_client = MagicMock()
+        live_client.converse.side_effect = ClientError(
+            error_response={"Error": {"Code": "ValidationException", "Message": "bad"}},
+            operation_name="Converse",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = live_client
+
+        with pytest.raises(ClientError):
+            call_converse(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client, (
+            "validation errors do not indicate a dead connection — keep the client"
+        )
+
+    def test_converse_leaves_successful_client_in_cache(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+
+        reset_client_cache()
+        live_client = MagicMock()
+        live_client.converse.return_value = {
+            "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}},
+            "stopReason": "end_turn",
+            "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2},
+        }
+        _bedrock_runtime_client_cache["us-east-1"] = live_client
+
+        call_converse(
+            region="us-east-1",
+            model="anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=[{"role": "user", "content": "hi"}],
+        )
+
+        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client
@@ -376,17 +376,15 @@ class TestBedrockModelNameNormalization:
            "apac.anthropic.claude-haiku-4-5", preserve_dots=True
        ) == "apac.anthropic.claude-haiku-4-5"

-    def test_preserve_false_mangles_as_documented(self):
-        """Canary: with ``preserve_dots=False`` the function still
-        produces the broken all-hyphen form — this is the shape that
-        Bedrock rejected and that the fix avoids.  Keeping this test
-        locks in the existing behaviour of ``normalize_model_name`` so a
-        future refactor doesn't accidentally decouple the knob from its
-        effect."""
+    def test_bedrock_prefix_preserved_without_preserve_dots(self):
+        """Bedrock inference profile IDs are auto-detected by prefix and
+        always returned unmangled -- ``preserve_dots`` is irrelevant for
+        these IDs because the dots are namespace separators, not version
+        separators.  Regression for #12295."""
        from agent.anthropic_adapter import normalize_model_name
        assert normalize_model_name(
            "global.anthropic.claude-opus-4-7", preserve_dots=False
-        ) == "global-anthropic-claude-opus-4-7"
+        ) == "global.anthropic.claude-opus-4-7"

    def test_bare_foundation_model_id_preserved(self):
        """Non-inference-profile Bedrock IDs
@@ -422,12 +420,11 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
            f"{kwargs['model']!r}"
        )

-    def test_bedrock_model_mangled_without_preserve_dots(self):
-        """Inverse canary: without the flag, ``build_anthropic_kwargs``
-        still produces the broken form — so the fix in
-        ``_anthropic_preserve_dots`` is the load-bearing piece that
-        wires ``preserve_dots=True`` through to this builder for the
-        Bedrock case."""
+    def test_bedrock_model_preserved_without_preserve_dots(self):
+        """Bedrock inference profile IDs survive ``build_anthropic_kwargs``
+        even without ``preserve_dots=True`` -- the prefix auto-detection
+        in ``normalize_model_name`` is the load-bearing piece.
+        Regression for #12295."""
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="global.anthropic.claude-opus-4-7",
@@ -437,4 +434,157 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
            reasoning_config=None,
            preserve_dots=False,
        )
-        assert kwargs["model"] == "global-anthropic-claude-opus-4-7"
+        assert kwargs["model"] == "global.anthropic.claude-opus-4-7"
+
+
+class TestBedrockModelIdDetection:
+    """Tests for ``_is_bedrock_model_id`` and the auto-detection that
+    makes ``normalize_model_name`` preserve dots for Bedrock IDs
+    regardless of ``preserve_dots``.  Regression for #12295."""
+
+    def test_bare_bedrock_id_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("anthropic.claude-opus-4-7") is True
+
+    def test_regional_us_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("us.anthropic.claude-sonnet-4-5-v1:0") is True
+
+    def test_regional_global_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("global.anthropic.claude-opus-4-7") is True
+
+    def test_regional_eu_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("eu.anthropic.claude-sonnet-4-6") is True
+
+    def test_openrouter_format_not_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("claude-opus-4.6") is False
+
+    def test_bare_claude_not_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("claude-opus-4-7") is False
+
+    def test_bare_bedrock_id_preserved_without_flag(self):
+        """The primary bug from #12295: ``anthropic.claude-opus-4-7`` must
+        keep its dots when sent to bedrock-mantle via auxiliary clients
+        that don't pass ``preserve_dots=True``."""
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name(
+            "anthropic.claude-opus-4-7", preserve_dots=False
+        ) == "anthropic.claude-opus-4-7"
+
+    def test_openrouter_dots_still_converted(self):
+        """Non-Bedrock dotted model names must still be converted."""
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6"
+
+    def test_bare_bedrock_id_survives_build_kwargs(self):
+        """End-to-end: bare Bedrock ID through ``build_anthropic_kwargs``
+        without ``preserve_dots=True`` -- the auxiliary client path."""
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        kwargs = build_anthropic_kwargs(
+            model="anthropic.claude-opus-4-7",
+            messages=[{"role": "user", "content": "hi"}],
+            tools=None,
+            max_tokens=1024,
+            reasoning_config=None,
+            preserve_dots=False,
+        )
+        assert kwargs["model"] == "anthropic.claude-opus-4-7"
+
+
+# ---------------------------------------------------------------------------
+# auxiliary_client Bedrock resolution — fix for #13919
+# ---------------------------------------------------------------------------
+# Before the fix, resolve_provider_client("bedrock", ...) fell through to the
+# "unhandled auth_type" warning and returned (None, None), breaking all
+# auxiliary tasks (compression, memory, summarization) for Bedrock users.
+
+
+class TestAuxiliaryClientBedrockResolution:
+    """Verify resolve_provider_client handles Bedrock's aws_sdk auth type."""
+
+    def test_bedrock_returns_client_with_credentials(self, monkeypatch):
+        """With valid AWS credentials, Bedrock should return a usable client."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+        monkeypatch.setenv("AWS_REGION", "us-west-2")
+
+        mock_anthropic_bedrock = MagicMock()
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=mock_anthropic_bedrock):
+            from agent.auxiliary_client import resolve_provider_client, AnthropicAuxiliaryClient
+            client, model = resolve_provider_client("bedrock", None)
+
+        assert client is not None, (
+            "resolve_provider_client('bedrock') returned None — "
+            "aws_sdk auth type is not handled"
+        )
+        assert isinstance(client, AnthropicAuxiliaryClient)
+        assert model is not None
+        assert client.api_key == "aws-sdk"
+        assert "us-west-2" in client.base_url
+
+    def test_bedrock_returns_none_without_credentials(self, monkeypatch):
+        """Without AWS credentials, Bedrock should return (None, None) gracefully."""
+        with patch("agent.bedrock_adapter.has_aws_credentials", return_value=False):
+            from agent.auxiliary_client import resolve_provider_client
+            client, model = resolve_provider_client("bedrock", None)
+
+        assert client is None
+        assert model is None
+
+    def test_bedrock_uses_configured_region(self, monkeypatch):
+        """Bedrock client base_url should reflect AWS_REGION."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+        monkeypatch.setenv("AWS_REGION", "eu-central-1")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            client, _ = resolve_provider_client("bedrock", None)
+
+        assert client is not None
+        assert "eu-central-1" in client.base_url
+
+    def test_bedrock_respects_explicit_model(self, monkeypatch):
+        """When caller passes an explicit model, it should be used."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            _, model = resolve_provider_client(
+                "bedrock", "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
+            )
+
+        assert "claude-sonnet" in model
+
+    def test_bedrock_async_mode(self, monkeypatch):
+        """Async mode should return an AsyncAnthropicAuxiliaryClient."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client, AsyncAnthropicAuxiliaryClient
+            client, model = resolve_provider_client("bedrock", None, async_mode=True)
+
+        assert client is not None
+        assert isinstance(client, AsyncAnthropicAuxiliaryClient)
+
+    def test_bedrock_default_model_is_haiku(self, monkeypatch):
+        """Default auxiliary model for Bedrock should be Haiku (fast, cheap)."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            _, model = resolve_provider_client("bedrock", None)
+
+        assert "haiku" in model.lower()
@@ -77,6 +77,13 @@ class FakeMemoryProvider(MemoryProvider):
        self.memory_writes.append((action, target, content))


+class MetadataMemoryProvider(FakeMemoryProvider):
+    """Provider that opts into write metadata."""
+
+    def on_memory_write(self, action, target, content, metadata=None):
+        self.memory_writes.append((action, target, content, metadata or {}))
+
+
 # ---------------------------------------------------------------------------
 # MemoryProvider ABC tests
 # ---------------------------------------------------------------------------
@@ -862,6 +869,51 @@ class TestOnMemoryWriteBridge:
        mgr.on_memory_write("add", "memory", "new fact")
        assert p.memory_writes == [("add", "memory", "new fact")]

+    def test_on_memory_write_metadata_passed_to_opt_in_provider(self):
+        """Providers that accept metadata receive structured write provenance."""
+        mgr = MemoryManager()
+        p = MetadataMemoryProvider("ext")
+        mgr.add_provider(p)
+
+        mgr.on_memory_write(
+            "add",
+            "memory",
+            "new fact",
+            metadata={
+                "write_origin": "assistant_tool",
+                "execution_context": "foreground",
+                "session_id": "sess-1",
+            },
+        )
+
+        assert p.memory_writes == [
+            (
+                "add",
+                "memory",
+                "new fact",
+                {
+                    "write_origin": "assistant_tool",
+                    "execution_context": "foreground",
+                    "session_id": "sess-1",
+                },
+            )
+        ]
+
+    def test_on_memory_write_metadata_keeps_legacy_provider_compatible(self):
+        """Old 3-arg providers keep working when the manager receives metadata."""
+        mgr = MemoryManager()
+        p = FakeMemoryProvider("ext")
+        mgr.add_provider(p)
+
+        mgr.on_memory_write(
+            "add",
+            "user",
+            "legacy provider fact",
+            metadata={"write_origin": "assistant_tool"},
+        )
+
+        assert p.memory_writes == [("add", "user", "legacy provider fact")]
+
    def test_on_memory_write_replace(self):
        """on_memory_write fires for 'replace' actions."""
        mgr = MemoryManager()
@@ -588,6 +588,57 @@ class TestGetModelContextLength:
        assert result == 200000


+# =========================================================================
+# Bedrock context resolution — must run BEFORE custom-endpoint probe
+# =========================================================================
+
+class TestBedrockContextResolution:
+    """Regression tests for Bedrock context-length resolution order.
+
+    Bug: because ``bedrock-runtime.<region>.amazonaws.com`` is not listed in
+    ``_URL_TO_PROVIDER``, ``_is_known_provider_base_url`` returned False and
+    the custom-endpoint probe at step 2 ran first — fetching ``/models`` from
+    Bedrock (which it doesn't serve), returning the 128K default-fallback
+    before execution ever reached the Bedrock branch.
+
+    Fix: promote the Bedrock branch ahead of the custom-endpoint probe.
+    """
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch):
+        """provider='bedrock' resolves via static table, bypasses /models probe."""
+        ctx = get_model_context_length(
+            "anthropic.claude-opus-4-v1:0",
+            provider="bedrock",
+            base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+        )
+        # Must return the static Bedrock table value (200K for Claude),
+        # NOT DEFAULT_FALLBACK_CONTEXT (128K).
+        assert ctx == 200000
+        mock_fetch.assert_not_called()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_bedrock_url_without_provider_hint(self, mock_fetch):
+        """bedrock-runtime host infers Bedrock even when provider is omitted."""
+        ctx = get_model_context_length(
+            "anthropic.claude-sonnet-4-v1:0",
+            base_url="https://bedrock-runtime.us-west-2.amazonaws.com",
+        )
+        assert ctx == 200000
+        mock_fetch.assert_not_called()
+
+    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
+    def test_non_bedrock_url_still_probes(self, mock_fetch):
+        """Non-Bedrock hosts still reach the custom-endpoint probe."""
+        mock_fetch.return_value = {"some-model": {"context_length": 50000}}
+        ctx = get_model_context_length(
+            "some-model",
+            base_url="https://api.example.com/v1",
+        )
+        assert ctx == 50000
+        assert mock_fetch.called
+
+
 # =========================================================================
 # _strip_provider_prefix — Ollama model:tag vs provider:model
 # =========================================================================
@@ -0,0 +1,94 @@
+"""Tests for the /busy CLI command and busy-input-mode config handling."""
+
+import unittest
+from types import SimpleNamespace
+from unittest.mock import patch
+
+
+def _import_cli():
+    import hermes_cli.config as config_mod
+
+    if not hasattr(config_mod, "save_env_value_secure"):
+        config_mod.save_env_value_secure = lambda key, value: {
+            "success": True,
+            "stored_as": key,
+            "validated": False,
+        }
+
+    import cli as cli_mod
+
+    return cli_mod
+
+
+class TestHandleBusyCommand(unittest.TestCase):
+    def _make_cli(self, busy_input_mode="interrupt"):
+        return SimpleNamespace(
+            busy_input_mode=busy_input_mode,
+            agent=None,
+        )
+
+    def test_no_args_shows_status(self):
+        cli_mod = _import_cli()
+        stub = self._make_cli("queue")
+        with (
+            patch.object(cli_mod, "_cprint") as mock_cprint,
+            patch.object(cli_mod, "save_config_value") as mock_save,
+        ):
+            cli_mod.HermesCLI._handle_busy_command(stub, "/busy")
+
+        mock_save.assert_not_called()
+        printed = " ".join(str(c) for c in mock_cprint.call_args_list)
+        self.assertIn("queue", printed)
+        self.assertIn("interrupt", printed)
+
+    def test_queue_argument_sets_queue_mode_and_saves(self):
+        cli_mod = _import_cli()
+        stub = self._make_cli("interrupt")
+        with (
+            patch.object(cli_mod, "_cprint"),
+            patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
+        ):
+            cli_mod.HermesCLI._handle_busy_command(stub, "/busy queue")
+
+        self.assertEqual(stub.busy_input_mode, "queue")
+        mock_save.assert_called_once_with("display.busy_input_mode", "queue")
+
+    def test_interrupt_argument_sets_interrupt_mode_and_saves(self):
+        cli_mod = _import_cli()
+        stub = self._make_cli("queue")
+        with (
+            patch.object(cli_mod, "_cprint"),
+            patch.object(cli_mod, "save_config_value", return_value=True) as mock_save,
+        ):
+            cli_mod.HermesCLI._handle_busy_command(stub, "/busy interrupt")
+
+        self.assertEqual(stub.busy_input_mode, "interrupt")
+        mock_save.assert_called_once_with("display.busy_input_mode", "interrupt")
+
+    def test_invalid_argument_prints_usage(self):
+        cli_mod = _import_cli()
+        stub = self._make_cli()
+        with (
+            patch.object(cli_mod, "_cprint") as mock_cprint,
+            patch.object(cli_mod, "save_config_value") as mock_save,
+        ):
+            cli_mod.HermesCLI._handle_busy_command(stub, "/busy nonsense")
+
+        mock_save.assert_not_called()
+        printed = " ".join(str(c) for c in mock_cprint.call_args_list)
+        self.assertIn("Usage: /busy", printed)
+
+
+class TestBusyCommandRegistry(unittest.TestCase):
+    def test_busy_in_registry(self):
+        from hermes_cli.commands import COMMAND_REGISTRY
+
+        names = [c.name for c in COMMAND_REGISTRY]
+        assert "busy" in names
+
+    def test_busy_subcommands_documented(self):
+        from hermes_cli.commands import COMMAND_REGISTRY
+
+        busy = next(c for c in COMMAND_REGISTRY if c.name == "busy")
+        assert busy.args_hint == "[queue|interrupt|status]"
+        assert busy.category == "Configuration"
@@ -1374,6 +1374,139 @@ class TestResponsesStreaming:
                assert data["status"] == "completed"
                assert data["output"][-1]["content"][0]["text"] == "Stored response"

+    @pytest.mark.asyncio
+    async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
+        """Server-side asyncio.CancelledError (shutdown, request timeout) must
+        still leave an ``incomplete`` snapshot in ResponseStore so
+        GET /v1/responses/{id} and previous_response_id chaining keep
+        working.  Regression for PR #15171 follow-up.
+
+        Calls _write_sse_responses directly so the test can await the
+        handler to completion (TestClient disconnection races the server
+        handler, which makes end-to-end assertion on the final stored
+        snapshot flaky).
+        """
+        # Build a minimal fake request + stream queue the writer understands.
+        fake_request = MagicMock()
+        fake_request.headers = {}
+
+        written_payloads: list = []
+
+        class _FakeStreamResponse:
+            async def prepare(self, req):
+                pass
+
+            async def write(self, payload):
+                written_payloads.append(payload)
+
+        # api_mod gives access to web.StreamResponse, which is patched around the writer call below.
+        import gateway.platforms.api_server as api_mod
+        import queue as _q
+
+        stream_q: _q.Queue = _q.Queue()
+
+        async def _agent_coro():
+            # Feed one partial delta into the stream queue...
+            stream_q.put("partial output")
+            # ...then give the drain loop a moment to pick it up before
+            # raising CancelledError to simulate a server-side cancel.
+            await asyncio.sleep(0.01)
+            raise asyncio.CancelledError()
+
+        agent_task = asyncio.ensure_future(_agent_coro())
+        response_id = f"resp_{uuid.uuid4().hex[:28]}"
+
+        with patch.object(api_mod.web, "StreamResponse", return_value=_FakeStreamResponse()):
+            with pytest.raises(asyncio.CancelledError):
+                await adapter._write_sse_responses(
+                    request=fake_request,
+                    response_id=response_id,
+                    model="hermes-agent",
+                    created_at=int(time.time()),
+                    stream_q=stream_q,
+                    agent_task=agent_task,
+                    agent_ref=[None],
+                    conversation_history=[],
+                    user_message="will be cancelled",
+                    instructions=None,
+                    conversation=None,
+                    store=True,
+                    session_id=None,
+                )
+
+        # The in_progress snapshot was persisted on response.created,
+        # and the CancelledError handler must have updated it to
+        # ``incomplete`` with the partial text it saw.
+        stored = adapter._response_store.get(response_id)
+        assert stored is not None, "snapshot must be retrievable after cancellation"
+        assert stored["response"]["status"] == "incomplete"
+        # Partial text captured before cancel should be preserved.
+        output_text = "".join(
+            part.get("text", "")
+            for item in stored["response"].get("output", [])
+            if item.get("type") == "message"
+            for part in item.get("content", [])
+        )
+        assert "partial output" in output_text
+
+    @pytest.mark.asyncio
+    async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter):
+        """Client disconnect (ConnectionResetError) during streaming must
+        persist an ``incomplete`` snapshot in ResponseStore.  Regression
+        for PR #15171."""
+        fake_request = MagicMock()
+        fake_request.headers = {}
+
+        write_call_count = {"n": 0}
+
+        class _DisconnectingStreamResponse:
+            async def prepare(self, req):
+                pass
+
+            async def write(self, payload):
+                # Only write() is counted (prepare() is a separate no-op): the
+                # first two writes succeed, then the third and every later one
+                # raises ConnectionResetError to simulate a client disconnect.
+                write_call_count["n"] += 1
+                if write_call_count["n"] >= 3:
+                    raise ConnectionResetError("simulated client disconnect")
+
+        import gateway.platforms.api_server as api_mod
+        import queue as _q
+
+        stream_q: _q.Queue = _q.Queue()
+        stream_q.put("some streamed text")
+        stream_q.put(None)  # EOS sentinel
+
+        async def _agent_coro():
+            await asyncio.sleep(0.01)
+            return ({"final_response": "", "messages": [], "api_calls": 0},
+                    {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
+
+        agent_task = asyncio.ensure_future(_agent_coro())
+        response_id = f"resp_{uuid.uuid4().hex[:28]}"
+
+        with patch.object(api_mod.web, "StreamResponse", return_value=_DisconnectingStreamResponse()):
+            await adapter._write_sse_responses(
+                request=fake_request,
+                response_id=response_id,
+                model="hermes-agent",
+                created_at=int(time.time()),
+                stream_q=stream_q,
+                agent_task=agent_task,
+                agent_ref=[None],
+                conversation_history=[],
+                user_message="will disconnect",
+                instructions=None,
+                conversation=None,
+                store=True,
+                session_id=None,
+            )
+
+        stored = adapter._response_store.get(response_id)
+        assert stored is not None, "snapshot must survive client disconnect"
+        assert stored["response"]["status"] == "incomplete"
+

 # ---------------------------------------------------------------------------
 # Auth on endpoints
@@ -66,6 +66,37 @@ class TestBlueBubblesHelpers:

        assert check_bluebubbles_requirements() is True

+    def test_supports_message_editing_is_false(self, monkeypatch):
+        adapter = _make_adapter(monkeypatch)
+        assert adapter.SUPPORTS_MESSAGE_EDITING is False
+
+    def test_truncate_message_omits_pagination_suffixes(self, monkeypatch):
+        adapter = _make_adapter(monkeypatch)
+        chunks = adapter.truncate_message("abcdefghij", max_length=6)
+        assert len(chunks) > 1
+        assert "".join(chunks) == "abcdefghij"
+        assert all("(" not in chunk for chunk in chunks)
+
+    @pytest.mark.asyncio
+    async def test_send_splits_paragraphs_into_multiple_bubbles(self, monkeypatch):
+        adapter = _make_adapter(monkeypatch)
+        sent = []
+
+        async def fake_resolve_chat_guid(chat_id):
+            return "iMessage;-;user@example.com"
+
+        async def fake_api_post(path, payload):
+            sent.append(payload["message"])
+            return {"data": {"guid": f"msg-{len(sent)}"}}
+
+        monkeypatch.setattr(adapter, "_resolve_chat_guid", fake_resolve_chat_guid)
+        monkeypatch.setattr(adapter, "_api_post", fake_api_post)
+
+        result = await adapter.send("user@example.com", "first thought\n\nsecond thought")
+
+        assert result.success is True
+        assert sent == ["first thought", "second thought"]
+
    def test_format_message_strips_markdown(self, monkeypatch):
        adapter = _make_adapter(monkeypatch)
        assert adapter.format_message("**Hello** `world`") == "Hello world"
@@ -70,6 +70,9 @@ def _make_runner():
    runner.session_store = None
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
+    runner.pairing_store = MagicMock()
+    runner.pairing_store.is_approved.return_value = True
+    runner._is_user_authorized = lambda _source: True
    return runner, _AGENT_PENDING_SENTINEL


@@ -91,6 +94,30 @@ def _make_adapter(platform_val="telegram"):
 class TestBusySessionAck:
    """User sends a message while agent is running — should get acknowledgment."""

+    @pytest.mark.asyncio
+    async def test_handle_message_queue_mode_queues_without_interrupt(self):
+        """Runner queue mode must not interrupt an active agent for text follow-ups."""
+        from gateway.run import GatewayRunner
+
+        runner, _sentinel = _make_runner()
+        adapter = _make_adapter()
+
+        event = _make_event(text="follow up in queue mode")
+        sk = build_session_key(event.source)
+
+        running_agent = MagicMock()
+        runner._busy_input_mode = "queue"
+        runner._running_agents[sk] = running_agent
+        runner.adapters[event.source.platform] = adapter
+
+        result = await GatewayRunner._handle_message(runner, event)
+
+        assert result is None
+        assert sk in adapter._pending_messages
+        assert adapter._pending_messages[sk] is event
+        assert sk not in runner._pending_messages
+        running_agent.interrupt.assert_not_called()
+
    @pytest.mark.asyncio
    async def test_sends_ack_when_agent_running(self):
        """First message during busy session should get a status ack."""
@@ -52,6 +52,10 @@ class TestPlatformConfigRoundtrip:
        assert restored.enabled is False
        assert restored.token is None

+    def test_from_dict_coerces_quoted_false_enabled(self):
+        restored = PlatformConfig.from_dict({"enabled": "false"})
+        assert restored.enabled is False
+

 class TestGetConnectedPlatforms:
    def test_returns_enabled_with_token(self):
@@ -140,6 +144,10 @@ class TestSessionResetPolicy:
        assert restored.at_hour == 4
        assert restored.idle_minutes == 1440

+    def test_from_dict_coerces_quoted_false_notify(self):
+        restored = SessionResetPolicy.from_dict({"notify": "false"})
+        assert restored.notify is False
+

 class TestGatewayConfigRoundtrip:
    def test_full_roundtrip(self):
@@ -182,6 +190,10 @@ class TestGatewayConfigRoundtrip:
        assert restored.unauthorized_dm_behavior == "ignore"
        assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"

+    def test_from_dict_coerces_quoted_false_always_log_local(self):
+        restored = GatewayConfig.from_dict({"always_log_local": "false"})
+        assert restored.always_log_local is False
+

 class TestLoadGatewayConfig:
    def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch):
@@ -238,6 +250,55 @@ class TestLoadGatewayConfig:

        assert config.thread_sessions_per_user is False

+    def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "platforms:\n"
+            "  api_server:\n"
+            "    enabled: \"false\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        assert config.platforms[Platform.API_SERVER].enabled is False
+        assert Platform.API_SERVER not in config.get_connected_platforms()
+
+    def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "session_reset:\n"
+            "  notify: \"false\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        assert config.default_reset_policy.notify is False
+
+    def test_bridges_quoted_false_always_log_local_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "always_log_local: \"false\"\n",
+            encoding="utf-8",
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        assert config.always_log_local is False
+
    def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
@@ -137,11 +137,38 @@ class TestGetProxyUrl:
 class TestResolveProxyUrl:
    def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
-                    "https_proxy", "http_proxy", "all_proxy"):
+                    "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
            monkeypatch.delenv(key, raising=False)
        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
        assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"

+    def test_no_proxy_bypasses_matching_host(self, monkeypatch):
+        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+                    "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
+        monkeypatch.setenv("NO_PROXY", "api.telegram.org")
+
+        assert resolve_proxy_url(target_hosts="api.telegram.org") is None
+
+    def test_no_proxy_bypasses_cidr_target(self, monkeypatch):
+        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+                    "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
+        monkeypatch.setenv("NO_PROXY", "149.154.160.0/20")
+
+        assert resolve_proxy_url(target_hosts=["149.154.167.220"]) is None
+
+    def test_no_proxy_ignored_without_target(self, monkeypatch):
+        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
+                    "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
+        monkeypatch.setenv("NO_PROXY", "*")
+
+        assert resolve_proxy_url() == "http://proxy.example:8080"
+

 class TestRunAgentProxyDispatch:
    """Test that _run_agent() delegates to proxy when configured."""
@@ -179,6 +179,40 @@ class TestHandleResumeCommand:
        assert call_args[0][1] == "sess_v2"
        db.close()

+    @pytest.mark.asyncio
+    async def test_resume_follows_compression_continuation(self, tmp_path):
+        """Gateway /resume should reopen the live descendant after compression."""
+        from hermes_state import SessionDB
+
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("compressed_root", "telegram")
+        db.set_session_title("compressed_root", "Compressed Work")
+        db.end_session("compressed_root", "compression")
+        db.create_session("compressed_child", "telegram", parent_session_id="compressed_root")
+        db.append_message("compressed_child", "user", "hello from continuation")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume Compressed Work")
+        runner = _make_runner(
+            session_db=db,
+            current_session_id="current_session_001",
+            event=event,
+        )
+        runner.session_store.load_transcript.side_effect = (
+            lambda session_id: [{"role": "user", "content": "hello from continuation"}]
+            if session_id == "compressed_child"
+            else []
+        )
+
+        result = await runner._handle_resume_command(event)
+
+        assert "Resumed session" in result
+        assert "(1 message)" in result
+        call_args = runner.session_store.switch_session.call_args
+        assert call_args[0][1] == "compressed_child"
+        runner.session_store.load_transcript.assert_called_with("compressed_child")
+        db.close()
+
    @pytest.mark.asyncio
    async def test_resume_clears_running_agent(self, tmp_path):
        """Switching sessions clears any cached running agent."""
@@ -58,6 +58,13 @@ class ProgressCaptureAdapter(BasePlatformAdapter):
        return {"id": chat_id}


+class NonEditingProgressCaptureAdapter(ProgressCaptureAdapter):
+    SUPPORTS_MESSAGE_EDITING = False
+
+    async def edit_message(self, chat_id, message_id, content) -> SendResult:
+        raise AssertionError("non-editable adapters should not receive edit_message calls")
+
+
 class FakeAgent:
    def __init__(self, **kwargs):
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
@@ -502,6 +509,7 @@ async def _run_with_agent(
    chat_id="-1001",
    chat_type="group",
    thread_id="17585",
+    adapter_cls=ProgressCaptureAdapter,
 ):
    if config_data:
        import yaml
@@ -516,7 +524,7 @@ async def _run_with_agent(
    fake_run_agent.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

-    adapter = ProgressCaptureAdapter(platform=platform)
+    adapter = adapter_cls(platform=platform)
    runner = _make_runner(adapter)
    gateway_run = importlib.import_module("gateway.run")
    if config_data and "streaming" in config_data:
@@ -666,6 +674,26 @@ async def test_run_agent_interim_commentary_works_with_tool_progress_off(monkeyp
    assert any(call["content"] == "I'll inspect the repo first." for call in adapter.sent)


+@pytest.mark.asyncio
+async def test_run_agent_bluebubbles_uses_commentary_send_path_for_quick_replies(monkeypatch, tmp_path):
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        CommentaryAgent,
+        session_id="sess-bluebubbles-commentary",
+        config_data={"display": {"interim_assistant_messages": True}},
+        platform=Platform.BLUEBUBBLES,
+        chat_id="iMessage;-;user@example.com",
+        chat_type="dm",
+        thread_id=None,
+        adapter_cls=NonEditingProgressCaptureAdapter,
+    )
+
+    assert result.get("already_sent") is not True
+    assert [call["content"] for call in adapter.sent] == ["I'll inspect the repo first."]
+    assert adapter.edits == []
+
+
 @pytest.mark.asyncio
 async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_path):
    adapter, result = await _run_with_agent(
@@ -11,6 +11,8 @@ from gateway.session import (
    build_session_context,
    build_session_context_prompt,
    build_session_key,
+    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,
 )


@@ -183,6 +185,25 @@ class TestBuildSessionContextPrompt:
        assert "Telegram" in prompt
        assert "Home Chat" in prompt

+    def test_bluebubbles_prompt_mentions_short_conversational_i_message_format(self):
+        config = GatewayConfig(
+            platforms={
+                Platform.BLUEBUBBLES: PlatformConfig(enabled=True, extra={"server_url": "http://localhost:1234", "password": "secret"}),
+            },
+        )
+        source = SessionSource(
+            platform=Platform.BLUEBUBBLES,
+            chat_id="iMessage;-;user@example.com",
+            chat_name="Ben",
+            chat_type="dm",
+        )
+        ctx = build_session_context(source, config)
+        prompt = build_session_context_prompt(ctx)
+
+        assert "responding via iMessage" in prompt
+        assert "short and conversational" in prompt
+        assert "blank line" in prompt
+
    def test_discord_prompt(self):
        config = GatewayConfig(
            platforms={
@@ -626,9 +647,9 @@ class TestSessionStoreSwitchSession:
        db.close()


-class TestWhatsAppDMSessionKeyConsistency:
-    """Regression: all session-key construction must go through build_session_key
-    so DMs are isolated by chat_id across platforms."""
+class TestWhatsAppSessionKeyConsistency:
+    """Regression: WhatsApp session keys must collapse JID/LID aliases to a
+    single stable identity for both DM chat_ids and group participant_ids."""

    @pytest.fixture()
    def store(self, tmp_path):
@@ -639,7 +660,7 @@ class TestWhatsAppDMSessionKeyConsistency:
        s._loaded = True
        return s

-    def test_whatsapp_dm_includes_chat_id(self):
+    def test_whatsapp_dm_uses_canonical_identifier(self):
        source = SessionSource(
            platform=Platform.WHATSAPP,
            chat_id="15551234567@s.whatsapp.net",
@@ -647,7 +668,80 @@ class TestWhatsAppDMSessionKeyConsistency:
            user_name="Phone User",
        )
        key = build_session_key(source)
-        assert key == "agent:main:whatsapp:dm:15551234567@s.whatsapp.net"
+        assert key == "agent:main:whatsapp:dm:15551234567"
+
+    def test_whatsapp_dm_aliases_share_one_session_key(self, tmp_path, monkeypatch):
+        tmp_home = tmp_path / "hermes-home"
+        mapping_dir = tmp_home / "whatsapp" / "session"
+        mapping_dir.mkdir(parents=True, exist_ok=True)
+        (mapping_dir / "lid-mapping-999999999999999.json").write_text(
+            json.dumps("15551234567@s.whatsapp.net"),
+            encoding="utf-8",
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_home))
+
+        lid_source = SessionSource(
+            platform=Platform.WHATSAPP,
+            chat_id="999999999999999@lid",
+            chat_type="dm",
+            user_name="Phone User",
+        )
+        phone_source = SessionSource(
+            platform=Platform.WHATSAPP,
+            chat_id="15551234567@s.whatsapp.net",
+            chat_type="dm",
+            user_name="Phone User",
+        )
+
+        assert build_session_key(lid_source) == "agent:main:whatsapp:dm:15551234567"
+        assert build_session_key(phone_source) == "agent:main:whatsapp:dm:15551234567"
+
+    def test_whatsapp_group_participant_aliases_share_session_key(self, tmp_path, monkeypatch):
+        """With group_sessions_per_user, the same human flipping between
+        phone-JID and LID inside a group must not produce two isolated
+        per-user sessions."""
+        tmp_home = tmp_path / "hermes-home"
+        mapping_dir = tmp_home / "whatsapp" / "session"
+        mapping_dir.mkdir(parents=True, exist_ok=True)
+        (mapping_dir / "lid-mapping-999999999999999.json").write_text(
+            json.dumps("15551234567@s.whatsapp.net"),
+            encoding="utf-8",
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_home))
+
+        lid_source = SessionSource(
+            platform=Platform.WHATSAPP,
+            chat_id="120363000000000000@g.us",
+            chat_type="group",
+            user_id="999999999999999@lid",
+            user_name="Group Member",
+        )
+        phone_source = SessionSource(
+            platform=Platform.WHATSAPP,
+            chat_id="120363000000000000@g.us",
+            chat_type="group",
+            user_id="15551234567@s.whatsapp.net",
+            user_name="Group Member",
+        )
+
+        expected = "agent:main:whatsapp:group:120363000000000000@g.us:15551234567"
+        assert build_session_key(lid_source, group_sessions_per_user=True) == expected
+        assert build_session_key(phone_source, group_sessions_per_user=True) == expected
+
+    def test_whatsapp_group_shared_sessions_untouched_by_canonicalisation(self):
+        """When group_sessions_per_user is False, participant_id is not in the
+        key at all, so canonicalisation is a no-op for this mode."""
+        source = SessionSource(
+            platform=Platform.WHATSAPP,
+            chat_id="120363000000000000@g.us",
+            chat_type="group",
+            user_id="999999999999999@lid",
+            user_name="Group Member",
+        )
+        assert (
+            build_session_key(source, group_sessions_per_user=False)
+            == "agent:main:whatsapp:group:120363000000000000@g.us"
+        )

    def test_store_delegates_to_build_session_key(self, store):
        """SessionStore._generate_session_key must produce the same result."""
@@ -866,6 +960,57 @@ class TestWhatsAppDMSessionKeyConsistency:
        assert key == "agent:main:telegram:dm:99:topic-1"


+class TestWhatsAppIdentifierPublicHelpers:
+    """Contract tests for the public WhatsApp identifier helpers.
+
+    These helpers are part of the public API for plugins that need
+    WhatsApp identity awareness. Breaking these contracts is a
+    breaking change for downstream plugins.
+    """
+
+    def test_normalize_strips_jid_suffix(self):
+        assert normalize_whatsapp_identifier("60123456789@s.whatsapp.net") == "60123456789"
+
+    def test_normalize_strips_lid_suffix(self):
+        assert normalize_whatsapp_identifier("999999999999999@lid") == "999999999999999"
+
+    def test_normalize_strips_device_suffix(self):
+        assert normalize_whatsapp_identifier("60123456789:47@s.whatsapp.net") == "60123456789"
+
+    def test_normalize_strips_leading_plus(self):
+        assert normalize_whatsapp_identifier("+60123456789") == "60123456789"
+
+    def test_normalize_handles_bare_numeric(self):
+        assert normalize_whatsapp_identifier("60123456789") == "60123456789"
+
+    def test_normalize_handles_empty_and_none(self):
+        assert normalize_whatsapp_identifier("") == ""
+        assert normalize_whatsapp_identifier(None) == ""  # type: ignore[arg-type]
+
+    def test_canonical_without_mapping_returns_normalized(self, tmp_path, monkeypatch):
+        """With no bridge mapping files, the normalized input is returned."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        assert canonical_whatsapp_identifier("60123456789@lid") == "60123456789"
+
+    def test_canonical_walks_lid_mapping(self, tmp_path, monkeypatch):
+        """LID is resolved to its paired phone identity via lid-mapping files."""
+        mapping_dir = tmp_path / "whatsapp" / "session"
+        mapping_dir.mkdir(parents=True, exist_ok=True)
+        (mapping_dir / "lid-mapping-999999999999999.json").write_text(
+            json.dumps("15551234567@s.whatsapp.net"),
+            encoding="utf-8",
+        )
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+        canonical = canonical_whatsapp_identifier("999999999999999@lid")
+        assert canonical == "15551234567"
+        assert canonical_whatsapp_identifier("15551234567@s.whatsapp.net") == "15551234567"
+
+    def test_canonical_empty_input(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        assert canonical_whatsapp_identifier("") == ""
+
+
 class TestSessionStoreEntriesAttribute:
    """Regression: /reset must access _entries, not _sessions."""

@@ -322,7 +322,7 @@ class TestFallbackTransportInit:
            seen_kwargs.append(kwargs.copy())
            return FakeTransport([], {})

-        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY"):
+        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY", "NO_PROXY", "no_proxy"):
            monkeypatch.delenv(key, raising=False)
        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
        monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory)
@@ -333,6 +333,25 @@ class TestFallbackTransportInit:
        assert len(seen_kwargs) == 2
        assert all(kwargs["proxy"] == "http://proxy.example:8080" for kwargs in seen_kwargs)

+    def test_no_proxy_bypasses_fallback_ip_cidr(self, monkeypatch):
+        seen_kwargs = []
+
+        def factory(**kwargs):
+            seen_kwargs.append(kwargs.copy())
+            return FakeTransport([], {})
+
+        for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY", "NO_PROXY", "no_proxy"):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
+        monkeypatch.setenv("NO_PROXY", "149.154.160.0/20")
+        monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory)
+
+        transport = tnet.TelegramFallbackTransport(["149.154.167.220"])
+
+        assert transport._fallback_ips == ["149.154.167.220"]
+        assert len(seen_kwargs) == 2
+        assert all("proxy" not in kwargs for kwargs in seen_kwargs)
+

 class TestFallbackTransportClose:
    @pytest.mark.asyncio
@@ -3,7 +3,6 @@ from unittest.mock import AsyncMock, MagicMock

 import pytest

-import gateway.run as gateway_run
 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent
 from gateway.session import SessionSource
@@ -12,6 +11,7 @@ from gateway.session import SessionSource
 def _clear_auth_env(monkeypatch) -> None:
    for key in (
        "TELEGRAM_ALLOWED_USERS",
+        "TELEGRAM_GROUP_ALLOWED_USERS",
        "DISCORD_ALLOWED_USERS",
        "WHATSAPP_ALLOWED_USERS",
        "SLACK_ALLOWED_USERS",
@@ -75,7 +75,7 @@ def _make_runner(platform: Platform, config: GatewayConfig):
 def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path):
    _clear_auth_env(monkeypatch)
    monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001")
-    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    session_dir = tmp_path / "whatsapp" / "session"
    session_dir.mkdir(parents=True)
@@ -178,6 +178,26 @@ def test_qq_group_allowlist_does_not_authorize_other_groups(monkeypatch):
    assert runner._is_user_authorized(source) is False


+def test_telegram_group_allowlist_authorizes_forum_chat_without_user_allowlist(monkeypatch):
+    _clear_auth_env(monkeypatch)
+    monkeypatch.setenv("TELEGRAM_GROUP_ALLOWED_USERS", "-1001878443972")
+
+    runner, _adapter = _make_runner(
+        Platform.TELEGRAM,
+        GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="t")}),
+    )
+
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="999",
+        chat_id="-1001878443972",
+        user_name="tester",
+        chat_type="forum",
+    )
+
+    assert runner._is_user_authorized(source) is True
+
+
 @pytest.mark.asyncio
 async def test_unauthorized_dm_pairs_by_default(monkeypatch):
    _clear_auth_env(monkeypatch)
@@ -0,0 +1,90 @@
+"""Regression test for /model context-length display on provider-capped models.
+
+Bug (April 2026): `/model gpt-5.5` on openai-codex (ChatGPT OAuth) showed
+"Context: 1,050,000 tokens" because the display code used the raw models.dev
+``ModelInfo.context_window`` (which reports the direct-OpenAI API value) instead
+of the provider-aware resolver. The agent was actually running at 272K — Codex
+OAuth's enforced cap — so the display was lying to the user.
+
+Fix: ``resolve_display_context_length()`` prefers
+``agent.model_metadata.get_model_context_length`` (which knows about Codex OAuth,
+Copilot, Nous, etc.) and falls back to models.dev only if that returns nothing.
+"""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import resolve_display_context_length
+
+
+class _FakeModelInfo:  # minimal stand-in passed as ``model_info`` by the tests below
+    def __init__(self, ctx):
+        self.context_window = ctx  # the only attribute this fake exposes
+
+
+class TestResolveDisplayContextLength:
+    """Display value comes from the provider-aware resolver, then ``model_info``."""
+
+    # Dotted path of the resolver that every test below replaces with a mock.
+    _RESOLVER = "agent.model_metadata.get_model_context_length"
+
+    def test_codex_oauth_overrides_models_dev(self):
+        """gpt-5.5 on openai-codex must show Codex's 272K cap, not models.dev's 1.05M."""
+        models_dev_info = _FakeModelInfo(1_050_000)  # what models.dev reports
+        # The patched resolver returns what Codex OAuth actually enforces.
+        with patch(self._RESOLVER, return_value=272_000):
+            shown = resolve_display_context_length(
+                "gpt-5.5",
+                "openai-codex",
+                base_url="https://chatgpt.com/backend-api/codex",
+                api_key="",
+                model_info=models_dev_info,
+            )
+        assert shown == 272_000, (
+            "Codex OAuth's 272K cap must win over models.dev's 1.05M for gpt-5.5"
+        )
+
+    def test_falls_back_to_model_info_when_resolver_returns_none(self):
+        """A ``None`` from the resolver hands the decision to ``model_info``."""
+        models_dev_info = _FakeModelInfo(1_048_576)
+        with patch(self._RESOLVER, return_value=None):
+            shown = resolve_display_context_length(
+                "some-model",
+                "some-provider",
+                model_info=models_dev_info,
+            )
+        assert shown == 1_048_576
+
+    def test_returns_none_when_both_sources_empty(self):
+        """No resolver value and no ``model_info`` leaves nothing to display."""
+        with patch(self._RESOLVER, return_value=None):
+            shown = resolve_display_context_length(
+                "unknown-model",
+                "unknown-provider",
+                model_info=None,
+            )
+        assert shown is None
+
+    def test_resolver_exception_falls_back_to_model_info(self):
+        """An exception raised by the resolver still yields the ``model_info`` value."""
+        models_dev_info = _FakeModelInfo(200_000)
+        resolver_failure = RuntimeError("network down")
+        with patch(self._RESOLVER, side_effect=resolver_failure):
+            shown = resolve_display_context_length(
+                "x",
+                "y",
+                model_info=models_dev_info,
+            )
+        assert shown == 200_000
+
+    def test_prefers_resolver_even_when_model_info_has_larger_value(self):
+        """Invariant: provider-aware resolver is authoritative, even if models.dev
+        reports a bigger window."""
+        models_dev_info = _FakeModelInfo(2_000_000)
+        with patch(self._RESOLVER, return_value=128_000):
+            shown = resolve_display_context_length(
+                "capped-model",
+                "capped-provider",
+                model_info=models_dev_info,
+            )
+        assert shown == 128_000
@@ -0,0 +1,172 @@
+"""Unit tests for hermes_cli.pty_bridge — PTY spawning + byte forwarding.
+
+These tests drive the bridge with minimal POSIX processes (echo, env, sleep,
+printf) to verify it behaves like a PTY you can read/write/resize/close.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+import time
+
+import pytest
+
+pytest.importorskip("ptyprocess", reason="ptyprocess not installed")
+
+from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+
+
+skip_on_windows = pytest.mark.skipif(
+    sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
+)
+
+
+def _read_until(bridge: PtyBridge, needle: bytes, timeout: float = 5.0) -> bytes:
+    """Collect PTY output until `needle` appears, read() returns None, or time runs out."""
+    collected = bytearray()
+    give_up_at = time.monotonic() + timeout
+    while time.monotonic() < give_up_at:
+        piece = bridge.read(timeout=0.2)
+        if piece is None:  # the tests treat None as EOF: stop waiting
+            break
+        collected += piece
+        if needle in collected:
+            break
+    return bytes(collected)
+
+
+@skip_on_windows
+class TestPtyBridgeSpawn:
+    def test_is_available_on_posix(self):
+        assert PtyBridge.is_available() is True
+
+    def test_spawn_returns_bridge_with_pid(self):
+        spawned = PtyBridge.spawn(["true"])
+        try:
+            assert spawned.pid > 0
+        finally:
+            spawned.close()
+
+    def test_spawn_raises_on_missing_argv0(self, tmp_path):
+        with pytest.raises(OSError):  # FileNotFoundError is an OSError subclass
+            PtyBridge.spawn([str(tmp_path / "definitely-not-a-real-binary")])
+
+
+@skip_on_windows
+class TestPtyBridgeIO:
+    def test_reads_child_stdout(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf hermes-ok"])
+        try:
+            output = _read_until(bridge, b"hermes-ok")
+            assert b"hermes-ok" in output
+        finally:
+            bridge.close()
+
+    def test_write_sends_to_child_stdin(self):
+        # `cat` with no args echoes stdin back to stdout.  We write a line and
+        # read it back; no EOF is sent here, cleanup is left to close() below.
+        bridge = PtyBridge.spawn(["/bin/cat"])
+        try:
+            bridge.write(b"hello-pty\n")
+            output = _read_until(bridge, b"hello-pty")
+            assert b"hello-pty" in output
+        finally:
+            bridge.close()
+
+    def test_read_returns_none_after_child_exits(self):
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf done"])
+        try:
+            _read_until(bridge, b"done")
+            # Give the child a beat to exit cleanly, then drain until EOF.
+            deadline = time.monotonic() + 3.0
+            while bridge.is_alive() and time.monotonic() < deadline:
+                bridge.read(timeout=0.1)
+            # Next reads after exit should return None (EOF), not raise.
+            got_none = False
+            for _ in range(10):
+                if bridge.read(timeout=0.1) is None:
+                    got_none = True
+                    break
+            assert got_none, "PtyBridge.read did not return None after child EOF"
+        finally:
+            bridge.close()
+
+
+@skip_on_windows
+class TestPtyBridgeResize:
+    def test_resize_updates_child_winsize(self):
+        # The shell sleeps briefly and then asks tput for the terminal size, so
+        # the resize() issued below is expected to be in effect by then.
+        script = "sleep 0.1; tput cols; tput lines"
+        bridge = PtyBridge.spawn(
+            ["/bin/sh", "-c", script],
+            cols=80,
+            rows=24,
+        )
+        try:
+            bridge.resize(cols=123, rows=45)
+            seen = _read_until(bridge, b"45", timeout=5.0)  # rows are printed last
+            assert b"123" in seen
+            assert b"45" in seen
+        finally:
+            bridge.close()
+
+
+@skip_on_windows
+class TestPtyBridgeClose:
+    def test_close_is_idempotent(self):
+        sleeper = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
+        for _ in range(2):  # the second close() must not raise
+            sleeper.close()
+        assert not sleeper.is_alive()
+
+    def test_close_terminates_long_running_child(self):
+        sleeper = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
+        pid = sleeper.pid
+        sleeper.close()
+        # Probe the pid with signal 0 for up to 3s until the kernel reports it gone.
+        reaped = False
+        give_up_at = time.monotonic() + 3.0
+        while not reaped and time.monotonic() < give_up_at:
+            try:
+                os.kill(pid, 0)
+            except ProcessLookupError:
+                reaped = True
+            else:
+                time.sleep(0.05)
+        assert reaped, f"pid {pid} still running after close()"
+
+
+@skip_on_windows
+class TestPtyBridgeEnv:
+    def test_cwd_is_respected(self, tmp_path):
+        """The child starts in the directory passed as ``cwd``."""
+        expected = str(tmp_path).encode()
+        # `pwd` makes the shell print its working directory.
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", "pwd"], cwd=str(tmp_path))
+        try:
+            seen = _read_until(bridge, expected)
+            assert expected in seen
+        finally:
+            bridge.close()
+
+    def test_env_is_forwarded(self):
+        """A variable supplied through ``env`` is visible to the child."""
+        child_env = {**os.environ, "HERMES_PTY_TEST": "pty-env-works"}
+        script = "printf %s \"$HERMES_PTY_TEST\""
+        bridge = PtyBridge.spawn(["/bin/sh", "-c", script], env=child_env)
+        try:
+            seen = _read_until(bridge, b"pty-env-works")
+            assert b"pty-env-works" in seen
+        finally:
+            bridge.close()
+
+
+class TestPtyBridgeUnavailable:
+    """Not gated by skip_on_windows: checks that PtyUnavailableError's str()
+    carries the message it was constructed with."""
+
+    def test_error_carries_user_message(self):
+        message = str(PtyUnavailableError("platform not supported"))
+        assert "platform" in message
@@ -601,3 +601,53 @@ class TestImagegenModelPicker:
            _configure_imagegen_model("fal", config)
        assert isinstance(config["image_gen"], dict)
        assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
+
+
+def test_save_platform_tools_normalizes_numeric_entries():
+    """A bare numeric toolset name that YAML loaded as int (here 12306) must
+    come back from the save path as the string "12306", never as the int, so
+    it survives the round-trip.
+    """
+    existing = ["web", "terminal", 12306, "custom-mcp"]
+    config = {"platform_toolsets": {"cli": existing}}
+
+    # save_config is patched so the real persistence step is not run.
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web", "browser"})
+
+    saved = config["platform_toolsets"]["cli"]
+    # Checked separately: "12306" and 12306 are distinct list members.
+    assert "12306" in saved
+    assert 12306 not in saved
+
+
+def test_save_platform_tools_clears_stale_no_mcp():
+    """A ``no_mcp`` sentinel left over in the saved list must be dropped when
+    the newly selected set does not contain it, so MCP servers are
+    re-enabled.
+    """
+    previously_saved = ["web", "terminal", "no_mcp"]
+    new_selection = {"web", "browser"}
+    config = {"platform_toolsets": {"cli": previously_saved}}
+
+    # save_config is patched so the real persistence step is not run.
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", new_selection)
+
+    saved = config["platform_toolsets"]["cli"]
+    assert "no_mcp" not in saved
+
+
+def test_save_platform_tools_preserves_explicit_no_mcp():
+    """``no_mcp`` stays in the saved list when the new selection names it."""
+    previously_saved = ["web", "no_mcp"]
+    new_selection = {"web", "no_mcp"}
+    config = {"platform_toolsets": {"cli": previously_saved}}
+
+    # save_config is patched so the real persistence step is not run.
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", new_selection)
+
+    saved = config["platform_toolsets"]["cli"]
+    # The sentinel was explicitly selected, so it must survive the save.
+    assert "no_mcp" in saved
@@ -1677,3 +1677,251 @@ class TestDashboardPluginManifestExtensions:
        plugins = web_server._get_dashboard_plugins(force_rescan=True)
        entry = next(p for p in plugins if p["name"] == "mixed-slots")
        assert entry["slots"] == ["sidebar", "header-right"]
+
+
+# ---------------------------------------------------------------------------
+# /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab.
+#
+# These tests drive the endpoint with a tiny fake command (typically ``cat``
+# or ``sh -c 'printf …'``) instead of the real ``hermes --tui`` binary.  The
+# endpoint resolves its argv through ``_resolve_chat_argv``, so tests
+# monkeypatch that hook.
+# ---------------------------------------------------------------------------
+
+import sys
+
+
+skip_on_windows = pytest.mark.skipif(
+    sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
+)
+
+
+@skip_on_windows
+class TestPtyWebSocket:
+    @pytest.fixture(autouse=True)
+    def _setup(self, monkeypatch, _isolate_hermes_home):
+        from starlette.testclient import TestClient
+
+        import hermes_cli.web_server as ws
+
+        # Avoid exec'ing the actual TUI in tests: every test below installs
+        # its own fake argv via ``ws._resolve_chat_argv``.
+        self.ws_module = ws
+        monkeypatch.setattr(ws, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", True)
+        self.token = ws._SESSION_TOKEN
+        self.client = TestClient(ws.app)
+
+    def _url(self, token: str | None = None, **params: str) -> str:
+        tok = token if token is not None else self.token
+        # TestClient.websocket_connect takes the path; it reconstructs the
+        # query string, so we pass it inline.
+        from urllib.parse import urlencode
+
+        q = {"token": tok, **params}
+        return f"/api/pty?{urlencode(q)}"
+
+    def test_rejects_when_embedded_chat_disabled(self, monkeypatch):
+        monkeypatch.setattr(self.ws_module, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", False)
+        from starlette.websockets import WebSocketDisconnect
+
+        with pytest.raises(WebSocketDisconnect) as exc:
+            with self.client.websocket_connect(self._url()):
+                pass
+        assert exc.value.code == 4403
+
+    def test_rejects_missing_token(self, monkeypatch):
+        monkeypatch.setattr(
+            self.ws_module,
+            "_resolve_chat_argv",
+            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+        )
+        from starlette.websockets import WebSocketDisconnect
+
+        with pytest.raises(WebSocketDisconnect) as exc:
+            with self.client.websocket_connect("/api/pty"):
+                pass
+        assert exc.value.code == 4401
+
+    def test_rejects_bad_token(self, monkeypatch):
+        monkeypatch.setattr(
+            self.ws_module,
+            "_resolve_chat_argv",
+            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+        )
+        from starlette.websockets import WebSocketDisconnect
+
+        with pytest.raises(WebSocketDisconnect) as exc:
+            with self.client.websocket_connect(self._url(token="wrong")):
+                pass
+        assert exc.value.code == 4401
+
+    def test_streams_child_stdout_to_client(self, monkeypatch):
+        """Bytes printed by the PTY child arrive at the client as binary frames."""
+        import time
+
+        def fake_argv(resume=None, sidecar_url=None):
+            return (
+                ["/bin/sh", "-c", "printf hermes-ws-ok"],
+                None,
+                None,
+            )
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_argv)
+        needle = b"hermes-ws-ok"
+        with self.client.websocket_connect(self._url()) as conn:
+            # receive_bytes() blocks, so the deadline is only re-checked between
+            # frames; keep draining until the needle shows up or time is up.
+            received = b""
+            deadline = time.monotonic() + 5.0
+            while needle not in received and time.monotonic() < deadline:
+                try:
+                    frame = conn.receive_bytes()
+                except Exception:
+                    # A receive error ends the drain; the assert decides the outcome.
+                    break
+                if frame:
+                    received += frame
+            assert needle in received
+
+    def test_client_input_reaches_child_stdin(self, monkeypatch):
+        """Bytes sent by the client come back via ``cat``: a write → read
+        round-trip that exercises both directions of the bridge."""
+        import time
+
+        def fake_argv(resume=None, sidecar_url=None):
+            return (["/bin/cat"], None, None)
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_argv)
+        needle = b"round-trip-payload"
+
+        with self.client.websocket_connect(self._url()) as conn:
+            conn.send_bytes(b"round-trip-payload\n")
+            echoed = b""
+            deadline = time.monotonic() + 5.0
+            # The deadline is only re-checked between (blocking) receives.
+            while needle not in echoed and time.monotonic() < deadline:
+                frame = conn.receive_bytes()
+                if frame:
+                    echoed += frame
+            assert needle in echoed
+
+    def test_resize_escape_is_forwarded(self, monkeypatch):
+        """The resize escape sent by the client is intercepted and applied via
+        TIOCSWINSZ; ``tput cols/lines`` then reports the new dimensions back."""
+        import time
+
+        def fake_argv(resume=None, sidecar_url=None):
+            # sleep gives the test time to push the resize before tput runs
+            script = "sleep 0.15; tput cols; tput lines"
+            return (["/bin/sh", "-c", script], None, None)
+
+        def both_sizes_seen(data):
+            return b"99" in data and b"41" in data
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_argv)
+
+        with self.client.websocket_connect(self._url()) as conn:
+            # Request 99 columns by 41 rows (the escape carries cols first, then rows).
+            conn.send_text("\x1b[RESIZE:99;41]")
+            output = b""
+            deadline = time.monotonic() + 5.0
+            # The deadline is only re-checked between (blocking) receives.
+            while not both_sizes_seen(output) and time.monotonic() < deadline:
+                frame = conn.receive_bytes()
+                if frame:
+                    output += frame
+            assert both_sizes_seen(output)
+
+    def test_unavailable_platform_closes_with_message(self, monkeypatch):
+        """If spawning raises PtyUnavailableError, the client still receives a
+        text frame that mentions the PTY problem."""
+        from hermes_cli.pty_bridge import PtyUnavailableError
+
+        def _refuse(cls, *args, **kwargs):  # accepts any spawn() call shape
+            raise PtyUnavailableError("pty missing for tests")
+
+        monkeypatch.setattr(
+            self.ws_module,
+            "_resolve_chat_argv",
+            lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None),
+        )
+        # ws_module is hermes_cli.web_server, so this patches its PtyBridge binding.
+        monkeypatch.setattr(self.ws_module.PtyBridge, "spawn", classmethod(_refuse))
+
+        with self.client.websocket_connect(self._url()) as conn:
+            lowered = conn.receive_text().lower()
+            # Any "pty missing…" text also contains "pty", so two checks suffice.
+            assert "unavailable" in lowered or "pty" in lowered
+
+    def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch):
+        captured: dict = {}
+
+        def fake_resolve(resume=None, sidecar_url=None):
+            captured["resume"] = resume
+            return (["/bin/sh", "-c", "printf resume-arg-ok"], None, None)
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)
+
+        with self.client.websocket_connect(self._url(resume="sess-42")) as conn:
+            # Drain briefly so the handler actually invokes the resolver.
+            try:
+                conn.receive_bytes()
+            except Exception:
+                pass
+        assert captured.get("resume") == "sess-42"
+
+    def test_channel_param_propagates_sidecar_url(self, monkeypatch):
+        """When /api/pty is opened with ?channel=, the PTY child gets a
+        HERMES_TUI_SIDECAR_URL env var pointing back at /api/pub on the
+        same channel — which is how tool events reach the dashboard sidebar."""
+        captured: dict = {}
+
+        def fake_resolve(resume=None, sidecar_url=None):
+            captured["sidecar_url"] = sidecar_url
+            return (["/bin/sh", "-c", "printf sidecar-ok"], None, None)
+
+        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)
+        monkeypatch.setattr(
+            self.ws_module.app.state, "bound_host", "127.0.0.1", raising=False
+        )
+        monkeypatch.setattr(
+            self.ws_module.app.state, "bound_port", 9119, raising=False
+        )
+
+        with self.client.websocket_connect(self._url(channel="abc-123")) as conn:
+            try:
+                conn.receive_bytes()
+            except Exception:
+                pass
+
+        url = captured.get("sidecar_url") or ""
+        assert url.startswith("ws://127.0.0.1:9119/api/pub?")
+        assert "channel=abc-123" in url
+        assert "token=" in url
+
+    def test_pub_broadcasts_to_events_subscribers(self, monkeypatch):
+        """Frame written to /api/pub is rebroadcast verbatim to every
+        /api/events subscriber on the same channel."""
+        from urllib.parse import urlencode
+
+        qs = urlencode({"token": self.token, "channel": "broadcast-test"})
+        pub_path = f"/api/pub?{qs}"
+        sub_path = f"/api/events?{qs}"
+
+        with self.client.websocket_connect(sub_path) as sub:
+            with self.client.websocket_connect(pub_path) as pub:
+                pub.send_text('{"type":"tool.start","payload":{"tool_id":"t1"}}')
+                received = sub.receive_text()
+
+        assert "tool.start" in received
+        assert '"tool_id":"t1"' in received
+
+    def test_events_rejects_missing_channel(self):
+        from starlette.websockets import WebSocketDisconnect
+
+        with pytest.raises(WebSocketDisconnect) as exc:
+            with self.client.websocket_connect(
+                f"/api/events?token={self.token}"
+            ):
+                pass
+        assert exc.value.code == 4400
@@ -0,0 +1,213 @@
+"""Regression test: DeepSeek V4 thinking mode reasoning_content echo.
+
+DeepSeek V4-flash / V4-pro thinking mode requires ``reasoning_content`` on
+every assistant message that carries ``tool_calls``. When a persisted
+session replays an assistant tool-call turn that was recorded without the
+field, DeepSeek rejects the next request with HTTP 400::
+
+    The reasoning_content in the thinking mode must be passed back to the API.
+
+Fix covers three paths:
+
+1. ``_build_assistant_message`` — new tool-call messages without raw
+   reasoning_content get ``""`` pinned at creation time so nothing gets
+   persisted poisoned.
+2. ``_copy_reasoning_content_for_api`` — already-poisoned history replays
+   with ``reasoning_content=""`` injected defensively.
+3. Detection covers three signals: ``provider == "deepseek"``,
+   ``"deepseek" in model``, and ``api.deepseek.com`` host match. The third
+   catches custom-provider setups pointing at DeepSeek.
+
+Refs #15250 / #15353.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from run_agent import AIAgent
+
+
+def _make_agent(provider: str = "", model: str = "", base_url: str = "") -> AIAgent:
+    agent = object.__new__(AIAgent)  # skips AIAgent.__init__; only the three fields below are set
+    agent.provider = provider
+    agent.model = model
+    agent.base_url = base_url
+    return agent
+
+
+class TestNeedsDeepSeekToolReasoning:
+    """_needs_deepseek_tool_reasoning() recognises all three detection signals."""
+
+    @staticmethod
+    def _detects(**fields: str):
+        """Build a bare agent from `fields` and return the detector's raw result."""
+        return _make_agent(**fields)._needs_deepseek_tool_reasoning()
+
+    def test_provider_deepseek(self) -> None:
+        assert self._detects(provider="deepseek", model="deepseek-v4-flash") is True
+
+    def test_model_substring(self) -> None:
+        # Custom provider pointing at DeepSeek with provider='custom'
+        assert self._detects(provider="custom", model="deepseek-v4-pro") is True
+
+    def test_base_url_host(self) -> None:
+        assert self._detects(
+            provider="custom",
+            model="some-aliased-name",
+            base_url="https://api.deepseek.com/v1",
+        ) is True
+
+    def test_provider_case_insensitive(self) -> None:
+        assert self._detects(provider="DeepSeek", model="") is True
+
+    def test_non_deepseek_provider(self) -> None:
+        assert self._detects(
+            provider="openrouter",
+            model="anthropic/claude-sonnet-4.6",
+            base_url="https://openrouter.ai/api/v1",
+        ) is False
+
+    def test_empty_everything(self) -> None:
+        """With provider, model and base_url all empty there is no DeepSeek signal."""
+        assert self._detects() is False
+
+
+class TestCopyReasoningContentForApi:
+    """_copy_reasoning_content_for_api pads reasoning_content for DeepSeek tool-calls."""
+
+    @staticmethod
+    def _tool_calls() -> list:
+        """Return a fresh single-entry ``tool_calls`` list for one assistant turn."""
+        return [{"id": "c1", "function": {"name": "terminal"}}]
+
+    @staticmethod
+    def _copy(agent: AIAgent, source: dict) -> dict:
+        """Run the method under test against an empty ``api_msg`` and return it."""
+        api_msg: dict = {}
+        agent._copy_reasoning_content_for_api(source, api_msg)
+        return api_msg
+
+    def test_deepseek_tool_call_poisoned_history_gets_empty_string(self) -> None:
+        """Already-poisoned history (no reasoning_content, no reasoning) gets ''."""
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
+        source = {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": self._tool_calls(),
+        }
+        api_msg = self._copy(agent, source)
+        assert api_msg.get("reasoning_content") == ""
+
+    def test_deepseek_assistant_no_tool_call_left_alone(self) -> None:
+        """Plain assistant turns without tool_calls don't get padded."""
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
+        api_msg = self._copy(agent, {"role": "assistant", "content": "hello"})
+        assert "reasoning_content" not in api_msg
+
+    def test_deepseek_explicit_reasoning_content_preserved(self) -> None:
+        """When reasoning_content is already set, it's copied verbatim."""
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
+        source = {
+            "role": "assistant",
+            "reasoning_content": "<think>real chain of thought</think>",
+            "tool_calls": self._tool_calls(),
+        }
+        assert self._copy(agent, source)["reasoning_content"] == "<think>real chain of thought</think>"
+
+    def test_deepseek_reasoning_field_promoted(self) -> None:
+        """When only 'reasoning' is set, it gets promoted to reasoning_content."""
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
+        source = {
+            "role": "assistant",
+            "reasoning": "thought trace",
+            "tool_calls": self._tool_calls(),
+        }
+        assert self._copy(agent, source)["reasoning_content"] == "thought trace"
+
+    def test_kimi_path_still_works(self) -> None:
+        """Existing Kimi detection still pads reasoning_content."""
+        agent = _make_agent(provider="kimi-coding", model="kimi-k2.5")
+        source = {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": self._tool_calls(),
+        }
+        api_msg = self._copy(agent, source)
+        assert api_msg.get("reasoning_content") == ""
+
+    def test_kimi_moonshot_base_url(self) -> None:
+        """A custom provider whose base_url is api.moonshot.ai is padded too."""
+        agent = _make_agent(
+            provider="custom", model="kimi-k2", base_url="https://api.moonshot.ai/v1"
+        )
+        source = {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": self._tool_calls(),
+        }
+        api_msg = self._copy(agent, source)
+        assert api_msg.get("reasoning_content") == ""
+
+    def test_non_thinking_provider_not_padded(self) -> None:
+        """Providers that don't require the echo are untouched."""
+        agent = _make_agent(
+            provider="openrouter",
+            model="anthropic/claude-sonnet-4.6",
+            base_url="https://openrouter.ai/api/v1",
+        )
+        source = {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": self._tool_calls(),
+        }
+        api_msg = self._copy(agent, source)
+        assert "reasoning_content" not in api_msg
+
+    def test_deepseek_custom_base_url(self) -> None:
+        """Custom provider pointing at api.deepseek.com is detected via host."""
+        agent = _make_agent(
+            provider="custom",
+            model="whatever",
+            base_url="https://api.deepseek.com/v1",
+        )
+        source = {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": self._tool_calls(),
+        }
+        api_msg = self._copy(agent, source)
+        assert api_msg.get("reasoning_content") == ""
+
+    def test_non_assistant_role_ignored(self) -> None:
+        """User/tool messages are left alone."""
+        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
+        api_msg = self._copy(agent, {"role": "user", "content": "hi"})
+        assert "reasoning_content" not in api_msg
+
+
+class TestNeedsKimiToolReasoning:
+    """The extracted _needs_kimi_tool_reasoning() helper keeps Kimi behavior intact."""
+
+    # (provider, base_url) pairs that must each be recognised as Kimi.
+    _KIMI_SIGNALS = [
+        ("kimi-coding", ""),
+        ("kimi-coding-cn", ""),
+        ("custom", "https://api.kimi.com/v1"),
+        ("custom", "https://api.moonshot.ai/v1"),
+        ("custom", "https://api.moonshot.cn/v1"),
+    ]
+
+    @pytest.mark.parametrize(("provider", "base_url"), _KIMI_SIGNALS)
+    def test_kimi_signals(self, provider: str, base_url: str) -> None:
+        kimi_agent = _make_agent(provider=provider, model="kimi-k2", base_url=base_url)
+        assert kimi_agent._needs_kimi_tool_reasoning() is True
+
+    def test_non_kimi_provider(self) -> None:
+        via_openrouter = _make_agent(
+            provider="openrouter",
+            model="moonshotai/kimi-k2",
+            base_url="https://openrouter.ai/api/v1",
+        )
+        # model name contains 'moonshot' but host is openrouter — should be False
+        assert via_openrouter._needs_kimi_tool_reasoning() is False
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:

        agent.client.chat.completions.create.assert_called_once()

+    def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
+        """Provider/API failures from auxiliary flush must be visible.
+
+        Exhausted keys and rate limits are not always RuntimeError. They used
+        to fall into the broad outer handler and disappear into debug logs.
+        """
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+        events = []
+        agent.status_callback = lambda kind, text=None: events.append((kind, text))
+        with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+        # text defaults to None in the callback; `or ""` keeps a text-less warn from raising TypeError.
+        assert any(kind == "warn" and "Auxiliary memory flush failed" in (text or "") for kind, text in events)
+
    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
@@ -209,6 +233,31 @@ class TestFlushMemoriesUsesAuxiliaryClient:
        assert call_kwargs.kwargs["target"] == "notes"
        assert "dark mode" in call_kwargs.kwargs["content"]

+    def test_flush_bridges_memory_write_metadata(self, monkeypatch):
+        """Flush memory writes notify external providers with flush provenance."""
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent._memory_manager = MagicMock()
+        agent.session_id = "sess-flush"
+        agent.platform = "cli"
+
+        mock_response = _chat_response_with_memory_call()
+
+        with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+                {"role": "user", "content": "Note this"},
+            ]
+            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
+                agent.flush_memories(messages)
+
+        agent._memory_manager.on_memory_write.assert_called_once()
+        call_kwargs = agent._memory_manager.on_memory_write.call_args
+        assert call_kwargs.args[:3] == ("add", "notes", "User prefers dark mode.")
+        assert call_kwargs.kwargs["metadata"]["write_origin"] == "memory_flush"
+        assert call_kwargs.kwargs["metadata"]["execution_context"] == "flush_memories"
+        assert call_kwargs.kwargs["metadata"]["session_id"] == "sess-flush"
+
    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
@@ -0,0 +1,189 @@
+"""Regression guard for #15218 — external memory sync must skip interrupted turns.
+
+Before this fix, ``run_conversation`` called
+``memory_manager.sync_all(original_user_message, final_response)`` at the
+end of every turn where both args were present.  That gate didn't check
+the ``interrupted`` flag, so an external memory backend received partial
+assistant output, aborted tool chains, or mid-stream resets as durable
+conversational truth.  Downstream recall then treated that not-yet-real
+state as if the user had seen it complete.
+
+The fix is ``AIAgent._sync_external_memory_for_turn`` — a small helper
+that replaces the inline block and returns early when ``interrupted``
+is True (regardless of whether ``final_response`` and
+``original_user_message`` happen to be populated).
+
+These tests exercise the helper directly on a bare ``AIAgent`` built
+via ``__new__`` so the full ``run_conversation`` machinery isn't needed
+— the method is pure logic driven by its three state arguments.
+"""
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def _bare_agent():
+    """Build an ``AIAgent`` with only the attributes
+    ``_sync_external_memory_for_turn`` touches — matches the bare-agent
+    pattern used across ``tests/run_agent/test_interrupt_propagation.py``.
+    """
+    from run_agent import AIAgent
+
+    agent = AIAgent.__new__(AIAgent)
+    agent._memory_manager = MagicMock()
+    return agent
+
+
+class TestSyncExternalMemoryForTurn:
+    # --- Interrupt guard (the #15218 fix) -------------------------------
+
+    def test_interrupted_turn_does_not_sync(self):
+        """The whole point of #15218: even with a final_response and a
+        user message, an interrupted turn must NOT reach the memory
+        backend."""
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message="What time is it?",
+            final_response="It is 3pm.",  # looks complete — but partial
+            interrupted=True,
+        )
+        agent._memory_manager.sync_all.assert_not_called()
+        agent._memory_manager.queue_prefetch_all.assert_not_called()
+
+    def test_interrupted_turn_skips_even_when_response_is_full(self):
+        """A long, seemingly-complete assistant response is still
+        partial if ``interrupted`` is True — an interrupt may have
+        landed between the streamed reply and the next tool call.  The
+        memory backend has no way to distinguish on its own, so we must
+        gate at the source."""
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message="Plan a trip to Lisbon",
+            final_response="Here's a detailed 7-day itinerary: [...]",
+            interrupted=True,
+        )
+        agent._memory_manager.sync_all.assert_not_called()
+
+    # --- Normal completed turn still syncs ------------------------------
+
+    def test_completed_turn_syncs_and_queues_prefetch(self):
+        """Regression guard for the positive path: a normal completed
+        turn must still trigger both ``sync_all`` AND
+        ``queue_prefetch_all`` — otherwise the external memory backend
+        never learns about anything and every user complains.
+        """
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message="What's the weather in Paris?",
+            final_response="It's sunny and 22°C.",
+            interrupted=False,
+        )
+        agent._memory_manager.sync_all.assert_called_once_with(
+            "What's the weather in Paris?", "It's sunny and 22°C.",
+        )
+        agent._memory_manager.queue_prefetch_all.assert_called_once_with(
+            "What's the weather in Paris?",
+        )
+
+    # --- Edge cases (pre-existing behaviour preserved) ------------------
+
+    def test_no_final_response_skips(self):
+        """If the model produced no final_response (e.g. tool-only turn
+        that never resolved), we must not fabricate an empty sync."""
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message="Hello",
+            final_response=None,
+            interrupted=False,
+        )
+        agent._memory_manager.sync_all.assert_not_called()
+
+    def test_no_original_user_message_skips(self):
+        """No user-origin message means this wasn't a user turn (e.g.
+        a system-initiated refresh).  Don't sync an assistant-only
+        exchange as if a user said something."""
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message=None,
+            final_response="Proactive notification text",
+            interrupted=False,
+        )
+        agent._memory_manager.sync_all.assert_not_called()
+
+    def test_no_memory_manager_is_a_no_op(self):
+        """Sessions without an external memory manager must not crash
+        or try to call .sync_all on None."""
+        from run_agent import AIAgent
+
+        agent = AIAgent.__new__(AIAgent)
+        agent._memory_manager = None
+
+        # Must not raise.
+        agent._sync_external_memory_for_turn(
+            original_user_message="hi",
+            final_response="hey",
+            interrupted=False,
+        )
+
+    # --- Exception safety ----------------------------------------------
+
+    def test_sync_exception_is_swallowed(self):
+        """External memory providers are best-effort; a misconfigured
+        or offline backend must not block the user from seeing their
+        response by propagating the exception up."""
+        agent = _bare_agent()
+        agent._memory_manager.sync_all.side_effect = RuntimeError(
+            "backend unreachable"
+        )
+
+        # Must not raise.
+        agent._sync_external_memory_for_turn(
+            original_user_message="hi",
+            final_response="hey",
+            interrupted=False,
+        )
+        # sync_all was attempted.
+        agent._memory_manager.sync_all.assert_called_once()
+
+    def test_prefetch_exception_is_swallowed(self):
+        """Same best-effort contract applies to the prefetch step — a
+        failure in queue_prefetch_all must not bubble out."""
+        agent = _bare_agent()
+        agent._memory_manager.queue_prefetch_all.side_effect = RuntimeError(
+            "prefetch worker dead"
+        )
+
+        # Must not raise.
+        agent._sync_external_memory_for_turn(
+            original_user_message="hi",
+            final_response="hey",
+            interrupted=False,
+        )
+        # sync_all still happened before the prefetch blew up.
+        agent._memory_manager.sync_all.assert_called_once()
+
+    # --- The specific matrix the reporter asked about ------------------
+
+    @pytest.mark.parametrize("interrupted,final,user,expect_sync", [
+        (False, "resp", "user",  True),   # normal completed → sync
+        (True,  "resp", "user",  False),  # interrupted → skip (the fix)
+        (False, None,   "user",  False),  # no response → skip
+        (False, "resp", None,    False),  # no user msg → skip
+        (True,  None,   "user",  False),  # interrupted + no response → skip
+        (True,  "resp", None,    False),  # interrupted + no user → skip
+        (False, None,   None,    False),  # nothing → skip
+        (True,  None,   None,    False),  # interrupted + nothing → skip
+    ])
+    def test_sync_matrix(self, interrupted, final, user, expect_sync):
+        agent = _bare_agent()
+        agent._sync_external_memory_for_turn(
+            original_user_message=user,
+            final_response=final,
+            interrupted=interrupted,
+        )
+        if expect_sync:
+            agent._memory_manager.sync_all.assert_called_once()
+            agent._memory_manager.queue_prefetch_all.assert_called_once()
+        else:
+            agent._memory_manager.sync_all.assert_not_called()
+            agent._memory_manager.queue_prefetch_all.assert_not_called()
@@ -105,3 +105,39 @@ class TestRepairToolCallArguments:
        result = _repair_tool_call_arguments(raw, "terminal")
        # Should at least be valid JSON, even if background is lost
        json.loads(result)
+
+    # -- Stage 0: strict=False (literal control chars in strings) --
+    # llama.cpp backends sometimes emit literal tabs/newlines inside JSON
+    # string values. strict=False accepts these; we re-serialise to the
+    # canonical wire form (#12068).
+
+    def test_literal_newline_inside_string_value(self):
+        raw = '{"summary": "line one\nline two"}'
+        result = _repair_tool_call_arguments(raw, "t")
+        parsed = json.loads(result)
+        assert parsed == {"summary": "line one\nline two"}
+
+    def test_literal_tab_inside_string_value(self):
+        raw = '{"summary": "col1\tcol2"}'
+        result = _repair_tool_call_arguments(raw, "t")
+        parsed = json.loads(result)
+        assert parsed == {"summary": "col1\tcol2"}
+
+    def test_literal_control_char_reserialised_to_wire_form(self):
+        """After repair, the output must parse under strict=True."""
+        raw = '{"msg": "has\tliteral\ttabs"}'
+        result = _repair_tool_call_arguments(raw, "t")
+        # strict=True must now accept this
+        parsed = json.loads(result)
+        assert parsed["msg"] == "has\tliteral\ttabs"
+
+    # -- Stage 4: control-char escape fallback --
+
+    def test_control_chars_with_trailing_comma(self):
+        """strict=False fails due to trailing comma, but brace-count pass
+        + control-char escape rescues it."""
+        raw = '{"msg": "line\none",}'
+        result = _repair_tool_call_arguments(raw, "t")
+        parsed = json.loads(result)
+        assert "line" in parsed["msg"]
+
@@ -943,6 +943,113 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo
    assert "inspect the repository" in (assistant_message.content or "")


+def test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch):
+    """Harmony-style `to=functions.foo` leaked into assistant content with no
+    structured function_call items must be treated as incomplete so the
+    continuation path can re-elicit a proper tool call. This is the
+    Taiwan-embassy-email (Discord bug report) failure mode: child agent
+    produces a confident-looking summary, tool_trace is empty because no
+    tools actually ran, parent can't audit the claim.
+    """
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    leaked_content = (
+        "I'll check the official page directly.\n"
+        "to=functions.exec_command {\"cmd\": \"curl https://example.test\"}\n"
+        "assistant to=functions.exec_command {\"stdout\": \"mailto:foo@example.test\"}\n"
+        "Extracted: foo@example.test"
+    )
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(type="output_text", text=leaked_content)],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "incomplete"
+    # Content is scrubbed so the parent never surfaces the leaked text as a
+    # summary. tool_calls stays empty because no structured function_call
+    # item existed.
+    assert (assistant_message.content or "") == ""
+    assert assistant_message.tool_calls == []
+
+
+def test_normalize_codex_response_ignores_tool_call_text_when_real_tool_call_present(monkeypatch):
+    """If the model emitted BOTH a structured function_call AND some text that
+    happens to contain `to=functions.*` (unlikely but possible), trust the
+    structured call — don't wipe content that came alongside a real tool use.
+    """
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(
+                    type="output_text",
+                    text="Running the command via to=functions.exec_command now.",
+                )],
+            ),
+            SimpleNamespace(
+                type="function_call",
+                id="fc_1",
+                call_id="call_1",
+                name="terminal",
+                arguments="{}",
+            ),
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "tool_calls"
+    assert assistant_message.tool_calls  # real call preserved
+    assert "Running the command" in (assistant_message.content or "")
+
+
+def test_normalize_codex_response_no_leak_passes_through(monkeypatch):
+    """Sanity: normal assistant content that doesn't contain the leak pattern
+    is returned verbatim with finish_reason=stop."""
+    agent = _build_agent(monkeypatch)
+    from agent.codex_responses_adapter import _normalize_codex_response
+
+    response = SimpleNamespace(
+        output=[
+            SimpleNamespace(
+                type="message",
+                status="completed",
+                content=[SimpleNamespace(
+                    type="output_text",
+                    text="Here is the answer with no leak.",
+                )],
+            )
+        ],
+        usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
+        status="completed",
+        model="gpt-5.4",
+    )
+
+    assistant_message, finish_reason = _normalize_codex_response(response)
+
+    assert finish_reason == "stop"
+    assert assistant_message.content == "Here is the answer with no leak."
+    assert assistant_message.tool_calls == []
+
+
 def test_interim_commentary_is_not_marked_already_streamed_without_callbacks(monkeypatch):
    agent = _build_agent(monkeypatch)
    observed = {}
@@ -0,0 +1,116 @@
+"""Tests for tool call argument repair in the streaming assembly path.
+
+The streaming path (run_agent._call_chat_completions) assembles tool call
+deltas into full arguments.  When a model truncates or malforms the JSON
+(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
+straight through — setting has_truncated_tool_args but NOT repairing it.
+That triggered the truncation handler to kill the session with /new required.
+
+The fix: repair arguments in the streaming assembly path using
+_repair_tool_call_arguments() so repairable malformations (trailing commas,
+unclosed brackets, Python None) don't kill the session.
+"""
+
+import json
+import pytest
+
+from run_agent import _repair_tool_call_arguments
+
+
+class TestStreamingAssemblyRepair:
+    """Verify that _repair_tool_call_arguments is applied to streaming tool
+    call arguments before they're assembled into mock_tool_calls.
+
+    These tests verify the REPAIR FUNCTION itself works correctly for the
+    cases that arise during streaming assembly.  Integration tests that
+    exercise the full streaming path are in test_agent_loop_tool_calling.py.
+    """
+
+    # -- Truncation cases (most common streaming failure) --
+
+    def test_truncated_object_no_close_brace(self):
+        """Model stops mid-JSON, common with output length limits."""
+        raw = '{"command": "ls -la", "timeout": 30'
+        result = _repair_tool_call_arguments(raw, "terminal")
+        parsed = json.loads(result)
+        assert parsed["command"] == "ls -la"
+        assert parsed["timeout"] == 30
+
+    def test_truncated_nested_object(self):
+        """Model truncates a flat two-key object just before the closing brace."""
+        raw = '{"path": "/tmp/foo", "content": "hello"'
+        result = _repair_tool_call_arguments(raw, "write_file")
+        parsed = json.loads(result)
+        assert parsed["path"] == "/tmp/foo"
+
+    def test_truncated_mid_value(self):
+        """Model cuts off mid-string-value."""
+        raw = '{"command": "git clone ht'
+        result = _repair_tool_call_arguments(raw, "terminal")
+        # Should produce valid JSON (even if command value is lost)
+        json.loads(result)
+
+    # -- Trailing comma cases (Ollama/GLM common) --
+
+    def test_trailing_comma_before_close_brace(self):
+        raw = '{"path": "/tmp", "content": "x",}'
+        result = _repair_tool_call_arguments(raw, "write_file")
+        assert json.loads(result) == {"path": "/tmp", "content": "x"}
+
+    def test_trailing_comma_in_list(self):
+        raw = '{"items": [1, 2, 3,]}'
+        result = _repair_tool_call_arguments(raw, "test")
+        assert json.loads(result) == {"items": [1, 2, 3]}
+
+    # -- Python None from model output --
+
+    def test_python_none_literal(self):
+        raw = "None"
+        result = _repair_tool_call_arguments(raw, "test")
+        assert result == "{}"
+
+    # -- Empty arguments (some models emit empty string) --
+
+    def test_empty_string(self):
+        assert _repair_tool_call_arguments("", "test") == "{}"
+
+    def test_whitespace_only(self):
+        assert _repair_tool_call_arguments("   \n  ", "test") == "{}"
+
+    # -- Already-valid JSON passes through unchanged --
+
+    def test_valid_json_passthrough(self):
+        raw = '{"path": "/tmp/foo", "content": "hello"}'
+        result = _repair_tool_call_arguments(raw, "write_file")
+        assert json.loads(result) == {"path": "/tmp/foo", "content": "hello"}
+
+    # -- Extra closing brackets (rare but happens) --
+
+    def test_extra_closing_brace(self):
+        raw = '{"key": "value"}}'
+        result = _repair_tool_call_arguments(raw, "test")
+        assert json.loads(result) == {"key": "value"}
+
+    # -- Real-world GLM-5.1 truncation pattern --
+
+    def test_glm_truncation_pattern(self):
+        """GLM-5.1 via Ollama commonly truncates like this.
+
+        This pattern ends with a hanging colon ("background":) and no value, which
+        makes it unrepairable — the last-resort empty object {} is the
+        safest option.  The important thing is that repairable patterns
+        (trailing comma, unclosed brace WITHOUT hanging colon) DO get fixed.
+        """
+        raw = '{"command": "ls -la /tmp", "timeout": 30, "background":'
+        result = _repair_tool_call_arguments(raw, "terminal")
+        # Unrepairable — returns empty object (hanging colon can't be fixed)
+        parsed = json.loads(result)
+        assert parsed == {}
+
+    def test_glm_truncation_repairable(self):
+        """GLM-5.1 truncation pattern that IS repairable."""
+        raw = '{"command": "ls -la /tmp", "timeout": 30'
+        result = _repair_tool_call_arguments(raw, "terminal")
+        parsed = json.loads(result)
+        assert parsed["command"] == "ls -la /tmp"
+        assert parsed["timeout"] == 30
@@ -0,0 +1,157 @@
+"""Tests for AIAgent._sanitize_tool_call_arguments."""
+
+import copy
+import logging
+
+from run_agent import AIAgent
+
+
+_MISSING = object()
+
+
+def _tool_call(call_id="call_1", name="read_file", arguments='{"path":"/tmp/foo"}'):
+    function = {"name": name}
+    if arguments is not _MISSING:
+        function["arguments"] = arguments
+    return {
+        "id": call_id,
+        "type": "function",
+        "function": function,
+    }
+
+
+def _assistant_message(*tool_calls):
+    return {
+        "role": "assistant",
+        "content": "tooling",
+        "tool_calls": list(tool_calls),
+    }
+
+
+def _tool_message(call_id="call_1", content="ok"):
+    return {
+        "role": "tool",
+        "tool_call_id": call_id,
+        "content": content,
+    }
+
+
+def test_valid_arguments_unchanged():
+    messages = [
+        {"role": "user", "content": "hello"},
+        _assistant_message(_tool_call(arguments='{"path":"/tmp/foo"}')),
+        _tool_message(content="done"),
+    ]
+    original = copy.deepcopy(messages)
+
+    repaired = AIAgent._sanitize_tool_call_arguments(messages)
+
+    assert repaired == 0
+    assert messages == original
+
+
+def test_truncated_arguments_replaced_with_empty_object(caplog):
+    messages = [
+        _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')),
+    ]
+
+    with caplog.at_level(logging.WARNING, logger="run_agent"):
+        repaired = AIAgent._sanitize_tool_call_arguments(
+            messages,
+            logger=logging.getLogger("run_agent"),
+            session_id="session-123",
+        )
+
+    assert repaired == 1
+    assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}"
+    assert any(
+        "session=session-123" in record.message
+        and "tool_call_id=call_1" in record.message
+        for record in caplog.records
+    )
+
+
+def test_marker_appended_to_existing_tool_message():
+    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
+    messages = [
+        _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')),
+        _tool_message(content="existing tool output"),
+    ]
+
+    repaired = AIAgent._sanitize_tool_call_arguments(messages)
+
+    assert repaired == 1
+    assert messages[1]["content"] == f"{marker}\nexisting tool output"
+
+
+def test_marker_message_inserted_when_missing():
+    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
+    messages = [
+        _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')),
+        {"role": "user", "content": "next turn"},
+    ]
+
+    repaired = AIAgent._sanitize_tool_call_arguments(messages)
+
+    assert repaired == 1
+    assert messages[1] == {
+        "role": "tool",
+        "tool_call_id": "call_1",
+        "content": marker,
+    }
+    assert messages[2] == {"role": "user", "content": "next turn"}
+
+
+def test_multiple_corrupted_tool_calls_in_one_message():
+    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
+    messages = [
+        _assistant_message(
+            _tool_call(call_id="call_1", arguments='{"path": "/tmp/foo'),
+            _tool_call(call_id="call_2", arguments='{"path":"/tmp/bar"}'),
+            _tool_call(call_id="call_3", arguments='{"mode":"tail"'),
+        ),
+    ]
+
+    repaired = AIAgent._sanitize_tool_call_arguments(messages)
+
+    assert repaired == 2
+    assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}"
+    assert messages[0]["tool_calls"][1]["function"]["arguments"] == '{"path":"/tmp/bar"}'
+    assert messages[0]["tool_calls"][2]["function"]["arguments"] == "{}"
+    assert messages[1]["tool_call_id"] == "call_1"
+    assert messages[1]["content"] == marker
+    assert messages[2]["tool_call_id"] == "call_3"
+    assert messages[2]["content"] == marker
+
+
+def test_empty_string_arguments_treated_as_empty_object(caplog):
+    messages = [
+        _assistant_message(_tool_call(arguments="")),
+    ]
+
+    with caplog.at_level(logging.WARNING, logger="run_agent"):
+        repaired = AIAgent._sanitize_tool_call_arguments(
+            messages,
+            logger=logging.getLogger("run_agent"),
+            session_id="session-123",
+        )
+
+    assert repaired == 0
+    assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}"
+    assert caplog.records == []
+
+
+def test_non_assistant_messages_ignored():
+    messages = [
+        {"role": "user", "content": "hello", "tool_calls": [_tool_call(arguments='{"bad":')]},
+        {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
+        {"role": "system", "content": "sys", "tool_calls": [_tool_call(arguments='{"bad":')]},
+        None,
+        "not a dict",
+    ]
+    original = copy.deepcopy(messages)
+
+    repaired = AIAgent._sanitize_tool_call_arguments(messages)
+
+    assert repaired == 0
+    assert messages == original
@@ -240,3 +240,69 @@ class TestExchangeAuthCode:
        assert setup_module.TOKEN_PATH.exists()
        # Pending auth is cleaned up
        assert not setup_module.PENDING_AUTH_PATH.exists()
+
+
+class TestHermesConstantsFallback:
+    """Tests for _hermes_home.py fallback when hermes_constants is unavailable."""
+
+    HELPER_PATH = (
+        Path(__file__).resolve().parents[2]
+        / "skills/productivity/google-workspace/scripts/_hermes_home.py"
+    )
+
+    def _load_helper(self, monkeypatch):
+        """Load _hermes_home.py with hermes_constants blocked."""
+        monkeypatch.setitem(sys.modules, "hermes_constants", None)
+        spec = importlib.util.spec_from_file_location("_hermes_home_test", self.HELPER_PATH)
+        module = importlib.util.module_from_spec(spec)
+        assert spec.loader is not None
+        spec.loader.exec_module(module)
+        return module
+
+    def test_fallback_uses_hermes_home_env_var(self, monkeypatch, tmp_path):
+        """When hermes_constants is missing, HERMES_HOME comes from env var."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "custom-hermes"))
+        module = self._load_helper(monkeypatch)
+        assert module.get_hermes_home() == tmp_path / "custom-hermes"
+
+    def test_fallback_defaults_to_dot_hermes(self, monkeypatch):
+        """When hermes_constants is missing and HERMES_HOME unset, default to ~/.hermes."""
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+        module = self._load_helper(monkeypatch)
+        assert module.get_hermes_home() == Path.home() / ".hermes"
+
+    def test_fallback_ignores_empty_hermes_home(self, monkeypatch):
+        """Empty/whitespace HERMES_HOME is treated as unset."""
+        monkeypatch.setenv("HERMES_HOME", "  ")
+        module = self._load_helper(monkeypatch)
+        assert module.get_hermes_home() == Path.home() / ".hermes"
+
+    def test_fallback_display_hermes_home_shortens_path(self, monkeypatch):
+        """Fallback display_hermes_home() uses ~/ shorthand like the real one."""
+        monkeypatch.delenv("HERMES_HOME", raising=False)
+        module = self._load_helper(monkeypatch)
+        assert module.display_hermes_home() == "~/.hermes"
+
+    def test_fallback_display_hermes_home_profile_path(self, monkeypatch):
+        """Fallback display_hermes_home() handles profile paths under ~/."""
+        monkeypatch.setenv("HERMES_HOME", str(Path.home() / ".hermes/profiles/coder"))
+        module = self._load_helper(monkeypatch)
+        assert module.display_hermes_home() == "~/.hermes/profiles/coder"
+
+    def test_fallback_display_hermes_home_custom_path(self, monkeypatch):
+        """Fallback display_hermes_home() returns full path for non-home locations."""
+        monkeypatch.setenv("HERMES_HOME", "/opt/hermes-custom")
+        module = self._load_helper(monkeypatch)
+        assert module.display_hermes_home() == "/opt/hermes-custom"
+
+    def test_delegates_to_hermes_constants_when_available(self):
+        """When hermes_constants IS importable, _hermes_home delegates to it."""
+        spec = importlib.util.spec_from_file_location(
+            "_hermes_home_happy", self.HELPER_PATH
+        )
+        module = importlib.util.module_from_spec(spec)
+        assert spec.loader is not None
+        spec.loader.exec_module(module)
+        import hermes_constants
+        assert module.get_hermes_home is hermes_constants.get_hermes_home
+        assert module.display_hermes_home is hermes_constants.display_hermes_home
@@ -186,3 +186,67 @@ class TestBatchWorkerResumeBehavior:
        assert result["discarded_no_reasoning"] == 1
        assert result["completed_prompts"] == [0]
        assert not batch_file.exists() or batch_file.read_text() == ""
+
+
+class TestFinalCheckpointNoDuplicates:
+    """Regression: the final checkpoint must not contain duplicate prompt
+    indices.
+
+    Before PR #15161, `run()` populated `completed_prompts_set` incrementally
+    as each batch completed, then at the end built `all_completed_prompts =
+    list(completed_prompts_set)` AND extended it again with every batch's
+    `completed_prompts` — double-counting every index.
+    """
+
+    def _simulate_final_aggregation_fixed(self, batch_results):
+        """Mirror the fixed code path in batch_runner.run()."""
+        completed_prompts_set = set()
+        for result in batch_results:
+            completed_prompts_set.update(result.get("completed_prompts", []))
+        # This is what the fixed code now writes to the checkpoint:
+        return sorted(completed_prompts_set)
+
+    def test_no_duplicates_in_final_list(self):
+        batch_results = [
+            {"completed_prompts": [0, 1, 2]},
+            {"completed_prompts": [3, 4]},
+            {"completed_prompts": [5]},
+        ]
+        final = self._simulate_final_aggregation_fixed(batch_results)
+        assert final == [0, 1, 2, 3, 4, 5]
+        assert len(final) == len(set(final))  # no duplicates
+
+    def test_persisted_checkpoint_has_unique_prompts(self, runner):
+        """Write what run()'s fixed aggregation produces to disk; the file
+        must load back with no duplicate indices."""
+        batch_results = [
+            {"completed_prompts": [0, 1]},
+            {"completed_prompts": [2, 3]},
+        ]
+        final = self._simulate_final_aggregation_fixed(batch_results)
+        runner._save_checkpoint({
+            "run_name": runner.run_name,
+            "completed_prompts": final,
+            "batch_stats": {},
+        })
+        loaded = json.loads(runner.checkpoint_file.read_text())
+        cp = loaded["completed_prompts"]
+        assert cp == sorted(set(cp))
+        assert len(cp) == 4
+
+    def test_old_buggy_pattern_would_have_duplicates(self):
+        """Document the bug this PR fixes: the old code shape produced
+        duplicates.  Kept as a sanity anchor so a future refactor that
+        re-introduces the pattern is immediately visible."""
+        completed_prompts_set = set()
+        results = []
+        for batch in ({"completed_prompts": [0, 1, 2]},
+                      {"completed_prompts": [3, 4]}):
+            completed_prompts_set.update(batch["completed_prompts"])
+            results.append(batch)
+        # Buggy aggregation (pre-fix):
+        buggy = list(completed_prompts_set)
+        for br in results:
+            buggy.extend(br.get("completed_prompts", []))
+        # Every index appears twice
+        assert len(buggy) == 2 * len(set(buggy))
@@ -231,3 +231,46 @@ class TestBackwardCompat:
    def test_tool_to_toolset_map(self):
        assert isinstance(TOOL_TO_TOOLSET_MAP, dict)
        assert len(TOOL_TO_TOOLSET_MAP) > 0
+
+
+# =========================================================================
+# _coerce_number — inf / nan must fall through to the original string
+# (regression: fix: eliminate duplicate checkpoint entries and JSON-unsafe coercion)
+# =========================================================================
+
+class TestCoerceNumberInfNan:
+    """_coerce_number must honor its documented contract ("Returns original
+    string on failure") for inf/nan inputs, because float('inf') and
+    float('nan') are not JSON-compliant under strict serialization."""
+
+    def test_inf_returns_original_string(self):
+        from model_tools import _coerce_number
+        assert _coerce_number("inf") == "inf"
+
+    def test_negative_inf_returns_original_string(self):
+        from model_tools import _coerce_number
+        assert _coerce_number("-inf") == "-inf"
+
+    def test_nan_returns_original_string(self):
+        from model_tools import _coerce_number
+        assert _coerce_number("nan") == "nan"
+
+    def test_infinity_spelling_returns_original_string(self):
+        from model_tools import _coerce_number
+        # Python's float() parses "Infinity" too — still not JSON-safe.
+        assert _coerce_number("Infinity") == "Infinity"
+
+    def test_coerced_result_is_strict_json_safe(self):
+        """Whatever _coerce_number returns for inf/nan must round-trip
+        through strict (allow_nan=False) json.dumps without raising."""
+        from model_tools import _coerce_number
+        for s in ("inf", "-inf", "nan", "Infinity"):
+            result = _coerce_number(s)
+            json.dumps({"x": result}, allow_nan=False)  # must not raise
+
+    def test_normal_numbers_still_coerce(self):
+        """Guard against over-correction — real numbers still coerce."""
+        from model_tools import _coerce_number
+        assert _coerce_number("42") == 42
+        assert _coerce_number("3.14") == 3.14
+        assert _coerce_number("1e3") == 1000
@@ -60,6 +60,22 @@ class TestWrapCommand:
        assert "cd ~" in wrapped
        assert "cd '~'" not in wrapped

+    def test_tilde_subpath_with_spaces_uses_home_and_quotes_suffix(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("ls", "~/my repo")
+
+        assert "cd $HOME/'my repo'" in wrapped
+        assert "cd ~/my repo" not in wrapped
+
+    def test_tilde_slash_maps_to_home(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("ls", "~/")
+
+        assert "cd $HOME" in wrapped
+        assert "cd ~/" not in wrapped
+
    def test_cd_failure_exit_126(self):
        env = _TestableEnv()
        env._snapshot_ready = True
@@ -1319,6 +1319,112 @@ class TestDelegateHeartbeat(unittest.TestCase):
            any("API call #5 completed" in desc for desc in touch_calls),
            f"Heartbeat should include last_activity_desc: {touch_calls}")

+    def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
+        """A long-running tool (no iteration advance, but current_tool set)
+        must not be flagged stale at the idle threshold.
+
+        Bug #13041: when a child is legitimately busy inside a slow tool
+        (terminal command, browser fetch), api_call_count does not advance.
+        The previous stale check treated this as idle and stopped the
+        heartbeat after 5 cycles (~150s), letting the gateway kill the
+        session. The fix uses a much higher in-tool threshold and only
+        applies the tight idle threshold when current_tool is None.
+        """
+        from tools.delegate_tool import _run_single_child
+
+        parent = _make_mock_parent()
+        touch_calls = []
+        parent._touch_activity = lambda desc: touch_calls.append(desc)
+
+        child = MagicMock()
+        # Child is stuck inside a single terminal call for the whole run.
+        # api_call_count never advances, current_tool is always set.
+        child.get_activity_summary.return_value = {
+            "current_tool": "terminal",
+            "api_call_count": 1,
+            "max_iterations": 50,
+            "last_activity_desc": "executing tool: terminal",
+        }
+
+        def slow_run(**kwargs):
+            # Long enough to exceed the OLD idle threshold (5 cycles) at
+            # the patched interval, but shorter than the new in-tool
+            # threshold.
+            time.sleep(0.4)
+            return {"final_response": "done", "completed": True, "api_calls": 1}
+
+        child.run_conversation.side_effect = slow_run
+
+        # Patch only the interval; the stale-cycle thresholds keep their
+        # defaults. With a 0.05s interval and the default
+        # _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would trip
+        # after 0.25s and stop firing. We should see heartbeats
+        # continuing through the full 0.4s run.
+        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
+            _run_single_child(
+                task_index=0,
+                goal="Test long-running tool",
+                child=child,
+                parent_agent=parent,
+            )
+
+        # With the old idle threshold (5 cycles = 0.25s), touch_calls
+        # would cap at ~5. With the in-tool threshold (20 cycles = 1.0s),
+        # we should see substantially more heartbeats over 0.4s.
+        self.assertGreater(
+            len(touch_calls), 6,
+            f"Heartbeat stopped too early while child was inside a tool; "
+            f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
+        )
+
+    def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
+        """A wedged child with no current_tool still trips the idle threshold.
+
+        Regression guard: the fix for #13041 must not disable stale
+        detection entirely. A child that's hung between turns (no tool
+        running, no iteration progress) must still stop touching the
+        parent so the gateway timeout can fire.
+        """
+        from tools.delegate_tool import _run_single_child
+
+        parent = _make_mock_parent()
+        touch_calls = []
+        parent._touch_activity = lambda desc: touch_calls.append(desc)
+
+        child = MagicMock()
+        # Wedged child: no tool running, iteration frozen.
+        child.get_activity_summary.return_value = {
+            "current_tool": None,
+            "api_call_count": 3,
+            "max_iterations": 50,
+            "last_activity_desc": "waiting for API response",
+        }
+
+        def slow_run(**kwargs):
+            time.sleep(0.6)
+            return {"final_response": "done", "completed": True, "api_calls": 3}
+
+        child.run_conversation.side_effect = slow_run
+
+        # At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s.
+        # We should see the heartbeat stop firing well before 0.6s.
+        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
+            _run_single_child(
+                task_index=0,
+                goal="Test wedged child",
+                child=child,
+                parent_agent=parent,
+            )
+
+        # With idle threshold=5 + interval=0.05s, touches should cap
+        # around 5. Bound loosely to avoid timing flakes.
+        self.assertLess(
+            len(touch_calls), 9,
+            f"Idle stale detection did not fire: got {len(touch_calls)} "
+            f"touches over 0.6s — expected heartbeat to stop after "
+            f"~5 stale cycles",
+        )
+

 class TestDelegationReasoningEffort(unittest.TestCase):
    """Tests for delegation.reasoning_effort config override."""
@@ -347,6 +347,70 @@ class TestSkillView:
        assert result["name"] == "my-skill"
        assert "Step 1" in result["content"]

+    def test_skill_view_applies_template_vars(self, tmp_path):
+        with (
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
+            patch(
+                "agent.skill_preprocessing.load_skills_config",
+                return_value={"template_vars": True, "inline_shell": False},
+            ),
+        ):
+            skill_dir = _make_skill(
+                tmp_path,
+                "templated",
+                body="Run ${HERMES_SKILL_DIR}/scripts/do.sh in ${HERMES_SESSION_ID}",
+            )
+            raw = skill_view("templated", task_id="session-123")
+
+        result = json.loads(raw)
+        assert result["success"] is True
+        assert f"Run {skill_dir}/scripts/do.sh in session-123" in result["content"]
+        assert "${HERMES_SKILL_DIR}" not in result["content"]
+
+    def test_skill_view_applies_inline_shell_when_enabled(self, tmp_path):
+        with (
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
+            patch(
+                "agent.skill_preprocessing.load_skills_config",
+                return_value={
+                    "template_vars": True,
+                    "inline_shell": True,
+                    "inline_shell_timeout": 5,
+                },
+            ),
+        ):
+            _make_skill(
+                tmp_path,
+                "dynamic",
+                body="Current date: !`printf 2026-04-24`",
+            )
+            raw = skill_view("dynamic")
+
+        result = json.loads(raw)
+        assert result["success"] is True
+        assert "Current date: 2026-04-24" in result["content"]
+        assert "!`printf 2026-04-24`" not in result["content"]
+
+    def test_skill_view_leaves_inline_shell_literal_when_disabled(self, tmp_path):
+        with (
+            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
+            patch(
+                "agent.skill_preprocessing.load_skills_config",
+                return_value={"template_vars": True, "inline_shell": False},
+            ),
+        ):
+            _make_skill(
+                tmp_path,
+                "static",
+                body="Current date: !`printf SHOULD_NOT_RUN`",
+            )
+            raw = skill_view("static")
+
+        result = json.loads(raw)
+        assert result["success"] is True
+        assert "Current date: !`printf SHOULD_NOT_RUN`" in result["content"]
+        assert "Current date: SHOULD_NOT_RUN" not in result["content"]
+
    def test_view_nonexistent_skill(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(tmp_path, "other-skill")
@@ -27,16 +27,22 @@ def test_make_agent_passes_resolved_provider():
        "agent": {"system_prompt": "test"},
    }

-    with patch("tui_gateway.server._load_cfg", return_value=fake_cfg), \
-         patch("tui_gateway.server._get_db", return_value=MagicMock()), \
-         patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"), \
-         patch("tui_gateway.server._load_reasoning_config", return_value=None), \
-         patch("tui_gateway.server._load_service_tier", return_value=None), \
-         patch("tui_gateway.server._load_enabled_toolsets", return_value=None), \
-         patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_runtime) as mock_resolve, \
-         patch("run_agent.AIAgent") as mock_agent:
+    with (
+        patch("tui_gateway.server._load_cfg", return_value=fake_cfg),
+        patch("tui_gateway.server._get_db", return_value=MagicMock()),
+        patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"),
+        patch("tui_gateway.server._load_reasoning_config", return_value=None),
+        patch("tui_gateway.server._load_service_tier", return_value=None),
+        patch("tui_gateway.server._load_enabled_toolsets", return_value=None),
+        patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            return_value=fake_runtime,
+        ) as mock_resolve,
+        patch("run_agent.AIAgent") as mock_agent,
+    ):

        from tui_gateway.server import _make_agent
+
        _make_agent("sid-1", "key-1")

        mock_resolve.assert_called_once_with(requested=None)
@@ -46,3 +52,136 @@ def test_make_agent_passes_resolved_provider():
        assert call_kwargs.kwargs["base_url"] == "https://api.anthropic.com"
        assert call_kwargs.kwargs["api_key"] == "sk-test-key"
        assert call_kwargs.kwargs["api_mode"] == "anthropic_messages"
+
+
+def test_make_agent_ignores_display_personality_without_system_prompt():
+    """The TUI matches the classic CLI: personality only becomes active once
+    it has been saved to agent.system_prompt."""
+
+    fake_runtime = {
+        "provider": "openrouter",
+        "base_url": "https://api.synthetic.new/v1",
+        "api_key": "sk-test",
+        "api_mode": "chat_completions",
+        "command": None,
+        "args": None,
+        "credential_pool": None,
+    }
+    fake_cfg = {
+        "agent": {
+            "system_prompt": "",
+            "personalities": {"kawaii": "sparkle system prompt"},
+        },
+        "display": {"personality": "kawaii"},
+        "model": {"default": "glm-5"},
+    }
+
+    with (
+        patch("tui_gateway.server._load_cfg", return_value=fake_cfg),
+        patch("tui_gateway.server._get_db", return_value=MagicMock()),
+        patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            return_value=fake_runtime,
+        ),
+        patch("run_agent.AIAgent") as mock_agent,
+    ):
+        from tui_gateway.server import _make_agent
+
+        _make_agent("sid-default-personality", "key-default-personality")
+
+        assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None
+
+
+def test_probe_config_health_flags_null_sections():
+    """Bare YAML keys (`agent:` with no value) parse as None and silently
+    drop nested settings; probe must surface them so users can fix."""
+    from tui_gateway.server import _probe_config_health
+
+    assert _probe_config_health({"agent": {"x": 1}}) == ""
+    assert _probe_config_health({}) == ""
+
+    msg = _probe_config_health({"agent": None, "display": None, "model": {}})
+    assert "agent" in msg and "display" in msg
+    assert "model" not in msg
+
+
+def test_probe_config_health_flags_null_personalities_with_active_personality():
+    from tui_gateway.server import _probe_config_health
+
+    msg = _probe_config_health(
+        {
+            "agent": {"personalities": None},
+            "display": {"personality": "kawaii"},
+            "model": {},
+        }
+    )
+    assert "display.personality" in msg
+    assert "agent.personalities" in msg
+
+
+def test_make_agent_tolerates_null_config_sections():
+    """Bare `agent:` / `display:` keys in ~/.hermes/config.yaml parse as
+    None. cfg.get("agent", {}) returns None (default only fires on missing
+    key), so downstream .get() chains must be guarded. Reported via Twitter
+    against the new TUI."""
+
+    fake_runtime = {
+        "provider": "openrouter",
+        "base_url": "https://api.synthetic.new/v1",
+        "api_key": "sk-test",
+        "api_mode": "chat_completions",
+        "command": None,
+        "args": None,
+        "credential_pool": None,
+    }
+    null_cfg = {"agent": None, "display": None, "model": {"default": "glm-5"}}
+
+    with (
+        patch("tui_gateway.server._load_cfg", return_value=null_cfg),
+        patch("tui_gateway.server._get_db", return_value=MagicMock()),
+        patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            return_value=fake_runtime,
+        ),
+        patch("run_agent.AIAgent") as mock_agent,
+    ):
+
+        from tui_gateway.server import _make_agent
+
+        _make_agent("sid-null", "key-null")
+
+        assert mock_agent.called
+
+
+def test_make_agent_tolerates_null_personalities_with_active_personality():
+    fake_runtime = {
+        "provider": "openrouter",
+        "base_url": "https://api.synthetic.new/v1",
+        "api_key": "sk-test",
+        "api_mode": "chat_completions",
+        "command": None,
+        "args": None,
+        "credential_pool": None,
+    }
+    cfg = {
+        "agent": {"personalities": None},
+        "display": {"personality": "kawaii"},
+        "model": {"default": "glm-5"},
+    }
+
+    with (
+        patch("tui_gateway.server._load_cfg", return_value=cfg),
+        patch("tui_gateway.server._get_db", return_value=MagicMock()),
+        patch("cli.load_cli_config", return_value={"agent": {"personalities": None}}),
+        patch(
+            "hermes_cli.runtime_provider.resolve_runtime_provider",
+            return_value=fake_runtime,
+        ),
+        patch("run_agent.AIAgent") as mock_agent,
+    ):
+        from tui_gateway.server import _make_agent
+
+        _make_agent("sid-null-personality", "key-null-personality")
+
+        assert mock_agent.called
+        assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None
@@ -411,9 +411,15 @@ def _preserve_parent_mcp_toolsets(
 DEFAULT_MAX_ITERATIONS = 50
 DEFAULT_CHILD_TIMEOUT = 600  # seconds before a child agent is considered stuck
 _HEARTBEAT_INTERVAL = 30  # seconds between parent activity heartbeats during delegation
-_HEARTBEAT_STALE_CYCLES = (
-    5  # mark child stale after this many heartbeats with no iteration progress
-)
+# Stale-heartbeat thresholds. A child with no API-call progress is either:
+#   - idle between turns (no current_tool) — probably stuck on a slow API call
+#   - inside a tool (current_tool set) — probably running a legitimately long
+#     operation (terminal command, web fetch, large file read)
+# The idle ceiling stays tight so genuinely stuck children don't mask the gateway
+# timeout. The in-tool ceiling is much higher so legit long-running tools get
+# time to finish; child_timeout_seconds (default 600s) is still the hard cap.
+_HEARTBEAT_STALE_CYCLES_IDLE = 5  # 5 * 30s = 150s idle between turns → stale
+_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20  # 20 * 30s = 600s stuck on same tool → stale
 DEFAULT_TOOLSETS = ["terminal", "file", "web"]


@@ -1201,7 +1207,11 @@ def _run_single_child(
    # Without this, the parent's _last_activity_ts freezes when delegate_task
    # starts and the gateway eventually kills the agent for "no activity".
    _heartbeat_stop = threading.Event()
-    _last_seen_iter = [0]  # mutable container for heartbeat stale detection
+    # Stale detection: track the child's (tool, iteration) pair across
+    # heartbeat cycles. If neither advances, count the cycle as stale.
+    # Different thresholds for idle vs in-tool (see _HEARTBEAT_STALE_CYCLES_*).
+    _last_seen_iter = [0]
+    _last_seen_tool = [None]  # type: list
    _stale_count = [0]

    def _heartbeat_loop():
@@ -1219,22 +1229,38 @@ def _run_single_child(
                child_iter = child_summary.get("api_call_count", 0)
                child_max = child_summary.get("max_iterations", 0)

-                # Stale detection: if iteration count hasn't advanced,
-                # increment stale counter.  After N cycles with no
-                # progress, stop masking the hang so the gateway
-                # inactivity timeout can fire as a last resort.
-                if child_iter <= _last_seen_iter[0]:
-                    _stale_count[0] += 1
-                else:
+                # Stale detection: count cycles where neither the iteration
+                # count nor the current_tool advances. A child running a
+                # legitimately long-running tool (terminal command, web
+                # fetch) keeps current_tool set but doesn't advance
+                # api_call_count — we don't want that to look stale at the
+                # idle threshold.
+                iter_advanced = child_iter > _last_seen_iter[0]
+                tool_changed = child_tool != _last_seen_tool[0]
+                if iter_advanced or tool_changed:
                    _last_seen_iter[0] = child_iter
+                    _last_seen_tool[0] = child_tool
                    _stale_count[0] = 0
+                else:
+                    _stale_count[0] += 1

-                if _stale_count[0] >= _HEARTBEAT_STALE_CYCLES:
+                # Pick threshold based on whether the child is currently
+                # inside a tool call. In-tool threshold is high enough to
+                # cover legitimately slow tools; idle threshold stays
+                # tight so the gateway timeout can fire on a truly wedged
+                # child.
+                stale_limit = (
+                    _HEARTBEAT_STALE_CYCLES_IN_TOOL
+                    if child_tool
+                    else _HEARTBEAT_STALE_CYCLES_IDLE
+                )
+                if _stale_count[0] >= stale_limit:
                    logger.warning(
-                        "Subagent %d appears stale (no iteration progress "
-                        "for %d heartbeat cycles) — stopping heartbeat",
+                        "Subagent %d appears stale (no progress for %d "
+                        "heartbeat cycles, tool=%s) — stopping heartbeat",
                        task_index,
                        _stale_count[0],
+                        child_tool or "<none>",
                    )
                    break  # stop touching parent, let gateway timeout fire

@@ -368,6 +368,17 @@ class BaseEnvironment(ABC):
    # Command wrapping
    # ------------------------------------------------------------------

+    @staticmethod
+    def _quote_cwd_for_cd(cwd: str) -> str:
+        """Quote a ``cd`` target; bare ``~`` is kept, ``~/...`` goes through ``$HOME``."""
+        if cwd == "~":
+            return cwd
+        if cwd == "~/":
+            return "$HOME"
+        if cwd.startswith("~/"):
+            return f"$HOME/{shlex.quote(cwd[2:])}"
+        return shlex.quote(cwd)
+
    def _wrap_command(self, command: str, cwd: str) -> str:
        """Build the full bash script that sources snapshot, cd's, runs command,
        re-dumps env vars, and emits CWD markers."""
@@ -379,10 +390,9 @@ class BaseEnvironment(ABC):
        if self._snapshot_ready:
            parts.append(f"source {self._snapshot_path} 2>/dev/null || true")

-        # cd to working directory — let bash expand ~ natively
-        quoted_cwd = (
-            shlex.quote(cwd) if cwd != "~" and not cwd.startswith("~/") else cwd
-        )
+        # Preserve bare ``~`` expansion, but rewrite ``~/...`` through
+        # ``$HOME`` so suffixes with spaces remain a single shell word.
+        quoted_cwd = self._quote_cwd_for_cd(cwd)
        parts.append(f"builtin cd {quoted_cwd} || exit 126")

        # Run the actual command
@@ -743,6 +743,9 @@ def _serve_plugin_skill(
    skill_md: Path,
    namespace: str,
    bare: str,
+    *,
+    preprocess: bool = True,
+    session_id: str | None = None,
 ) -> str:
    """Read a plugin-provided skill, apply guards, return JSON."""
    from hermes_cli.plugins import _get_disabled_plugins, get_plugin_manager
@@ -812,11 +815,26 @@ def _serve_plugin_skill(
    except Exception:
        banner = ""

+    rendered_content = content
+    if preprocess:
+        try:
+            from agent.skill_preprocessing import preprocess_skill_content
+
+            rendered_content = preprocess_skill_content(
+                content,
+                skill_md.parent,
+                session_id=session_id,
+            )
+        except Exception:
+            logger.debug(
+                "Could not preprocess plugin skill %s:%s", namespace, bare, exc_info=True
+            )
+
    return json.dumps(
        {
            "success": True,
            "name": f"{namespace}:{bare}",
-            "content": f"{banner}{content}" if banner else content,
+            "content": f"{banner}{rendered_content}" if banner else rendered_content,
            "description": description,
            "linked_files": None,
            "readiness_status": SkillReadinessStatus.AVAILABLE.value,
@@ -825,7 +843,12 @@ def _serve_plugin_skill(
    )


-def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
+def skill_view(
+    name: str,
+    file_path: str = None,
+    task_id: str = None,
+    preprocess: bool = True,
+) -> str:
    """
    View the content of a skill or a specific file within a skill directory.

@@ -834,6 +857,9 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
            Qualified names like "plugin:skill" resolve to plugin-provided skills.
        file_path: Optional path to a specific file within the skill (e.g., "references/api.md")
        task_id: Optional task identifier used to probe the active backend
+        preprocess: Apply configured SKILL.md template and inline shell rendering
+            to main skill content. Internal slash/preload callers disable this
+            because they render the skill message themselves.

    Returns:
        JSON string with skill content or error message
@@ -879,7 +905,13 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
                        },
                        ensure_ascii=False,
                    )
-                return _serve_plugin_skill(plugin_skill_md, namespace, bare)
+                return _serve_plugin_skill(
+                    plugin_skill_md,
+                    namespace,
+                    bare,
+                    preprocess=preprocess,
+                    session_id=task_id,
+                )

            # Plugin exists but this specific skill is missing?
            available = pm.list_plugin_skills(namespace)
@@ -1280,13 +1312,28 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
                    exc_info=True,
                )

+        rendered_content = content
+        if preprocess:
+            try:
+                from agent.skill_preprocessing import preprocess_skill_content
+
+                rendered_content = preprocess_skill_content(
+                    content,
+                    skill_dir,
+                    session_id=task_id,
+                )
+            except Exception:
+                logger.debug(
+                    "Could not preprocess skill content for %s", skill_name, exc_info=True
+                )
+
        result = {
            "success": True,
            "name": skill_name,
            "description": frontmatter.get("description", ""),
            "tags": tags,
            "related_skills": related_skills,
-            "content": content,
+            "content": rendered_content,
            "path": rel_path,
            "skill_dir": str(skill_dir) if skill_dir else None,
            "linked_files": linked_files if linked_files else None,
@@ -5,7 +5,28 @@ import sys
 import time
 import traceback

+from tui_gateway import server
 from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
+from tui_gateway.transport import TeeTransport
+
+
+def _install_sidecar_publisher() -> None:
+    """Mirror every dispatcher emit to the dashboard sidebar via WS.
+
+    Activated by `HERMES_TUI_SIDECAR_URL`, set by the dashboard's
+    ``/api/pty`` endpoint when a chat tab passes a ``channel`` query param.
+    Best-effort: connect failure or runtime drop falls back to stdio-only.
+    """
+    url = os.environ.get("HERMES_TUI_SIDECAR_URL")
+
+    if not url:
+        return
+
+    from tui_gateway.event_publisher import WsPublisherTransport
+
+    server._stdio_transport = TeeTransport(
+        server._stdio_transport, WsPublisherTransport(url)
+    )


 def _log_signal(signum: int, frame) -> None:
@@ -82,6 +103,8 @@ def _log_exit(reason: str) -> None:


 def main():
+    _install_sidecar_publisher()
+
    if not write_json({
        "jsonrpc": "2.0",
        "method": "event",
@@ -0,0 +1,126 @@
+"""Best-effort WebSocket publisher transport for the PTY-side gateway.
+
+The dashboard's `/api/pty` spawns `hermes --tui` as a child process, which
+spawns its own ``tui_gateway.entry``.  Tool/reasoning/status events fire on
+*that* gateway's transport — three processes removed from the dashboard
+server itself.  To surface them in the dashboard sidebar (`/api/events`),
+the PTY-side gateway opens a back-WS to the dashboard at startup and
+mirrors every emit through this transport.
+
+Wire protocol: newline-framed JSON dicts (the same shape the dispatcher
+already passes to ``write``).  No JSON-RPC envelope here — the dashboard's
+``/api/pub`` endpoint just rebroadcasts the bytes verbatim to subscribers.
+
+Failure mode: silent.  The agent loop must never block waiting for the
+sidecar to drain.  A dead WS short-circuits all subsequent writes.
+Actual ``send`` calls run on a daemon thread so the TeeTransport's
+``write`` returns after enqueueing (best-effort; drop when the queue is full).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import queue
+import threading
+from typing import Optional
+
+try:
+    from websockets.sync.client import connect as ws_connect
+except ImportError:  # pragma: no cover - websockets is a required install path
+    ws_connect = None  # type: ignore[assignment]
+
+_log = logging.getLogger(__name__)
+
+_DRAIN_STOP = object()
+
+_QUEUE_MAX = 256
+
+
+class WsPublisherTransport:
+    __slots__ = ("_url", "_lock", "_ws", "_dead", "_q", "_worker")
+
+    def __init__(self, url: str, *, connect_timeout: float = 2.0) -> None:
+        self._url = url
+        self._lock = threading.Lock()
+        self._ws: Optional[object] = None
+        self._dead = False
+        self._q: queue.Queue[object] = queue.Queue(maxsize=_QUEUE_MAX)
+        self._worker: Optional[threading.Thread] = None
+
+        if ws_connect is None:
+            self._dead = True
+
+            return
+
+        try:
+            self._ws = ws_connect(url, open_timeout=connect_timeout, max_size=None)
+        except Exception as exc:
+            _log.debug("event publisher connect failed: %s", exc)
+            self._dead = True
+            self._ws = None
+
+            return
+
+        self._worker = threading.Thread(
+            target=self._drain,
+            name="hermes-ws-pub",
+            daemon=True,
+        )
+        self._worker.start()
+
+    def _drain(self) -> None:
+        while True:
+            item = self._q.get()
+            if item is _DRAIN_STOP:
+                return
+            if not isinstance(item, str):
+                continue
+            if self._ws is None:
+                continue
+            try:
+                with self._lock:
+                    if self._ws is not None:
+                        self._ws.send(item)  # type: ignore[union-attr]
+            except Exception as exc:
+                _log.debug("event publisher write failed: %s", exc)
+                self._dead = True
+                self._ws = None
+
+    def write(self, obj: dict) -> bool:
+        if self._dead or self._ws is None or self._worker is None:
+            return False
+
+        line = json.dumps(obj, ensure_ascii=False)
+
+        try:
+            self._q.put_nowait(line)
+
+            return True
+        except queue.Full:
+            return False
+
+    def close(self) -> None:
+        self._dead = True
+        w = self._worker
+        if w is not None and w.is_alive():
+            try:
+                self._q.put_nowait(_DRAIN_STOP)
+            except queue.Full:
+                # Best-effort: if the queue is wedged, the daemon thread
+                # will be torn down with the process.
+                pass
+            w.join(timeout=3.0)
+        self._worker = None
+
+        if self._ws is None:
+            return
+
+        try:
+            with self._lock:
+                if self._ws is not None:
+                    self._ws.close()  # type: ignore[union-attr]
+        except Exception:
+            pass
+
+        self._ws = None
@@ -1,5 +1,6 @@
 import atexit
 import concurrent.futures
+import contextvars
 import copy
 import json
 import logging
@@ -12,9 +13,17 @@ import time
 import uuid
 from datetime import datetime
 from pathlib import Path
+from typing import Optional

 from hermes_constants import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
+from tui_gateway.transport import (
+    StdioTransport,
+    Transport,
+    bind_transport,
+    current_transport,
+    reset_transport,
+)

 logger = logging.getLogger(__name__)

@@ -52,7 +61,11 @@ def _panic_hook(exc_type, exc_value, exc_tb):
    # Stderr goes through to the TUI as a gateway.stderr Activity line —
    # the first line here is what the user will see without opening any
    # log files.  Rest of the stack is still in the log for full context.
-    first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__
+    first = (
+        str(exc_value).strip().splitlines()[0]
+        if str(exc_value).strip()
+        else exc_type.__name__
+    )
    print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True)
    # Chain to the default hook so the process still terminates normally.
    sys.__excepthook__(exc_type, exc_value, exc_tb)
@@ -147,6 +160,11 @@ atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
 _real_stdout = sys.stdout
 sys.stdout = sys.stderr

+# Module-level stdio transport — fallback sink when no transport is bound via
+# contextvar or session. Stream resolved through a lambda so runtime monkey-
+# patches of `_real_stdout` (used extensively in tests) still land correctly.
+_stdio_transport = StdioTransport(lambda: _real_stdout, _stdout_lock)
+

 class _SlashWorker:
    """Persistent HermesCLI subprocess for slash commands."""
@@ -266,14 +284,24 @@ def _db_unavailable_error(rid, *, code: int):


 def write_json(obj: dict) -> bool:
-    line = json.dumps(obj, ensure_ascii=False) + "\n"
-    try:
-        with _stdout_lock:
-            _real_stdout.write(line)
-            _real_stdout.flush()
-        return True
-    except BrokenPipeError:
-        return False
+    """Emit one JSON frame. Routes via the most-specific transport available.
+
+    Precedence:
+
+    1. Event frames with a session id → the transport stored on that session,
+       so async events land with the client that owns the session even if
+       the emitting thread has no contextvar binding.
+    2. Otherwise the transport bound on the current context (set by
+       :func:`dispatch` for the lifetime of a request).
+    3. Otherwise the module-level stdio transport, matching the historical
+       behaviour and keeping tests that monkey-patch ``_real_stdout`` green.
+    """
+    if obj.get("method") == "event":
+        sid = ((obj.get("params") or {}).get("session_id")) or ""
+        if sid and (t := (_sessions.get(sid) or {}).get("transport")) is not None:
+            return t.write(obj)
+
+    return (current_transport() or _stdio_transport).write(obj)


 def _emit(event: str, sid: str, payload: dict | None = None):
@@ -343,27 +371,40 @@ def handle_request(req: dict) -> dict | None:
    return fn(req.get("id"), req.get("params", {}))


-def dispatch(req: dict) -> dict | None:
+def dispatch(req: dict, transport: Optional[Transport] = None) -> dict | None:
    """Route inbound RPCs — long handlers to the pool, everything else inline.

    Returns a response dict when handled inline. Returns None when the
-    handler was scheduled on the pool; the worker writes its own
-    response via write_json when done.
+    handler was scheduled on the pool; the worker writes its own response
+    via the bound transport when done.
+
+    *transport* (optional): pins every write produced by this request —
+    including any events emitted by the handler — to the given transport.
+    Omitting it falls back to the module-level stdio transport, preserving
+    the original behaviour for ``tui_gateway.entry``.
    """
-    if req.get("method") not in _LONG_HANDLERS:
-        return handle_request(req)
+    t = transport or _stdio_transport
+    token = bind_transport(t)
+    try:
+        if req.get("method") not in _LONG_HANDLERS:
+            return handle_request(req)

-    def run():
-        try:
-            resp = handle_request(req)
-        except Exception as exc:
-            resp = _err(req.get("id"), -32000, f"handler error: {exc}")
-        if resp is not None:
-            write_json(resp)
+        # Snapshot the context so the pool worker sees the bound transport.
+        ctx = contextvars.copy_context()

-    _pool.submit(run)
+        def run():
+            try:
+                resp = handle_request(req)
+            except Exception as exc:
+                resp = _err(req.get("id"), -32000, f"handler error: {exc}")
+            if resp is not None:
+                t.write(resp)

-    return None
+        _pool.submit(lambda: ctx.run(run))
+
+        return None
+    finally:
+        reset_transport(token)


 def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
@@ -556,13 +597,17 @@ def _coerce_statusbar(raw) -> str:
 def _load_reasoning_config() -> dict | None:
    from hermes_constants import parse_reasoning_effort

-    effort = str(_load_cfg().get("agent", {}).get("reasoning_effort", "") or "").strip()
+    effort = str(
+        (_load_cfg().get("agent") or {}).get("reasoning_effort", "") or ""
+    ).strip()
    return parse_reasoning_effort(effort)


 def _load_service_tier() -> str | None:
    raw = (
-        str(_load_cfg().get("agent", {}).get("service_tier", "") or "").strip().lower()
+        str((_load_cfg().get("agent") or {}).get("service_tier", "") or "")
+        .strip()
+        .lower()
    )
    if not raw or raw in {"normal", "default", "standard", "off", "none"}:
        return None
@@ -572,11 +617,11 @@ def _load_service_tier() -> str | None:


 def _load_show_reasoning() -> bool:
-    return bool(_load_cfg().get("display", {}).get("show_reasoning", False))
+    return bool((_load_cfg().get("display") or {}).get("show_reasoning", False))


 def _load_tool_progress_mode() -> str:
-    raw = _load_cfg().get("display", {}).get("tool_progress", "all")
+    raw = (_load_cfg().get("display") or {}).get("tool_progress", "all")
    if raw is False:
        return "off"
    if raw is True:
@@ -779,6 +824,39 @@ def _probe_credentials(agent) -> str:
    return ""


+def _probe_config_health(cfg: dict) -> str:
+    """Report config.yaml shapes that silently drop settings.
+
+    Flags top-level keys whose value is ``None`` (bare ``agent:``) and a
+    non-default ``display.personality`` with no ``agent.personalities``.
+    Returns the warnings joined by a space, or ``''`` when there are none.
+    """
+    if not isinstance(cfg, dict):
+        return ""
+    warnings: list[str] = []
+    # str() first: YAML yields int keys for bare numerics and sorted()
+    # raises TypeError on a mixed int/str key set.
+    null_keys = sorted(str(k) for k, v in cfg.items() if v is None)
+    if null_keys:
+        keys = ", ".join(f"`{k}`" for k in null_keys)
+        warnings.append(
+            f"config.yaml has empty section(s): {keys}. "
+            f"Remove the line(s) or set them to `{{}}` — "
+            f"empty sections silently drop nested settings."
+        )
+    display_cfg, agent_cfg = cfg.get("display"), cfg.get("agent")
+    if isinstance(display_cfg, dict) and isinstance(agent_cfg, dict):
+        personality = str(display_cfg.get("personality", "") or "").strip().lower()
+        if personality not in {"", "default", "none", "neutral"} and (
+            agent_cfg.get("personalities") is None
+        ):
+            warnings.append(
+                "`display.personality` is set but `agent.personalities` is empty/null; "
+                "personality overlay will be skipped."
+            )
+    return " ".join(warnings).strip()
+
+
 def _session_info(agent) -> dict:
    info: dict = {
        "model": getattr(agent, "model", ""),
@@ -1065,28 +1143,6 @@ def _wire_callbacks(sid: str):
    set_secret_capture_callback(secret_cb)


-def _resolve_personality_prompt(cfg: dict) -> str:
-    """Resolve the active personality into a system prompt string."""
-    name = (cfg.get("display", {}).get("personality", "") or "").strip().lower()
-    if not name or name in ("default", "none", "neutral"):
-        return ""
-    try:
-        from cli import load_cli_config
-
-        personalities = load_cli_config().get("agent", {}).get("personalities", {})
-    except Exception:
-        try:
-            from hermes_cli.config import load_config as _load_full_cfg
-
-            personalities = _load_full_cfg().get("agent", {}).get("personalities", {})
-        except Exception:
-            personalities = cfg.get("agent", {}).get("personalities", {})
-    pval = personalities.get(name)
-    if pval is None:
-        return ""
-    return _render_personality_prompt(pval)
-
-
 def _render_personality_prompt(value) -> str:
    if isinstance(value, dict):
        parts = [value.get("system_prompt", "")]
@@ -1102,15 +1158,15 @@ def _available_personalities(cfg: dict | None = None) -> dict:
    try:
        from cli import load_cli_config

-        return load_cli_config().get("agent", {}).get("personalities", {}) or {}
+        return (load_cli_config().get("agent") or {}).get("personalities", {}) or {}
    except Exception:
        try:
            from hermes_cli.config import load_config as _load_full_cfg

-            return _load_full_cfg().get("agent", {}).get("personalities", {}) or {}
+            return (_load_full_cfg().get("agent") or {}).get("personalities", {}) or {}
        except Exception:
            cfg = cfg or _load_cfg()
-            return cfg.get("agent", {}).get("personalities", {}) or {}
+            return (cfg.get("agent") or {}).get("personalities", {}) or {}


 def _validate_personality(value: str, cfg: dict | None = None) -> tuple[str, str]:
@@ -1220,9 +1276,7 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
    from hermes_cli.runtime_provider import resolve_runtime_provider

    cfg = _load_cfg()
-    system_prompt = cfg.get("agent", {}).get("system_prompt", "") or ""
-    if not system_prompt:
-        system_prompt = _resolve_personality_prompt(cfg)
+    system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip()
    runtime = resolve_runtime_provider(requested=None)
    return AIAgent(
        model=_resolve_model(),
@@ -1262,6 +1316,9 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80):
        "tool_progress_mode": _load_tool_progress_mode(),
        "edit_snapshots": {},
        "tool_started_at": {},
+        # Pin async event emissions to whichever transport created the
+        # session (stdio for Ink, JSON-RPC WS for the dashboard sidebar).
+        "transport": current_transport() or _stdio_transport,
    }
    try:
        _sessions[sid]["slash_worker"] = _SlashWorker(
@@ -1404,6 +1461,7 @@ def _(rid, params: dict) -> dict:
        "slash_worker": None,
        "tool_progress_mode": _load_tool_progress_mode(),
        "tool_started_at": {},
+        "transport": current_transport() or _stdio_transport,
    }

    def _build() -> None:
@@ -1462,6 +1520,10 @@ def _(rid, params: dict) -> dict:
            warn = _probe_credentials(agent)
            if warn:
                info["credential_warning"] = warn
+            cfg_warn = _probe_config_health(_load_cfg())
+            if cfg_warn:
+                info["config_warning"] = cfg_warn
+                logger.warning(cfg_warn)
            _emit("session.info", sid, info)
        except Exception as e:
            session["agent_error"] = str(e)
@@ -1608,9 +1670,7 @@ def _(rid, params: dict) -> dict:
        return _db_unavailable_error(rid, code=5007)
    title, key = params.get("title", ""), session["session_key"]
    if not title:
-        return _ok(
-            rid, {"title": db.get_session_title(key) or "", "session_key": key}
-        )
+        return _ok(rid, {"title": db.get_session_title(key) or "", "session_key": key})
    try:
        db.set_session_title(key, title)
        return _ok(rid, {"title": title})
@@ -2237,7 +2297,9 @@ def _(rid, params: dict) -> dict:
                    f.write(trace)
            except Exception:
                pass
-            print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True)
+            print(
+                f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True
+            )
            _emit("error", sid, {"message": str(e)})
        finally:
            try:
@@ -2660,9 +2722,7 @@ def _(rid, params: dict) -> dict:
        cfg = _load_cfg()
        display = cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
        sections_cfg = (
-            display.get("sections")
-            if isinstance(display.get("sections"), dict)
-            else {}
+            display.get("sections") if isinstance(display.get("sections"), dict) else {}
        )

        nv = str(value or "").strip().lower()
@@ -2797,18 +2857,21 @@ def _(rid, params: dict) -> dict:
        return _ok(rid, {"prompt": _load_cfg().get("custom_prompt", "")})
    if key == "skin":
        return _ok(
-            rid, {"value": _load_cfg().get("display", {}).get("skin", "default")}
+            rid, {"value": (_load_cfg().get("display") or {}).get("skin", "default")}
        )
    if key == "personality":
        return _ok(
-            rid, {"value": _load_cfg().get("display", {}).get("personality", "default")}
+            rid,
+            {"value": (_load_cfg().get("display") or {}).get("personality", "default")},
        )
    if key == "reasoning":
        cfg = _load_cfg()
-        effort = str(cfg.get("agent", {}).get("reasoning_effort", "medium") or "medium")
+        effort = str(
+            (cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium"
+        )
        display = (
            "show"
-            if bool(cfg.get("display", {}).get("show_reasoning", False))
+            if bool((cfg.get("display") or {}).get("show_reasoning", False))
            else "hide"
        )
        return _ok(rid, {"value": effort, "display": display})
@@ -2816,7 +2879,7 @@ def _(rid, params: dict) -> dict:
        allowed_dm = frozenset({"hidden", "collapsed", "expanded"})
        raw = (
            str(
-                _load_cfg().get("display", {}).get("details_mode", "collapsed")
+                (_load_cfg().get("display") or {}).get("details_mode", "collapsed")
                or "collapsed"
            )
            .strip()
@@ -2827,13 +2890,17 @@ def _(rid, params: dict) -> dict:
    if key == "thinking_mode":
        allowed_tm = frozenset({"collapsed", "truncated", "full"})
        cfg = _load_cfg()
-        raw = str(cfg.get("display", {}).get("thinking_mode", "") or "").strip().lower()
+        raw = (
+            str((cfg.get("display") or {}).get("thinking_mode", "") or "")
+            .strip()
+            .lower()
+        )
        if raw in allowed_tm:
            nv = raw
        else:
            dm = (
                str(
-                    cfg.get("display", {}).get("details_mode", "collapsed")
+                    (cfg.get("display") or {}).get("details_mode", "collapsed")
                    or "collapsed"
                )
                .strip()
@@ -2842,7 +2909,7 @@ def _(rid, params: dict) -> dict:
            nv = "full" if dm == "expanded" else "collapsed"
        return _ok(rid, {"value": nv})
    if key == "compact":
-        on = bool(_load_cfg().get("display", {}).get("tui_compact", False))
+        on = bool((_load_cfg().get("display") or {}).get("tui_compact", False))
        return _ok(rid, {"value": "on" if on else "off"})
    if key == "statusbar":
        display = _load_cfg().get("display")
@@ -3328,7 +3395,16 @@ def _list_repo_files(root: str) -> list[str]:
        if top_result.returncode == 0:
            top = top_result.stdout.decode("utf-8", "replace").strip()
            list_result = subprocess.run(
-                ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
+                [
+                    "git",
+                    "-C",
+                    top,
+                    "ls-files",
+                    "-z",
+                    "--cached",
+                    "--others",
+                    "--exclude-standard",
+                ],
                capture_output=True,
                timeout=2.0,
                check=False,
@@ -3337,7 +3413,9 @@ def _list_repo_files(root: str) -> list[str]:
                for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
                    if not p:
                        continue
-                    rel = os.path.relpath(os.path.join(top, p), root).replace(os.sep, "/")
+                    rel = os.path.relpath(os.path.join(top, p), root).replace(
+                        os.sep, "/"
+                    )
                    # Skip parents/siblings of cwd — keep the picker scoped
                    # to root-and-below, matching Cmd-P workspace semantics.
                    if rel.startswith("../"):
@@ -3471,12 +3549,7 @@ def _(rid, params: dict) -> dict:
        # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
        # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
        # path so explicit navigation intent is preserved.
-        if (
-            is_context
-            and path_part
-            and "/" not in path_part
-            and prefix_tag != "folder"
-        ):
+        if is_context and path_part and "/" not in path_part and prefix_tag != "folder":
            root = os.getcwd()
            ranked: list[tuple[tuple[int, int], str, str]] = []
            for rel in _list_repo_files(root):
@@ -3680,7 +3753,7 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str:
            _apply_personality_to_session(sid, session, new_prompt)
        elif name == "prompt" and agent:
            cfg = _load_cfg()
-            new_prompt = cfg.get("agent", {}).get("system_prompt", "") or ""
+            new_prompt = (cfg.get("agent") or {}).get("system_prompt", "") or ""
            agent.ephemeral_system_prompt = new_prompt or None
            agent._cached_system_prompt = None
        elif name == "compress" and agent:
@@ -3902,9 +3975,7 @@ def _(rid, params: dict) -> dict:

            voice_cfg = _load_cfg().get("voice", {})
            start_continuous(
-                on_transcript=lambda t: _voice_emit(
-                    "voice.transcript", {"text": t}
-                ),
+                on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
                on_status=lambda s: _voice_emit("voice.status", {"state": s}),
                on_silent_limit=lambda: _voice_emit(
                    "voice.transcript", {"no_speech_limit": True}
@@ -0,0 +1,127 @@
+"""Transport abstraction for the tui_gateway JSON-RPC server.
+
+Historically the gateway wrote every JSON frame directly to real stdout.  This
+module decouples the I/O sink from the handler logic so the same dispatcher
+can be driven over stdio (``tui_gateway.entry``) or WebSocket
+(``tui_gateway.ws``) without duplicating code.
+
+A :class:`Transport` is anything that can accept a JSON-serialisable dict and
+forward it to its peer.  The active transport for the current request is
+tracked in a :class:`contextvars.ContextVar` so handlers — including those
+dispatched onto the worker pool — route their writes to the right peer.
+
+Backward compatibility
+----------------------
+``tui_gateway.server.write_json`` still works without any transport bound.
+When nothing is on the contextvar and no session-level transport is found,
+it falls back to the module-level :class:`StdioTransport`, which wraps the
+original ``_real_stdout`` + ``_stdout_lock`` pair.  Tests that monkey-patch
+``server._real_stdout`` continue to work because the stdio transport resolves
+the stream lazily through a callback.
+"""
+
+from __future__ import annotations
+
+import contextvars
+import json
+import threading
+from typing import Any, Callable, Optional, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class Transport(Protocol):
+    """Structural interface for a JSON-frame sink (``isinstance``-checkable)."""
+
+    def write(self, obj: dict) -> bool:
+        """Emit *obj* as one JSON frame. Return ``False`` when the peer is gone."""
+
+    def close(self) -> None:
+        """Release any resources owned by this transport."""
+
+
+_current_transport: contextvars.ContextVar[Optional[Transport]] = (
+    contextvars.ContextVar(
+        "hermes_gateway_transport",
+        default=None,
+    )
+)
+
+
+def current_transport() -> Optional[Transport]:
+    """Return the context-local transport, or ``None`` (the default) if unbound."""
+    return _current_transport.get()
+
+
+def bind_transport(transport: Optional[Transport]) -> contextvars.Token:
+    """Set the context-local transport; returns the token for :func:`reset_transport`."""
+    return _current_transport.set(transport)
+
+
+def reset_transport(token: contextvars.Token) -> None:
+    """Undo a :func:`bind_transport` call using the *token* it returned."""
+    _current_transport.reset(token)
+
+
+class StdioTransport:
+    """Newline-delimited JSON writer over a lazily resolved stream.
+
+    ``stream_getter`` is called on every write, so swapping the underlying
+    stream at runtime (e.g. tests patching ``server._real_stdout``) works.
+    """
+
+    __slots__ = ("_stream_getter", "_lock")
+
+    def __init__(self, stream_getter: Callable[[], Any], lock: threading.Lock) -> None:
+        self._lock = lock
+        self._stream_getter = stream_getter
+
+    def write(self, obj: dict) -> bool:
+        """Write *obj* plus a newline and flush; ``False`` on a broken pipe."""
+        frame = f"{json.dumps(obj, ensure_ascii=False)}\n"
+        try:
+            with self._lock:
+                out = self._stream_getter()
+                out.write(frame)
+                out.flush()
+        except BrokenPipeError:
+            return False
+        return True
+
+    def close(self) -> None:
+        """No-op; returns ``None``."""
+
+
+class TeeTransport:
+    """Fan one write out to a primary transport and N secondaries.
+
+    Only the primary decides the outcome: its return value is returned and
+    its exceptions propagate; secondary failures are swallowed.  Intended for
+    the PTY child: each emit goes to stdio (Ink) and a dashboard back-WS.
+    """
+
+    __slots__ = ("_primary", "_secondaries")
+
+    def __init__(self, primary: "Transport", *secondaries: "Transport") -> None:
+        self._secondaries = secondaries
+        self._primary = primary
+
+    def write(self, obj: dict) -> bool:
+        """Write to the primary first, then mirror to each secondary."""
+        result = self._primary.write(obj)
+        for mirror in self._secondaries:
+            try:
+                mirror.write(obj)
+            except Exception:
+                pass  # best-effort: a failing mirror is ignored
+        return result
+
+    def close(self) -> None:
+        """Close the primary, then every secondary even if that raised."""
+        try:
+            self._primary.close()
+        finally:
+            for mirror in self._secondaries:
+                try:
+                    mirror.close()
+                except Exception:
+                    pass
@@ -0,0 +1,174 @@
+"""WebSocket transport for the tui_gateway JSON-RPC server.
+
+Reuses :func:`tui_gateway.server.dispatch` verbatim so every RPC method, every
+slash command, every approval/clarify/sudo flow, and every agent event flows
+through the same handlers whether the client is Ink over stdio or an iOS /
+web client over WebSocket.
+
+Wire protocol
+-------------
+Same JSON-RPC messages as stdio, but one JSON document per WebSocket text
+frame (no trailing newline). The server emits a ``gateway.ready`` event right
+after accepting the connection, then sends responses/events for requests.
+
+Mounting
+--------
+    from fastapi import WebSocket
+    from tui_gateway.ws import handle_ws
+
+    @app.websocket("/api/ws")
+    async def ws(ws: WebSocket):
+        await handle_ws(ws)
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+from typing import Any
+
+from tui_gateway import server
+
+_log = logging.getLogger(__name__)
+
+# Max seconds a pool-dispatched handler will block waiting for the event loop
+# to flush a WS frame before we mark the transport dead. Protects handler
+# threads from a wedged socket.
+_WS_WRITE_TIMEOUT_S = 10.0
+
+# Keep starlette optional at import time; handle_ws uses the real class when
+# it's available and falls back to a generic Exception sentinel otherwise.
+try:
+    from starlette.websockets import WebSocketDisconnect as _WebSocketDisconnect
+except ImportError:  # pragma: no cover - starlette is a required install path
+    _WebSocketDisconnect = Exception  # type: ignore[assignment]
+
+
+class WSTransport:
+    """Per-connection WS transport.
+
+    ``write`` is safe to call from any thread *other than* the event loop
+    thread that owns the socket: it marshals the send onto the loop via
+    :func:`asyncio.run_coroutine_threadsafe` and blocks on the result for up
+    to ``_WS_WRITE_TIMEOUT_S`` seconds.
+
+    Called on the loop thread itself, that blocking wait would deadlock, so
+    ``write`` schedules the send as a task and returns immediately.  Loop-side
+    callers that need the frame on the wire should await :meth:`write_async`.
+    """
+
+    def __init__(self, ws: Any, loop: asyncio.AbstractEventLoop) -> None:
+        self._ws = ws
+        self._loop = loop
+        self._closed = False
+        # Strong refs: the loop only weakly references tasks (GC could drop one).
+        self._pending: set = set()
+
+    def write(self, obj: dict) -> bool:
+        """Send *obj* as one JSON text frame; ``False`` once the socket is dead."""
+        if self._closed:
+            return False
+        line = json.dumps(obj, ensure_ascii=False)
+        try:
+            on_loop = asyncio.get_running_loop() is self._loop
+        except RuntimeError:
+            on_loop = False
+        if on_loop:
+            # Fire-and-forget — don't block the loop waiting on itself.
+            task = self._loop.create_task(self._safe_send(line))
+            self._pending.add(task)
+            task.add_done_callback(self._pending.discard)
+            return True
+        try:
+            fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop)
+            fut.result(timeout=_WS_WRITE_TIMEOUT_S)
+            return not self._closed
+        except Exception as exc:
+            self._closed = True
+            _log.debug("ws write failed: %s", exc)
+            return False
+
+    async def write_async(self, obj: dict) -> bool:
+        """Send from the owning event loop. Awaits until the frame is on the wire."""
+        if self._closed:
+            return False
+        await self._safe_send(json.dumps(obj, ensure_ascii=False))
+        return not self._closed
+
+    async def _safe_send(self, line: str) -> None:
+        """Send one text frame; on any error mark the transport closed."""
+        try:
+            await self._ws.send_text(line)
+        except Exception as exc:
+            self._closed = True
+            _log.debug("ws send failed: %s", exc)
+
+    def close(self) -> None:
+        """Mark the transport closed so later writes return ``False``."""
+        self._closed = True
+
+
+async def handle_ws(ws: Any) -> None:
+    """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``.
+
+    Input that is not valid JSON is answered with error ``-32700``; valid JSON
+    that is not an object with ``-32600``. Neither reaches ``server.dispatch``.
+    """
+    await ws.accept()
+    transport = WSTransport(ws, asyncio.get_running_loop())
+    await transport.write_async(
+        {
+            "jsonrpc": "2.0",
+            "method": "event",
+            "params": {
+                "type": "gateway.ready",
+                "payload": {"skin": server.resolve_skin()},
+            },
+        }
+    )
+
+    try:
+        while True:
+            try:
+                raw = await ws.receive_text()
+            except _WebSocketDisconnect:
+                break
+
+            line = raw.strip()
+            if not line:
+                continue
+
+            try:
+                req = json.loads(line)
+                # dispatch() calls req.get(); a list/number/string would raise.
+                error = None if isinstance(req, dict) else (-32600, "invalid request")
+            except json.JSONDecodeError:
+                error = (-32700, "parse error")
+            if error is not None:
+                code, message = error
+                rejected = {"code": code, "message": message}
+                if not await transport.write_async(
+                    {"jsonrpc": "2.0", "error": rejected, "id": None}
+                ):
+                    break
+                continue
+
+            # dispatch() may schedule long handlers on the pool; it then returns
+            # None and the worker writes the response itself, from its own
+            # thread, via the transport we pass in. Inline handlers return the
+            # response dict, which we write here from the loop.
+            resp = await asyncio.to_thread(server.dispatch, req, transport)
+            if resp is not None and not await transport.write_async(resp):
+                break
+    finally:
+        transport.close()
+        # Detach the transport from any sessions it owned so later emits
+        # fall back to stdio instead of crashing into a closed socket.
+        for _, sess in list(server._sessions.items()):
+            if sess.get("transport") is transport:
+                sess["transport"] = server._stdio_transport
+        try:
+            await ws.close()
+        except Exception:
+            pass
@@ -175,14 +175,16 @@ describe('createGatewayEventHandler', () => {

    onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)

-    // Three transcript messages: pre-tool narration → diff (kind='diff',
-    // so MessageLine gives it blank-line breathing room) → post-tool
-    // narration. The final message does NOT contain a diff.
-    expect(appended).toHaveLength(3)
+    // Four transcript messages: pre-tool narration → tool trail → diff
+    // (kind='diff', so MessageLine gives it blank-line breathing room) →
+    // post-tool narration. The final message does NOT contain a diff.
+    expect(appended).toHaveLength(4)
    expect(appended[0]?.text).toBe('Editing the file')
-    expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
-    expect(appended[2]?.text).toBe('patch applied')
-    expect(appended[2]?.text).not.toContain('```diff')
+    expect(appended[1]).toMatchObject({ kind: 'trail' })
+    expect(appended[1]?.tools?.[0]).toContain('Patch')
+    expect(appended[2]).toMatchObject({ kind: 'diff', text: block })
+    expect(appended[3]?.text).toBe('patch applied')
+    expect(appended[3]?.text).not.toContain('```diff')
  })

  it('drops the diff segment when the final assistant text narrates the same diff', () => {
@@ -209,12 +211,13 @@ describe('createGatewayEventHandler', () => {
    onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    // diff segment first (kind='diff'), final narration second
-    expect(appended).toHaveLength(2)
-    expect(appended[0]?.kind).toBe('diff')
-    expect(appended[0]?.text).not.toContain('┊ review diff')
-    expect(appended[0]?.text).toContain('--- a/foo.ts')
-    expect(appended[1]?.text).toBe('done')
+    // Tool trail first, then diff segment (kind='diff'), then final narration.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.kind).toBe('trail')
+    expect(appended[1]?.kind).toBe('diff')
+    expect(appended[1]?.text).not.toContain('┊ review diff')
+    expect(appended[1]?.text).toContain('--- a/foo.ts')
+    expect(appended[2]?.text).toBe('done')
  })

  it('drops the diff segment when assistant writes its own ```diff fence', () => {
@@ -242,16 +245,17 @@ describe('createGatewayEventHandler', () => {
    } as any)
    onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)

-    // Two segments: the diff block (kind='diff', no tool row) and the final
-    // narration (tool row belongs here since pendingSegmentTools carries
-    // across the flushStreamingSegment call).
-    expect(appended).toHaveLength(2)
-    expect(appended[0]?.kind).toBe('diff')
-    expect(appended[0]?.text).toContain('```diff')
-    expect(appended[0]?.tools ?? []).toEqual([])
-    expect(appended[1]?.text).toBe('done')
-    expect(appended[1]?.tools?.[0]).toContain('Review Diff')
-    expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    // Tool row is now placed before the diff, so telemetry does not render
+    // below the patch that came from that tool.
+    expect(appended).toHaveLength(3)
+    expect(appended[0]?.kind).toBe('trail')
+    expect(appended[0]?.tools?.[0]).toContain('Review Diff')
+    expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
+    expect(appended[1]?.kind).toBe('diff')
+    expect(appended[1]?.text).toContain('```diff')
+    expect(appended[1]?.tools ?? []).toEqual([])
+    expect(appended[2]?.text).toBe('done')
+    expect(appended[2]?.tools ?? []).toEqual([])
  })

  it('shows setup panel for missing provider startup error', () => {
@@ -246,7 +246,7 @@ export const coreCommands: SlashCommand[] = [
      }

      writeOsc52Clipboard(target.text)
-      sys('sent OSC52 copy sequence (terminal support required)')
+      sys(`copied ${target.text.length} chars`)
    }
  },

@@ -33,6 +33,12 @@ const diffSegmentBody = (msg: Msg): null | string => {
  return m ? m[1]! : null
 }

+const insertBeforeFirstDiff = (segments: Msg[], msg: Msg): Msg[] => {
+  const index = segments.findIndex(segment => segment.kind === 'diff')
+
+  return index < 0 ? [...segments, msg] : [...segments.slice(0, index), msg, ...segments.slice(index)]
+}
+
 export interface InterruptDeps {
  appendMessage: (msg: Msg) => void
  gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
@@ -292,16 +298,30 @@ class TurnController {
      return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
    })

-    const finalMessages = [...segments]
+    const hasDiffSegment = segments.some(msg => msg.kind === 'diff')
+    const detailsBelongBeforeDiff = hasDiffSegment && (tools.length > 0 || Boolean(savedReasoning))
+    const finalMessages = detailsBelongBeforeDiff
+      ? insertBeforeFirstDiff(segments, {
+          kind: 'trail',
+          role: 'system',
+          text: '',
+          thinking: savedReasoning || undefined,
+          thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
+          toolTokens: savedToolTokens || undefined,
+          ...(tools.length && { tools })
+        })
+      : [...segments]

    if (finalText) {
      finalMessages.push({
        role: 'assistant',
        text: finalText,
-        thinking: savedReasoning || undefined,
-        thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
-        toolTokens: savedToolTokens || undefined,
-        ...(tools.length && { tools })
+        ...(!detailsBelongBeforeDiff && {
+          thinking: savedReasoning || undefined,
+          thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
+          toolTokens: savedToolTokens || undefined,
+          ...(tools.length && { tools })
+        })
      })
    }

@@ -142,6 +142,10 @@ export function useSessionLifecycle(opts: UseSessionLifecycleOptions) {
        sys(`warning: ${info.credential_warning}`)
      }

+      if (info?.config_warning) {
+        sys(`warning: ${info.config_warning}`)
+      }
+
      if (msg) {
        sys(msg)
      }
@@ -31,11 +31,20 @@ export const MessageLine = memo(function MessageLine({
  const thinkingMode = sectionMode('thinking', detailsMode, sections)
  const toolsMode = sectionMode('tools', detailsMode, sections)
  const activityMode = sectionMode('activity', detailsMode, sections)
+  const thinking = msg.thinking?.trim() ?? ''

-  if (msg.kind === 'trail' && msg.tools?.length) {
-    return toolsMode !== 'hidden' || activityMode !== 'hidden' ? (
+  if (msg.kind === 'trail' && (msg.tools?.length || thinking)) {
+    return thinkingMode !== 'hidden' || toolsMode !== 'hidden' || activityMode !== 'hidden' ? (
      <Box flexDirection="column" marginTop={1}>
-        <ToolTrail detailsMode={detailsMode} sections={sections} t={t} trail={msg.tools} />
+        <ToolTrail
+          detailsMode={detailsMode}
+          reasoning={thinking}
+          reasoningTokens={msg.thinkingTokens}
+          sections={sections}
+          t={t}
+          toolTokens={msg.toolTokens}
+          trail={msg.tools ?? []}
+        />
      </Box>
    ) : null
  }
@@ -61,7 +70,6 @@ export const MessageLine = memo(function MessageLine({
  }

  const { body, glyph, prefix } = ROLE[msg.role](t)
-  const thinking = msg.thinking?.trim() ?? ''

  const showDetails =
    (toolsMode !== 'hidden' && Boolean(msg.tools?.length)) ||
@@ -392,6 +392,9 @@ function SubagentAccordion({
  const hasTools = item.tools.length > 0
  const noteRows = [...(summary ? [summary] : []), ...item.notes]
  const hasNotes = noteRows.length > 0
+  // `showChildren` only seeds the recursive `expanded` prop for nested
+  // subagents — it MUST NOT be OR-ed into the local section toggles, or
+  // expand-all permanently locks the inner chevrons open.
  const showChildren = expanded || deep
  const noteColor = statusTone === 'error' ? t.color.error : statusTone === 'warn' ? t.color.warn : t.color.dim

@@ -414,13 +417,13 @@ function SubagentAccordion({
              setOpenThinking(v => !v)
            }
          }}
-          open={showChildren || openThinking}
+          open={openThinking}
          t={t}
          title="Thinking"
        />
      ),
      key: 'thinking',
-      open: showChildren || openThinking,
+      open: openThinking,
      render: childRails => (
        <Thinking
          active={item.status === 'running'}
@@ -447,13 +450,13 @@ function SubagentAccordion({
              setOpenTools(v => !v)
            }
          }}
-          open={showChildren || openTools}
+          open={openTools}
          t={t}
          title="Tool calls"
        />
      ),
      key: 'tools',
-      open: showChildren || openTools,
+      open: openTools,
      render: childRails => (
        <Box flexDirection="column">
          {item.tools.map((line, index) => (
@@ -488,14 +491,14 @@ function SubagentAccordion({
              setOpenNotes(v => !v)
            }
          }}
-          open={showChildren || openNotes}
+          open={openNotes}
          t={t}
          title="Progress"
          tone={statusTone}
        />
      ),
      key: 'notes',
-      open: showChildren || openNotes,
+      open: openNotes,
      render: childRails => (
        <Box flexDirection="column">
          {noteRows.map((line, index) => (
@@ -528,14 +531,14 @@ function SubagentAccordion({
              setOpenKids(v => !v)
            }
          }}
-          open={showChildren || openKids}
+          open={openKids}
          suffix={`d${item.depth + 1} · ${aggregate.descendantCount} total`}
          t={t}
          title="Spawned"
        />
      ),
      key: 'subagents',
-      open: showChildren || openKids,
+      open: openKids,
      render: childRails => (
        <Box flexDirection="column">
          {children.map((child, i) => (
@@ -718,6 +721,13 @@ export const ToolTrail = memo(function ToolTrail({
  )

  const [now, setNow] = useState(() => Date.now())
+  // Local toggles own the open state once mounted.  Init from the resolved
+  // section visibility so default-expanded sections (thinking/tools) render
+  // open on first paint; the useEffect below re-syncs when the user mutates
+  // visibility at runtime via /details.  NEVER OR these against
+  // `visible.X === 'expanded'` at render time — that locks the panel open
+  // and silently breaks manual chevron clicks for default-expanded
+  // sections (regression caught after #14968).
  const [openThinking, setOpenThinking] = useState(visible.thinking === 'expanded')
  const [openTools, setOpenTools] = useState(visible.tools === 'expanded')
  const [openSubagents, setOpenSubagents] = useState(visible.subagents === 'expanded')
@@ -960,7 +970,7 @@ export const ToolTrail = memo(function ToolTrail({
          }}
        >
          <Text color={t.color.dim} dim={!thinkingLive}>
-            <Text color={t.color.amber}>{visible.thinking === 'expanded' || openThinking ? '▾ ' : '▸ '}</Text>
+            <Text color={t.color.amber}>{openThinking ? '▾ ' : '▸ '}</Text>
            {thinkingLive ? (
              <Text bold color={t.color.cornsilk}>
                Thinking
@@ -980,7 +990,7 @@ export const ToolTrail = memo(function ToolTrail({
        </Box>
      ),
      key: 'thinking',
-      open: visible.thinking === 'expanded' || openThinking,
+      open: openThinking,
      render: rails => (
        <Thinking
          active={reasoningActive}
@@ -1007,14 +1017,14 @@ export const ToolTrail = memo(function ToolTrail({
              setOpenTools(v => !v)
            }
          }}
-          open={visible.tools === 'expanded' || openTools}
+          open={openTools}
          suffix={toolTokensLabel}
          t={t}
          title="Tool calls"
        />
      ),
      key: 'tools',
-      open: visible.tools === 'expanded' || openTools,
+      open: openTools,
      render: rails => (
        <Box flexDirection="column">
          {groups.map((group, index) => {
@@ -1072,14 +1082,14 @@ export const ToolTrail = memo(function ToolTrail({
              setDeepSubagents(false)
            }
          }}
-          open={visible.subagents === 'expanded' || openSubagents}
+          open={openSubagents}
          suffix={suffix}
          t={t}
          title="Spawn tree"
        />
      ),
      key: 'subagents',
-      open: visible.subagents === 'expanded' || openSubagents,
+      open: openSubagents,
      render: renderSubagentList
    })
  }
@@ -1096,14 +1106,14 @@ export const ToolTrail = memo(function ToolTrail({
              setOpenMeta(v => !v)
            }
          }}
-          open={visible.activity === 'expanded' || openMeta}
+          open={openMeta}
          t={t}
          title="Activity"
          tone={metaTone}
        />
      ),
      key: 'meta',
-      open: visible.activity === 'expanded' || openMeta,
+      open: openMeta,
      render: rails => (
        <Box flexDirection="column">
          {meta.map((row, index) => (
@@ -93,7 +93,7 @@ export interface SetupStatusResponse {
 // ── Session lifecycle ────────────────────────────────────────────────

 export interface SessionCreateResponse {
-  info?: SessionInfo & { credential_warning?: string }
+  info?: SessionInfo & { config_warning?: string; credential_warning?: string }
  session_id: string
 }

@@ -9,7 +9,7 @@ resolution-markers = [
 ]

 [options]
-exclude-newer = "2026-04-16T11:49:00.318115Z"
+exclude-newer = "2026-04-17T16:49:45.944715922Z"
 exclude-newer-span = "P7D"

 [[package]]
@@ -1870,7 +1870,7 @@ wheels = [

 [[package]]
 name = "hermes-agent"
-version = "0.10.0"
+version = "0.11.0"
 source = { editable = "." }
 dependencies = [
    { name = "anthropic" },
@@ -13,10 +13,15 @@
    "preview": "vite preview"
  },
  "dependencies": {
-    "@nous-research/ui": "^0.3.0",
+    "@nous-research/ui": "^0.4.0",
    "@observablehq/plot": "^0.6.17",
    "@react-three/fiber": "^9.6.0",
    "@tailwindcss/vite": "^4.2.1",
+    "@xterm/addon-fit": "^0.11.0",
+    "@xterm/addon-unicode11": "^0.9.0",
+    "@xterm/addon-web-links": "^0.12.0",
+    "@xterm/addon-webgl": "^0.19.0",
+    "@xterm/xterm": "^6.0.0",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "gsap": "^3.15.0",
@@ -1,50 +1,86 @@
-import { useMemo } from "react";
-import { Routes, Route, NavLink, Navigate } from "react-router-dom";
+import {
+  useCallback,
+  useEffect,
+  useMemo,
+  useState,
+  type ComponentType,
+  type ReactNode,
+} from "react";
+import {
+  Routes,
+  Route,
+  NavLink,
+  Navigate,
+  useLocation,
+  useNavigate,
+} from "react-router-dom";
 import {
  Activity,
  BarChart3,
+  BookOpen,
  Clock,
+  Code,
+  Database,
+  Download,
+  Eye,
  FileText,
+  Globe,
+  Heart,
  KeyRound,
+  Loader2,
+  Menu,
  MessageSquare,
  Package,
-  Settings,
  Puzzle,
-  Sparkles,
-  Terminal,
-  Globe,
-  Database,
+  RotateCw,
+  Settings,
  Shield,
-  Wrench,
-  Zap,
-  Heart,
+  Sparkles,
  Star,
-  Code,
-  Eye,
+  Terminal,
+  Wrench,
+  X,
+  Zap,
 } from "lucide-react";
-import { Cell, Grid, SelectionSwitcher, Typography } from "@nous-research/ui";
+import { SelectionSwitcher, Typography } from "@nous-research/ui";
 import { cn } from "@/lib/utils";
 import { Backdrop } from "@/components/Backdrop";
-import StatusPage from "@/pages/StatusPage";
+import { SidebarFooter } from "@/components/SidebarFooter";
+import { SidebarStatusStrip } from "@/components/SidebarStatusStrip";
+import { PageHeaderProvider } from "@/contexts/PageHeaderProvider";
+import { useSystemActions } from "@/contexts/useSystemActions";
+import type { SystemAction } from "@/contexts/system-actions-context";
 import ConfigPage from "@/pages/ConfigPage";
+import DocsPage from "@/pages/DocsPage";
 import EnvPage from "@/pages/EnvPage";
 import SessionsPage from "@/pages/SessionsPage";
 import LogsPage from "@/pages/LogsPage";
 import AnalyticsPage from "@/pages/AnalyticsPage";
 import CronPage from "@/pages/CronPage";
 import SkillsPage from "@/pages/SkillsPage";
+import ChatPage from "@/pages/ChatPage";
 import { LanguageSwitcher } from "@/components/LanguageSwitcher";
 import { ThemeSwitcher } from "@/components/ThemeSwitcher";
 import { useI18n } from "@/i18n";
-import { PluginSlot, usePlugins } from "@/plugins";
-import type { RegisteredPlugin } from "@/plugins";
+import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
+import type { PluginManifest } from "@/plugins";
 import { useTheme } from "@/themes";
+import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";

-/** Built-in route → default page component. Used both for standard routing
- *  and for resolving plugin `tab.override` values. Keys must match the
- *  `path` in `BUILTIN_NAV` so `/path` lookups stay consistent. */
-const BUILTIN_ROUTES: Record<string, React.ComponentType> = {
-  "/": StatusPage,
+function RootRedirect() {
+  return <Navigate to="/sessions" replace />;
+}
+
+const CHAT_NAV_ITEM: NavItem = {
+  path: "/chat",
+  labelKey: "chat",
+  label: "Chat",
+  icon: Terminal,
+};
+
+/** Built-in routes that are always registered. `/chat` is not listed here: it is merged in separately, only when embedded chat is enabled (`hermes dashboard --tui`). */
+const BUILTIN_ROUTES_CORE: Record<string, ComponentType> = {
+  "/": RootRedirect,
  "/sessions": SessionsPage,
  "/analytics": AnalyticsPage,
  "/logs": LogsPage,
@@ -52,10 +88,10 @@ const BUILTIN_ROUTES: Record<string, React.ComponentType> = {
  "/skills": SkillsPage,
  "/config": ConfigPage,
  "/env": EnvPage,
+  "/docs": DocsPage,
 };

-const BUILTIN_NAV: NavItem[] = [
-  { path: "/", labelKey: "status", label: "Status", icon: Activity },
+const BUILTIN_NAV_REST: NavItem[] = [
  {
    path: "/sessions",
    labelKey: "sessions",
@@ -73,11 +109,15 @@ const BUILTIN_NAV: NavItem[] = [
  { path: "/skills", labelKey: "skills", label: "Skills", icon: Package },
  { path: "/config", labelKey: "config", label: "Config", icon: Settings },
  { path: "/env", labelKey: "keys", label: "Keys", icon: KeyRound },
+  {
+    path: "/docs",
+    labelKey: "documentation",
+    label: "Documentation",
+    icon: BookOpen,
+  },
 ];

-// Plugins can reference any of these by name in their manifest — keeps bundle
-// size sane vs. importing the full lucide-react set.
-const ICON_MAP: Record<string, React.ComponentType<{ className?: string }>> = {
+const ICON_MAP: Record<string, ComponentType<{ className?: string }>> = {
  Activity,
  BarChart3,
  Clock,
@@ -100,24 +140,15 @@ const ICON_MAP: Record<string, React.ComponentType<{ className?: string }>> = {
  Eye,
 };

-function resolveIcon(
-  name: string,
-): React.ComponentType<{ className?: string }> {
+function resolveIcon(name: string): ComponentType<{ className?: string }> {
  return ICON_MAP[name] ?? Puzzle;
 }

-function buildNavItems(
-  builtIn: NavItem[],
-  plugins: RegisteredPlugin[],
-): NavItem[] {
+function buildNavItems(builtIn: NavItem[], manifests: PluginManifest[]): NavItem[] {
  const items = [...builtIn];

-  for (const { manifest } of plugins) {
-    // Plugins that replace a built-in route don't add a new tab entry —
-    // they reuse the existing tab. The nav just lights up the original
-    // built-in entry when the user visits `/`.
+  for (const manifest of manifests) {
    if (manifest.tab.override) continue;
-    // Hidden plugins register their component + slots but skip the nav.
    if (manifest.tab.hidden) continue;

    const pluginItem: NavItem = {
@@ -145,54 +176,61 @@ function buildNavItems(
  return items;
 }

-/** Build the final route table, letting plugins override built-in pages.
- *
- *  Returns (path, Component, key) tuples. Plugins with `tab.override`
- *  win over both built-ins and other plugins (last registration wins if
- *  two plugins claim the same override, but we warn in dev). Plugins with
- *  a regular `tab.path` register alongside built-ins as standalone
- *  routes. */
 function buildRoutes(
-  plugins: RegisteredPlugin[],
-): Array<{ key: string; path: string; Component: React.ComponentType }> {
-  const overrides = new Map<string, RegisteredPlugin>();
-  const addons: RegisteredPlugin[] = [];
+  builtinRoutes: Record<string, ComponentType>,
+  manifests: PluginManifest[],
+): Array<{
+  key: string;
+  path: string;
+  element: ReactNode;
+}> {
+  const byOverride = new Map<string, PluginManifest>();
+  const addons: PluginManifest[] = [];

-  for (const p of plugins) {
-    if (p.manifest.tab.override) {
-      overrides.set(p.manifest.tab.override, p);
+  for (const m of manifests) {
+    if (m.tab.override) {
+      byOverride.set(m.tab.override, m);
    } else {
-      addons.push(p);
+      addons.push(m);
    }
  }

  const routes: Array<{
    key: string;
    path: string;
-    Component: React.ComponentType;
+    element: ReactNode;
  }> = [];

-  for (const [path, Component] of Object.entries(BUILTIN_ROUTES)) {
-    const override = overrides.get(path);
-    if (override) {
+  for (const [path, Component] of Object.entries(builtinRoutes)) {
+    const om = byOverride.get(path);
+    if (om) {
      routes.push({
-        key: `override:${override.manifest.name}`,
+        key: `override:${om.name}`,
        path,
-        Component: override.component,
+        element: <PluginPage name={om.name} />,
      });
    } else {
-      routes.push({ key: `builtin:${path}`, path, Component });
+      routes.push({ key: `builtin:${path}`, path, element: <Component /> });
    }
  }

-  for (const addon of addons) {
-    // Don't double-register a plugin that shadows a built-in path via
-    // `tab.path` — `override` is the supported mechanism for that.
-    if (BUILTIN_ROUTES[addon.manifest.tab.path]) continue;
+  for (const m of addons) {
+    if (m.tab.hidden) continue;
+    if (builtinRoutes[m.tab.path]) continue;
    routes.push({
-      key: `plugin:${addon.manifest.name}`,
-      path: addon.manifest.tab.path,
-      Component: addon.component,
+      key: `plugin:${m.name}`,
+      path: m.tab.path,
+      element: <PluginPage name={m.name} />,
+    });
+  }
+
+  for (const m of manifests) {
+    if (!m.tab.hidden) continue;
+    if (builtinRoutes[m.tab.path] || m.tab.override) continue;
+    routes.push({
+      key: `plugin:hidden:${m.name}`,
+      path: m.tab.path,
+      element: <PluginPage name={m.name} />,
    });
  }

@@ -201,154 +239,145 @@ function buildRoutes(

 export default function App() {
  const { t } = useI18n();
-  const { plugins } = usePlugins();
+  const { pathname } = useLocation();
+  const { manifests } = usePlugins();
  const { theme } = useTheme();
+  const [mobileOpen, setMobileOpen] = useState(false);
+  const closeMobile = useCallback(() => setMobileOpen(false), []);
+  const isDocsRoute = pathname === "/docs" || pathname === "/docs/";
+  const normalizedPath = pathname.replace(/\/$/, "") || "/";
+  const isChatRoute = normalizedPath === "/chat";
+  const embeddedChat = isDashboardEmbeddedChatEnabled();
+
+  const builtinRoutes = useMemo(
+    () => ({
+      ...BUILTIN_ROUTES_CORE,
+      ...(embeddedChat ? { "/chat": ChatPage } : {}),
+    }),
+    [embeddedChat],
+  );
+
+  const builtinNav = useMemo(
+    () =>
+      embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST,
+    [embeddedChat],
+  );

  const navItems = useMemo(
-    () => buildNavItems(BUILTIN_NAV, plugins),
-    [plugins],
+    () => buildNavItems(builtinNav, manifests),
+    [builtinNav, manifests],
+  );
+  const routes = useMemo(
+    () => buildRoutes(builtinRoutes, manifests),
+    [builtinRoutes, manifests],
+  );
+  const pluginTabMeta = useMemo(
+    () =>
+      manifests
+        .filter((m) => !m.tab.hidden)
+        .map((m) => ({
+          path: m.tab.override ?? m.tab.path,
+          label: m.label,
+        })),
+    [manifests],
  );
-  const routes = useMemo(() => buildRoutes(plugins), [plugins]);

  const layoutVariant = theme.layoutVariant ?? "standard";
-  const showSidebar = layoutVariant === "cockpit";
-  // Tiled layout drops the 1600px clamp so pages can use the full viewport;
-  // standard + cockpit keep the centered reading width.
-  const mainMaxWidth = layoutVariant === "tiled" ? "max-w-none" : "max-w-[1600px]";
+
+  useEffect(() => {
+    if (!mobileOpen) return;
+    const onKey = (e: KeyboardEvent) => {
+      if (e.key === "Escape") setMobileOpen(false);
+    };
+    document.addEventListener("keydown", onKey);
+    const prevOverflow = document.body.style.overflow;
+    document.body.style.overflow = "hidden";
+    return () => {
+      document.removeEventListener("keydown", onKey);
+      document.body.style.overflow = prevOverflow;
+    };
+  }, [mobileOpen]);
+
+  useEffect(() => {
+    const mql = window.matchMedia("(min-width: 1024px)");
+    const onChange = (e: MediaQueryListEvent) => {
+      if (e.matches) setMobileOpen(false);
+    };
+    mql.addEventListener("change", onChange);
+    return () => mql.removeEventListener("change", onChange);
+  }, []);

  return (
    <div
      data-layout-variant={layoutVariant}
-      className="text-midground font-mondwest bg-black min-h-screen flex flex-col uppercase antialiased overflow-x-hidden"
+      className="font-mondwest flex h-dvh max-h-dvh min-h-0 flex-col overflow-hidden bg-black uppercase text-midground antialiased"
    >
      <SelectionSwitcher />
      <Backdrop />
-      {/* Themes can style backdrop chrome via `componentStyles.backdrop.*`
-          CSS vars read by <Backdrop />. Plugins can also inject full
-          components into the backdrop layer via the `backdrop` slot —
-          useful for scanlines, parallax stars, hero artwork, etc. */}
      <PluginSlot name="backdrop" />

      <header
        className={cn(
-          "fixed top-0 left-0 right-0 z-40",
+          "lg:hidden fixed top-0 left-0 right-0 z-40 h-12",
+          "flex items-center gap-2 px-3",
          "border-b border-current/20",
          "bg-background-base/90 backdrop-blur-sm",
        )}
        style={{
-          // Themes can tweak header chrome (background, border-image,
-          // clip-path) via these CSS vars. Unset vars compute to the
-          // property's initial value, so themes opt in per-property.
          background: "var(--component-header-background)",
          borderImage: "var(--component-header-border-image)",
          clipPath: "var(--component-header-clip-path)",
        }}
      >
-        <div className={cn("mx-auto flex h-12", mainMaxWidth)}>
-          <PluginSlot name="header-left" />
-          <div className="min-w-0 flex-1 overflow-x-auto scrollbar-none">
-            <Grid
-              className="h-full !border-t-0 !border-b-0"
-              style={{
-                gridTemplateColumns: `auto repeat(${navItems.length}, auto)`,
-              }}
-            >
-              <Cell className="flex items-center !p-0 !px-3 sm:!px-5">
-                <Typography
-                  className="font-bold text-[1.0625rem] sm:text-[1.125rem] leading-[0.95] tracking-[0.0525rem] text-midground"
-                  style={{ mixBlendMode: "plus-lighter" }}
-                >
-                  Hermes
-                  <br />
-                  Agent
-                </Typography>
-              </Cell>
+        <button
+          type="button"
+          onClick={() => setMobileOpen(true)}
+          aria-label={t.app.openNavigation}
+          aria-expanded={mobileOpen}
+          aria-controls="app-sidebar"
+          className={cn(
+            "inline-flex h-8 w-8 items-center justify-center",
+            "text-midground/70 hover:text-midground transition-colors cursor-pointer",
+            "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+          )}
+        >
+          <Menu className="h-4 w-4" />
+        </button>

-              {navItems.map(({ path, label, labelKey, icon: Icon }) => (
-                <Cell key={path} className="relative !p-0">
-                  <NavLink
-                    to={path}
-                    end={path === "/"}
-                    className={({ isActive }) =>
-                      cn(
-                        "group relative flex h-full w-full items-center gap-1.5",
-                        "px-2.5 sm:px-4 py-2",
-                        "font-mondwest text-[0.65rem] sm:text-[0.8rem] tracking-[0.12em]",
-                        "whitespace-nowrap transition-colors cursor-pointer",
-                        "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
-                        isActive
-                          ? "text-midground"
-                          : "opacity-60 hover:opacity-100",
-                      )
-                    }
-                    style={{
-                      clipPath: "var(--component-tab-clip-path)",
-                    }}
-                  >
-                    {({ isActive }) => (
-                      <>
-                        <Icon className="h-3.5 w-3.5 shrink-0" />
-                        <span className="hidden sm:inline">
-                          {labelKey
-                            ? ((t.app.nav as Record<string, string>)[
-                                labelKey
-                              ] ?? label)
-                            : label}
-                        </span>
-
-                        <span
-                          aria-hidden
-                          className="absolute inset-1 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover:opacity-5"
-                        />
-
-                        {isActive && (
-                          <span
-                            aria-hidden
-                            className="absolute bottom-0 left-0 right-0 h-px bg-midground"
-                            style={{ mixBlendMode: "plus-lighter" }}
-                          />
-                        )}
-                      </>
-                    )}
-                  </NavLink>
-                </Cell>
-              ))}
-            </Grid>
-          </div>
-
-          <Grid className="h-full shrink-0 !border-t-0 !border-b-0">
-            <Cell className="flex items-center gap-2 !p-0 !px-2 sm:!px-4">
-              <PluginSlot name="header-right" />
-              <ThemeSwitcher />
-              <LanguageSwitcher />
-              <Typography
-                mondwest
-                className="hidden sm:inline text-[0.7rem] tracking-[0.15em] opacity-50"
-              >
-                {t.app.webUi}
-              </Typography>
-            </Cell>
-          </Grid>
-        </div>
+        <Typography
+          className="font-bold text-[0.95rem] leading-[0.95] tracking-[0.05em] text-midground"
+          style={{ mixBlendMode: "plus-lighter" }}
+        >
+          {t.app.brand}
+        </Typography>
      </header>

-      {/* Full-width banner slot under the nav, outside the main clamp —
-          useful for marquee/alert/status strips themes want to show
-          above page content. */}
+      {mobileOpen && (
+        <button
+          type="button"
+          aria-label={t.app.closeNavigation}
+          onClick={closeMobile}
+          className={cn(
+            "lg:hidden fixed inset-0 z-40",
+            "bg-black/60 backdrop-blur-sm cursor-pointer",
+          )}
+        />
+      )}
+
      <PluginSlot name="header-banner" />

-      <div
-        className={cn(
-          "relative z-2 mx-auto w-full flex-1 px-3 sm:px-6 pt-16 sm:pt-20 pb-4 sm:pb-8",
-          mainMaxWidth,
-          showSidebar && "flex gap-4 sm:gap-6",
-        )}
-      >
-        {showSidebar && (
+      <div className="flex min-h-0 min-w-0 flex-1 flex-col overflow-hidden pt-12 lg:pt-0">
+        <div className="flex min-h-0 min-w-0 flex-1">
          <aside
+            id="app-sidebar"
+            aria-label={t.app.navigation}
            className={cn(
-              "w-[260px] shrink-0 border-r border-current/20 pr-3 sm:pr-4",
-              "hidden lg:block",
+              "fixed top-0 left-0 z-50 flex h-dvh max-h-dvh w-64 min-h-0 flex-col",
+              "border-r border-current/20",
+              "bg-background-base/95 backdrop-blur-sm",
+              "transition-transform duration-200 ease-out",
+              mobileOpen ? "translate-x-0" : "-translate-x-full",
+              "lg:sticky lg:top-0 lg:translate-x-0 lg:shrink-0",
            )}
            style={{
              background: "var(--component-sidebar-background)",
@@ -356,75 +385,275 @@ export default function App() {
              borderImage: "var(--component-sidebar-border-image)",
            }}
          >
-            <PluginSlot
-              name="sidebar"
-              fallback={
-                <div className="p-4 text-xs opacity-60 font-mondwest tracking-wide">
-                  {/* Cockpit layout with no sidebar plugin — rare but valid;
-                      the space still exists so the grid doesn't shift when
-                      a plugin loads asynchronously. */}
-                  sidebar slot empty
-                </div>
-              }
-            />
-          </aside>
-        )}
+            <div
+              className={cn(
+                "flex h-14 shrink-0 items-center justify-between gap-2 px-5",
+                "border-b border-current/20",
+              )}
+            >
+              <Typography
+                className="font-bold text-[1.125rem] leading-[0.95] tracking-[0.0525rem] text-midground"
+                style={{ mixBlendMode: "plus-lighter" }}
+              >
+                Hermes
+                <br />
+                Agent
+              </Typography>

-        <main className="min-w-0 flex-1">
-          <PluginSlot name="pre-main" />
-          <Routes>
-            {routes.map(({ key, path, Component }) => (
-              <Route key={key} path={path} element={<Component />} />
-            ))}
-            <Route path="*" element={<Navigate to="/" replace />} />
-          </Routes>
-          <PluginSlot name="post-main" />
-        </main>
+              <button
+                type="button"
+                onClick={closeMobile}
+                aria-label={t.app.closeNavigation}
+                className={cn(
+                  "lg:hidden inline-flex h-7 w-7 items-center justify-center",
+                  "text-midground/70 hover:text-midground transition-colors cursor-pointer",
+                  "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+                )}
+              >
+                <X className="h-4 w-4" />
+              </button>
+            </div>
+
+            <PluginSlot name="header-left" />
+
+            <nav
+              className="min-h-0 w-full flex-1 overflow-y-auto overflow-x-hidden border-t border-current/10 py-2"
+              aria-label={t.app.navigation}
+            >
+              <ul className="flex flex-col">
+                {navItems.map(({ path, label, labelKey, icon: Icon }) => {
+                  const navLabel = labelKey
+                    ? ((t.app.nav as Record<string, string>)[labelKey] ?? label)
+                    : label;
+                  return (
+                    <li key={path}>
+                      <NavLink
+                        to={path}
+                        end={path === "/sessions"}
+                        onClick={closeMobile}
+                        className={({ isActive }) =>
+                          cn(
+                            "group relative flex items-center gap-3",
+                            "px-5 py-2.5",
+                            "font-mondwest text-[0.8rem] tracking-[0.12em]",
+                            "whitespace-nowrap transition-colors cursor-pointer",
+                            "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+                            isActive
+                              ? "text-midground"
+                              : "opacity-60 hover:opacity-100",
+                          )
+                        }
+                        style={{
+                          clipPath: "var(--component-tab-clip-path)",
+                        }}
+                      >
+                        {({ isActive }) => (
+                          <>
+                            <Icon className="h-3.5 w-3.5 shrink-0" />
+                            <span className="truncate">{navLabel}</span>
+
+                            <span
+                              aria-hidden
+                              className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover:opacity-5"
+                            />
+
+                            {isActive && (
+                              <span
+                                aria-hidden
+                                className="absolute left-0 top-0 bottom-0 w-px bg-midground"
+                                style={{ mixBlendMode: "plus-lighter" }}
+                              />
+                            )}
+                          </>
+                        )}
+                      </NavLink>
+                    </li>
+                  );
+                })}
+              </ul>
+            </nav>
+
+            <SidebarSystemActions onNavigate={closeMobile} />
+
+            <div
+              className={cn(
+                "flex shrink-0 items-center justify-between gap-2",
+                "px-3 py-2",
+                "border-t border-current/20",
+              )}
+            >
+              <div className="flex min-w-0 items-center gap-2">
+                <PluginSlot name="header-right" />
+                <ThemeSwitcher dropUp />
+                <LanguageSwitcher />
+              </div>
+            </div>
+
+            <SidebarFooter />
+          </aside>
+
+          <PageHeaderProvider pluginTabs={pluginTabMeta}>
+            <div
+              className={cn(
+                "relative z-2 flex min-w-0 min-h-0 flex-1 flex-col",
+                "px-3 sm:px-6",
+                isChatRoute
+                  ? "pb-3 pt-1 sm:pb-4 sm:pt-2 lg:pt-4"
+                  : "pt-2 sm:pt-4 lg:pt-6 pb-4 sm:pb-8",
+                isDocsRoute && "min-h-0 flex-1",
+              )}
+            >
+              <PluginSlot name="pre-main" />
+              <div
+                className={cn(
+                  "w-full min-w-0",
+                  (isDocsRoute || isChatRoute) && "min-h-0 flex flex-1 flex-col",
+                )}
+              >
+                <Routes>
+                  {routes.map(({ key, path, element }) => (
+                    <Route key={key} path={path} element={element} />
+                  ))}
+                  <Route
+                    path="*"
+                    element={<Navigate to="/sessions" replace />}
+                  />
+                </Routes>
+              </div>
+              <PluginSlot name="post-main" />
+            </div>
+          </PageHeaderProvider>
+        </div>
      </div>

-      <footer className="relative z-2 border-t border-current/20">
-        <Grid className={cn("mx-auto !border-t-0 !border-b-0", mainMaxWidth)}>
-          <Cell className="flex items-center !px-3 sm:!px-6 !py-3">
-            <PluginSlot
-              name="footer-left"
-              fallback={
-                <Typography
-                  mondwest
-                  className="text-[0.7rem] sm:text-[0.8rem] tracking-[0.12em] opacity-60"
-                >
-                  {t.app.footer.name}
-                </Typography>
-              }
-            />
-          </Cell>
-          <Cell className="flex items-center justify-end !px-3 sm:!px-6 !py-3">
-            <PluginSlot
-              name="footer-right"
-              fallback={
-                <Typography
-                  mondwest
-                  className="text-[0.6rem] sm:text-[0.7rem] tracking-[0.15em] text-midground"
-                  style={{ mixBlendMode: "plus-lighter" }}
-                >
-                  {t.app.footer.org}
-                </Typography>
-              }
-            />
-          </Cell>
-        </Grid>
-      </footer>
-
-      {/* Fixed-position overlay plugins (scanlines, vignettes, etc.) render
-          above everything else. Each plugin is responsible for its own
-          pointer-events and z-index. */}
      <PluginSlot name="overlay" />
    </div>
  );
 }

+/**
+ * "System" section of the sidebar: a small heading, the status strip, and
+ * one button per system action (gateway restart, Hermes update).
+ *
+ * A click while nothing is busy starts the action, routes to `/sessions`,
+ * and then calls `onNavigate`.
+ *
+ * @param onNavigate Called after a click has kicked off an action.
+ */
+function SidebarSystemActions({ onNavigate }: { onNavigate: () => void }) {
+  const { t } = useI18n();
+  const navigate = useNavigate();
+  const { activeAction, isBusy, isRunning, pendingAction, runAction } =
+    useSystemActions();
+
+  const restartItem: SystemActionItem = {
+    action: "restart",
+    icon: RotateCw,
+    label: t.status.restartGateway,
+    runningLabel: t.status.restartingGateway,
+    spin: true,
+  };
+  const updateItem: SystemActionItem = {
+    action: "update",
+    icon: Download,
+    label: t.status.updateHermes,
+    runningLabel: t.status.updatingHermes,
+    spin: false,
+  };
+  const actionItems: SystemActionItem[] = [restartItem, updateItem];
+
+  // Clicks are ignored entirely while any action is in flight.
+  const trigger = (action: SystemAction) => {
+    if (!isBusy) {
+      void runAction(action);
+      navigate("/sessions");
+      onNavigate();
+    }
+  };
+
+  const headingClass = cn(
+    "px-5 pt-0.5 pb-0.5",
+    "font-mondwest text-[0.6rem] tracking-[0.15em] uppercase opacity-30",
+  );
+
+  return (
+    <div
+      className={cn(
+        "shrink-0 flex flex-col",
+        "border-t border-current/10",
+        "py-1",
+      )}
+    >
+      <span className={headingClass}>{t.app.system}</span>
+
+      <SidebarStatusStrip />
+
+      <ul className="flex flex-col">
+        {actionItems.map((item) => {
+          const { action, icon: Icon, label, runningLabel, spin } = item;
+          const isPending = pendingAction === action;
+          const isActionRunning =
+            activeAction === action && isRunning && !isPending;
+          const busy = isPending || isActionRunning;
+          // Only the *other* buttons are disabled while something is busy.
+          const lockedOut = isBusy && !busy;
+          const caption = isActionRunning ? runningLabel : label;
+
+          // Pending shows a generic spinner; otherwise the action's own icon,
+          // animated (spin or pulse, per item) while the action is running.
+          const glyph = isPending ? (
+            <Loader2 className="h-3.5 w-3.5 shrink-0 animate-spin" />
+          ) : (
+            <Icon
+              className={cn(
+                "h-3.5 w-3.5 shrink-0",
+                isActionRunning && spin && "animate-spin",
+                isActionRunning && !spin && "animate-pulse",
+              )}
+            />
+          );
+
+          return (
+            <li key={action}>
+              <button
+                type="button"
+                onClick={() => trigger(action)}
+                disabled={lockedOut}
+                aria-busy={busy}
+                className={cn(
+                  "group relative flex w-full items-center gap-3",
+                  "px-5 py-1.5",
+                  "font-mondwest text-[0.75rem] tracking-[0.1em]",
+                  "text-left whitespace-nowrap transition-opacity cursor-pointer",
+                  "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground",
+                  busy
+                    ? "text-midground opacity-100"
+                    : "opacity-60 hover:opacity-100",
+                  "disabled:cursor-not-allowed disabled:opacity-30",
+                )}
+              >
+                {glyph}
+
+                <span className="truncate">{caption}</span>
+
+                <span
+                  aria-hidden
+                  className="absolute inset-y-0.5 left-1.5 right-1.5 bg-midground opacity-0 pointer-events-none transition-opacity duration-200 group-hover:opacity-5"
+                />
+
+                {busy && (
+                  <span
+                    aria-hidden
+                    className="absolute left-0 top-0 bottom-0 w-px bg-midground"
+                    style={{ mixBlendMode: "plus-lighter" }}
+                  />
+                )}
+              </button>
+            </li>
+          );
+        })}
+      </ul>
+    </div>
+  );
+}
+
+// Shape of one sidebar navigation entry.
+// NOTE(review): presumably the element type of `navItems` rendered by the
+// sidebar <nav> — confirm against its declaration.
 interface NavItem {
+  // Icon component; must accept an optional `className`.
-  icon: React.ComponentType<{ className?: string }>;
+  icon: ComponentType<{ className?: string }>;
+  // Display text, used as-is when `labelKey` is absent or has no translation.
   label: string;
+  // Optional i18n key for the label.
   labelKey?: string;
+  // Route path for the entry.
   path: string;
 }
+
+// One button in the sidebar's system-actions list.
+interface SystemActionItem {
+  // Action identifier: passed to the runner on click and used as the list key.
+  action: SystemAction;
+  // Icon shown next to the label unless the action is pending.
+  icon: ComponentType<{ className?: string }>;
+  // Label shown when the action is not running.
+  label: string;
+  // Label shown instead of `label` while the action is running.
+  runningLabel: string;
+  // While running: true animates the icon with a spin, false with a pulse.
+  spin: boolean;
+}
@@ -0,0 +1,379 @@
+/**
+ * ChatSidebar — structured-events panel that sits next to the xterm.js
+ * terminal in the dashboard Chat tab.
+ *
+ * Two WebSockets, one per concern:
+ *
+ *   1. **JSON-RPC sidecar** (`GatewayClient` → /api/ws) — drives the
+ *      sidebar's own slot of the dashboard's in-process gateway.  Owns
+ *      the model badge / picker / connection state / error banner.
+ *      Independent of the PTY pane's session by design — those are the
+ *      pieces the sidebar needs to be able to drive directly (model
+ *      switch via slash.exec, etc.).
+ *
+ *   2. **Event subscriber** (/api/events?channel=…) — passive, receives
+ *      every dispatcher emit from the PTY-side `tui_gateway.entry` that
+ *      the dashboard fanned out.  This is how `tool.start/progress/
+ *      complete` from the agent loop reach the sidebar even though the
+ *      PTY child runs three processes deep from us.  The `channel` id
+ *      ties this listener to the same chat tab's PTY child — see
+ *      `ChatPage.tsx` for where the id is generated.
+ *
+ * Best-effort throughout: WS failures show in the badge / banner, the
+ * terminal pane keeps working unimpaired.
+ */
+
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Card } from "@/components/ui/card";
+
+import { ModelPickerDialog } from "@/components/ModelPickerDialog";
+import { ToolCall, type ToolEntry } from "@/components/ToolCall";
+import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
+
+import { cn } from "@/lib/utils";
+import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
+import { useCallback, useEffect, useMemo, useState } from "react";
+
+// Session metadata accumulated from `session.info` event payloads; each
+// payload is shallow-merged over the previous value.
+interface SessionInfo {
+  cwd?: string;
+  // Full model id; the sidebar displays only the segment after the last "/".
+  model?: string;
+  provider?: string;
+  // Shown in the banner when there is no active error message.
+  credential_warning?: string;
+}
+
+// Minimal shape of a frame received on the /api/events WebSocket. Only
+// frames with `method === "event"` and a `params` object are processed.
+interface RpcEnvelope {
+  method?: string;
+  params?: { type?: string; payload?: unknown };
+}
+
+// Maximum number of tool entries kept in the list; older ones are dropped
+// when a new `tool.start` arrives.
+const TOOL_LIMIT = 20;
+
+// Badge text for each connection state.
+const STATE_LABEL: Record<ConnectionState, string> = {
+  idle: "idle",
+  connecting: "connecting",
+  open: "live",
+  closed: "closed",
+  error: "error",
+};
+
+// Badge class names for each connection state.
+const STATE_TONE: Record<ConnectionState, string> = {
+  idle: "bg-muted text-muted-foreground",
+  connecting: "bg-primary/10 text-primary",
+  open: "bg-emerald-500/10 text-emerald-500 dark:text-emerald-400",
+  closed: "bg-muted text-muted-foreground",
+  error: "bg-destructive/10 text-destructive",
+};
+
+interface ChatSidebarProps {
+  // Sent as the `channel` query parameter of the /api/events WebSocket; the
+  // events socket is not opened while this is empty.
+  channel: string;
+  // Extra class names merged onto the root <aside>.
+  className?: string;
+}
+
+/**
+ * Sidebar panel for the chat tab: model badge/picker, connection-state
+ * badge, an error/warning banner with a reconnect button, and a list of the
+ * most recent tool calls.
+ *
+ * @param channel   Sent as the `channel` query parameter of the /api/events
+ *                  WebSocket; no events socket is opened while it is empty.
+ * @param className Extra class names merged onto the root `<aside>`.
+ */
+export function ChatSidebar({ channel, className }: ChatSidebarProps) {
+  // `version` bumps on reconnect; gw is derived so we never call setState
+  // for it inside an effect (React 19's set-state-in-effect rule). The
+  // counter is the dependency on purpose — it's not read in the memo body,
+  // it's the signal that says "rebuild the client".
+  const [version, setVersion] = useState(0);
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  const gw = useMemo(() => new GatewayClient(), [version]);
+
+  const [state, setState] = useState<ConnectionState>("idle");
+  const [sessionId, setSessionId] = useState<string | null>(null);
+  const [info, setInfo] = useState<SessionInfo>({});
+  const [tools, setTools] = useState<ToolEntry[]>([]);
+  const [modelOpen, setModelOpen] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // JSON-RPC sidecar lifecycle: subscribe, connect, create a session, and
+  // tear everything down when `gw` is replaced or the component unmounts.
+  useEffect(() => {
+    // Set by cleanup so late promise callbacks don't touch state.
+    let cancelled = false;
+    const offState = gw.onState(setState);
+
+    const offSessionInfo = gw.on<SessionInfo>("session.info", (ev) => {
+      if (ev.session_id) {
+        setSessionId(ev.session_id);
+      }
+
+      if (ev.payload) {
+        setInfo((prev) => ({ ...prev, ...ev.payload }));
+      }
+    });
+
+    const offError = gw.on<{ message?: string }>("error", (ev) => {
+      const message = ev.payload?.message;
+
+      if (message) {
+        setError(message);
+      }
+    });
+
+    // Adopt whichever session the gateway hands us. session.create on the
+    // sidecar is independent of the PTY pane's session by design — we
+    // only need a sid to drive the model picker's slash.exec calls.
+    gw.connect()
+      .then(() => {
+        if (cancelled) {
+          return;
+        }
+        return gw.request<{ session_id: string }>("session.create", {});
+      })
+      .then((created) => {
+        if (cancelled || !created?.session_id) {
+          return;
+        }
+        setSessionId(created.session_id);
+      })
+      .catch((e: Error) => {
+        if (!cancelled) {
+          setError(e.message);
+        }
+      });
+
+    return () => {
+      cancelled = true;
+      offState();
+      offSessionInfo();
+      offError();
+      gw.close();
+    };
+  }, [gw]);
+
+  // Event subscriber WebSocket — receives the rebroadcast of every
+  // dispatcher emit from the PTY child's gateway.  See /api/pub +
+  // /api/events in hermes_cli/web_server.py for the broadcast hop.
+  //
+  // Failures (auth/loopback rejection, server too old to expose the
+  // endpoint, transient drops) surface in the same banner as the
+  // JSON-RPC sidecar so the sidebar matches its documented best-effort
+  // UX and the user always has a reconnect affordance.
+  useEffect(() => {
+    const token = window.__HERMES_SESSION_TOKEN__;
+
+    if (!token || !channel) {
+      return;
+    }
+
+    const proto = window.location.protocol === "https:" ? "wss:" : "ws:";
+    const qs = new URLSearchParams({ token, channel });
+    const ws = new WebSocket(
+      `${proto}//${window.location.host}/api/events?${qs.toString()}`,
+    );
+
+    // `unmounting` suppresses the banner during cleanup — `ws.close()`
+    // from the effect's return fires a close event with code 1005 that
+    // would otherwise look like an unexpected drop.
+    const DISCONNECTED = "events feed disconnected — tool calls may not appear";
+    let unmounting = false;
+    const surface = (msg: string) => !unmounting && setError(msg);
+
+    ws.addEventListener("error", () => surface(DISCONNECTED));
+
+    ws.addEventListener("close", (ev) => {
+      // 4401/4403 get a "reload" hint; any other close except a normal
+      // 1000 is reported as a plain disconnect.
+      if (ev.code === 4401 || ev.code === 4403) {
+        surface(`events feed rejected (${ev.code}) — reload the page`);
+      } else if (ev.code !== 1000) {
+        surface(DISCONNECTED);
+      }
+    });
+
+    ws.addEventListener("message", (ev) => {
+      let frame: RpcEnvelope;
+
+      // Non-JSON frames are ignored.
+      try {
+        frame = JSON.parse(ev.data);
+      } catch {
+        return;
+      }
+
+      if (frame.method !== "event" || !frame.params) {
+        return;
+      }
+
+      const { type, payload } = frame.params;
+
+      if (type === "tool.start") {
+        const p = payload as
+          | { tool_id?: string; name?: string; context?: string }
+          | undefined;
+        const toolId = p?.tool_id;
+
+        if (!toolId) {
+          return;
+        }
+
+        // Append the new entry and keep only the newest TOOL_LIMIT.
+        setTools((prev) =>
+          [
+            ...prev,
+            {
+              kind: "tool" as const,
+              id: `tool-${toolId}-${prev.length}`,
+              tool_id: toolId,
+              name: p?.name ?? "tool",
+              context: p?.context,
+              status: "running" as const,
+              startedAt: Date.now(),
+            },
+          ].slice(-TOOL_LIMIT),
+        );
+      } else if (type === "tool.progress") {
+        const p = payload as
+          | { name?: string; preview?: string }
+          | undefined;
+
+        if (!p?.name || !p.preview) {
+          return;
+        }
+
+        // Progress is matched by tool name, so every still-running entry
+        // with that name receives the preview.
+        setTools((prev) =>
+          prev.map((t) =>
+            t.status === "running" && t.name === p.name
+              ? { ...t, preview: p.preview }
+              : t,
+          ),
+        );
+      } else if (type === "tool.complete") {
+        const p = payload as
+          | {
+              tool_id?: string;
+              summary?: string;
+              error?: string;
+              inline_diff?: string;
+            }
+          | undefined;
+
+        if (!p?.tool_id) {
+          return;
+        }
+
+        setTools((prev) =>
+          prev.map((t) =>
+            t.tool_id === p.tool_id
+              ? {
+                  ...t,
+                  status: p.error ? "error" : "done",
+                  summary: p.summary,
+                  error: p.error,
+                  inline_diff: p.inline_diff,
+                  completedAt: Date.now(),
+                }
+              : t,
+          ),
+        );
+      }
+    });
+
+    return () => {
+      unmounting = true;
+      ws.close();
+    };
+  }, [channel, version]);
+
+  // Clears the banner and tool list, then bumps `version`, which rebuilds
+  // the gateway client and re-runs the events-socket effect.
+  const reconnect = useCallback(() => {
+    setError(null);
+    setTools([]);
+    setVersion((v) => v + 1);
+  }, []);
+
+  // Picker hands us a fully-formed slash command (e.g. "/model anthropic/...").
+  // Fire-and-forget through `slash.exec`; the TUI pane will render the result
+  // via PTY, so the sidebar doesn't need to surface output of its own.
+  const onModelSubmit = useCallback(
+    (slashCommand: string) => {
+      if (!sessionId) {
+        return;
+      }
+
+      // The result is still ignored, but a rejected request must be handled:
+      // `void` only discards the promise, it does not catch. Surface the
+      // failure in the banner, like the connect path does, instead of
+      // leaving an unhandled rejection and a silently failed model switch.
+      // NOTE(review): if GatewayClient rejects in-flight requests on
+      // close(), a reconnect issued mid-request could re-raise the banner —
+      // confirm against GatewayClient.
+      gw.request("slash.exec", {
+        session_id: sessionId,
+        command: slashCommand,
+      }).catch((e: Error) => {
+        setError(e.message);
+      });
+      setModelOpen(false);
+    },
+    [gw, sessionId],
+  );
+
+  const canPickModel = state === "open" && !!sessionId;
+  // Show only the segment after the last "/" of the model id.
+  const modelLabel = (info.model ?? "—").split("/").slice(-1)[0] ?? "—";
+  // An active error takes precedence over the credential warning.
+  const banner = error ?? info.credential_warning ?? null;
+
+  return (
+    <aside
+      className={cn(
+        "flex h-full w-full min-w-0 shrink-0 flex-col gap-3 normal-case lg:w-80",
+        className,
+      )}
+    >
+      <Card className="flex items-center justify-between gap-2 px-3 py-2">
+        <div className="min-w-0">
+          <div className="text-xs uppercase tracking-wider text-muted-foreground">
+            model
+          </div>
+
+          <button
+            type="button"
+            disabled={!canPickModel}
+            onClick={() => setModelOpen(true)}
+            className="flex items-center gap-1 truncate text-sm font-medium hover:underline disabled:cursor-not-allowed disabled:opacity-60 disabled:no-underline"
+            title={info.model ?? "switch model"}
+          >
+            <span className="truncate">{modelLabel}</span>
+
+            {canPickModel && (
+              <ChevronDown className="h-3 w-3 shrink-0 opacity-60" />
+            )}
+          </button>
+        </div>
+
+        <Badge className={STATE_TONE[state]}>{STATE_LABEL[state]}</Badge>
+      </Card>
+
+      {banner && (
+        <Card className="flex items-start gap-2 border-destructive/40 bg-destructive/5 px-3 py-2 text-xs">
+          <AlertCircle className="mt-0.5 h-3.5 w-3.5 shrink-0 text-destructive" />
+
+          <div className="min-w-0 flex-1">
+            <div className="wrap-break-word text-destructive">{banner}</div>
+
+            {error && (
+              <Button
+                variant="ghost"
+                size="sm"
+                className="mt-1 h-6 px-1.5 text-xs"
+                onClick={reconnect}
+              >
+                <RefreshCw className="mr-1 h-3 w-3" />
+                reconnect
+              </Button>
+            )}
+          </div>
+        </Card>
+      )}
+
+      <Card className="flex min-h-0 flex-1 flex-col px-2 py-2">
+        <div className="px-1 pb-2 text-xs uppercase tracking-wider text-muted-foreground">
+          tools
+        </div>
+
+        <div className="flex min-h-0 flex-1 flex-col gap-1.5 overflow-y-auto pr-1">
+          {tools.length === 0 ? (
+            <div className="px-2 py-4 text-center text-xs text-muted-foreground">
+              no tool calls yet
+            </div>
+          ) : (
+            tools.map((t) => <ToolCall key={t.id} tool={t} />)
+          )}
+        </div>
+      </Card>
+
+      {modelOpen && canPickModel && sessionId && (
+        <ModelPickerDialog
+          gw={gw}
+          sessionId={sessionId}
+          onClose={() => setModelOpen(false)}
+          onSubmit={onModelSubmit}
+        />
+      )}
+    </aside>
+  );
+}
@@ -0,0 +1,40 @@
+import { ConfirmDialog } from "@/components/ui/confirm-dialog";
+import { useI18n } from "@/i18n";
+
+/**
+ * Confirmation dialog for delete actions: a `ConfirmDialog` rendered with
+ * `destructive` set, whose button labels default to the shared
+ * `t.common.delete` / `t.common.cancel` strings when not supplied.
+ */
+export function DeleteConfirmDialog(props: DeleteConfirmDialogProps) {
+  const { t } = useI18n();
+  const { description, loading, onCancel, onConfirm, open, title } = props;
+
+  // Caller-supplied labels win; otherwise fall back to the common strings.
+  const confirmText = props.confirmLabel ?? t.common.delete;
+  const cancelText = props.cancelLabel ?? t.common.cancel;
+
+  return (
+    <ConfirmDialog
+      destructive
+      title={title}
+      description={description}
+      open={open}
+      loading={loading}
+      confirmLabel={confirmText}
+      cancelLabel={cancelText}
+      onConfirm={onConfirm}
+      onCancel={onCancel}
+    />
+  );
+}
+
+// Props for DeleteConfirmDialog; all are forwarded to ConfirmDialog.
+interface DeleteConfirmDialogProps {
+  // Cancel button text; defaults to `t.common.cancel`.
+  cancelLabel?: string;
+  // Confirm button text; defaults to `t.common.delete`.
+  confirmLabel?: string;
+  description?: string;
+  loading: boolean;
+  onCancel: () => void;
+  onConfirm: () => void;
+  open: boolean;
+  title: string;
+}
@@ -1,22 +1,50 @@
-import { useMemo } from "react";
+import { useMemo, type ReactNode } from "react";

 /**
 * Lightweight markdown renderer for LLM output.
 * Handles: code blocks, inline code, bold, italic, headers, links, lists, horizontal rules.
 * NOT a full CommonMark parser — optimized for typical assistant message patterns.
+ *
+ * `streaming` renders a blinking caret at the tail of the last block so it
+ * appears to hug the final character instead of wrapping onto a new line
+ * after a block element (paragraph/list/code/…).
 */
-export function Markdown({ content, highlightTerms }: { content: string; highlightTerms?: string[] }) {
+export function Markdown({
+  content,
+  highlightTerms,
+  streaming,
+}: {
+  content: string;
+  highlightTerms?: string[];
+  streaming?: boolean;
+}) {
   const blocks = useMemo(() => parseBlocks(content), [content]);
+  // One caret element per render. It is handed only to the last block, or
+  // rendered on its own when there are no blocks yet, so at most one caret
+  // is ever shown.
+  const caret = streaming ? <StreamingCaret /> : null;
 
   return (
     <div className="text-sm text-foreground leading-relaxed space-y-2">
       {blocks.map((block, i) => (
-        <Block key={i} block={block} highlightTerms={highlightTerms} />
+        <Block
+          key={i}
+          block={block}
+          highlightTerms={highlightTerms}
+          caret={caret && i === blocks.length - 1 ? caret : null}
+        />
       ))}
+      {blocks.length === 0 && caret}
     </div>
   );
 }

+/**
+ * Pulsing inline block used as the streaming caret. Marked `aria-hidden`
+ * because it is purely decorative.
+ */
+function StreamingCaret() {
+  const caretClasses =
+    "inline-block w-[0.5em] h-[1em] ml-0.5 align-[-0.15em] bg-foreground/50 animate-pulse";
+
+  return <span aria-hidden className={caretClasses} />;
+}
+
 /* ------------------------------------------------------------------ */
 /*  Types                                                              */
 /* ------------------------------------------------------------------ */
@@ -58,7 +86,11 @@ function parseBlocks(text: string): BlockNode[] {
    // Heading
    const headingMatch = line.match(/^(#{1,4})\s+(.+)/);
    if (headingMatch) {
-      blocks.push({ type: "heading", level: headingMatch[1].length, content: headingMatch[2] });
+      blocks.push({
+        type: "heading",
+        level: headingMatch[1].length,
+        content: headingMatch[2],
+      });
      i++;
      continue;
    }
@@ -124,12 +156,23 @@ function parseBlocks(text: string): BlockNode[] {
 /*  Block renderer                                                     */
 /* ------------------------------------------------------------------ */

-function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: string[] }) {
+function Block({
+  block,
+  highlightTerms,
+  caret,
+}: {
+  block: BlockNode;
+  highlightTerms?: string[];
+  caret?: ReactNode;
+}) {
  switch (block.type) {
    case "code":
      return (
        <pre className="bg-secondary/60 border border-border px-3 py-2.5 text-xs font-mono leading-relaxed overflow-x-auto">
-          <code>{block.content}</code>
+          <code>
+            {block.content}
+            {caret}
+          </code>
        </pre>
      );

@@ -141,25 +184,46 @@ function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: s
        h3: "text-sm font-semibold",
        h4: "text-sm font-medium",
      };
-      return <Tag className={sizes[Tag]}><InlineContent text={block.content} highlightTerms={highlightTerms} /></Tag>;
+      return (
+        <Tag className={sizes[Tag]}>
+          <InlineContent text={block.content} highlightTerms={highlightTerms} />
+          {caret}
+        </Tag>
+      );
    }

    case "hr":
-      return <hr className="border-border" />;
+      return (
+        <>
+          <hr className="border-border" />
+          {caret}
+        </>
+      );

    case "list": {
      const Tag = block.ordered ? "ol" : "ul";
+      const last = block.items.length - 1;
      return (
-        <Tag className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}>
+        <Tag
+          className={`space-y-0.5 ${block.ordered ? "list-decimal" : "list-disc"} pl-5 text-sm`}
+        >
          {block.items.map((item, i) => (
-            <li key={i}><InlineContent text={item} highlightTerms={highlightTerms} /></li>
+            <li key={i}>
+              <InlineContent text={item} highlightTerms={highlightTerms} />
+              {i === last ? caret : null}
+            </li>
          ))}
        </Tag>
      );
    }

    case "paragraph":
-      return <p><InlineContent text={block.content} highlightTerms={highlightTerms} /></p>;
+      return (
+        <p>
+          <InlineContent text={block.content} highlightTerms={highlightTerms} />
+          {caret}
+        </p>
+      );
  }
 }

@@ -178,7 +242,8 @@ type InlineNode =
 function parseInline(text: string): InlineNode[] {
  const nodes: InlineNode[] = [];
  // Pattern priority: code > link > bold > italic > bare URL > line break
-  const pattern = /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
+  const pattern =
+    /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g;
  let lastIndex = 0;
  let match: RegExpExecArray | null;

@@ -217,7 +282,13 @@ function parseInline(text: string): InlineNode[] {
  return nodes;
 }

-function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?: string[] }) {
+function InlineContent({
+  text,
+  highlightTerms,
+}: {
+  text: string;
+  highlightTerms?: string[];
+}) {
  const nodes = useMemo(() => parseInline(text), [text]);

  return (
@@ -225,17 +296,34 @@ function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?
      {nodes.map((node, i) => {
        switch (node.type) {
          case "text":
-            return <HighlightedText key={i} text={node.content} terms={highlightTerms} />;
+            return (
+              <HighlightedText
+                key={i}
+                text={node.content}
+                terms={highlightTerms}
+              />
+            );
          case "code":
            return (
-              <code key={i} className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90">
+              <code
+                key={i}
+                className="bg-secondary/60 px-1.5 py-0.5 text-xs font-mono text-primary/90"
+              >
                {node.content}
              </code>
            );
          case "bold":
-            return <strong key={i} className="font-semibold"><HighlightedText text={node.content} terms={highlightTerms} /></strong>;
+            return (
+              <strong key={i} className="font-semibold">
+                <HighlightedText text={node.content} terms={highlightTerms} />
+              </strong>
+            );
          case "italic":
-            return <em key={i}><HighlightedText text={node.content} terms={highlightTerms} /></em>;
+            return (
+              <em key={i}>
+                <HighlightedText text={node.content} terms={highlightTerms} />
+              </em>
+            );
          case "link":
            return (
              <a
@@ -269,10 +357,12 @@ function HighlightedText({ text, terms }: { text: string; terms?: string[] }) {
    <>
      {parts.map((part, i) =>
        regex.test(part) ? (
-          <mark key={i} className="bg-warning/30 text-warning px-0.5">{part}</mark>
+          <mark key={i} className="bg-warning/30 text-warning px-0.5">
+            {part}
+          </mark>
        ) : (
          <span key={i}>{part}</span>
-        )
+        ),
      )}
    </>
  );
--- a/Show More
+++ b/Show More