fix(error_classifier): retry mid-stream SSL/TLS alert errors as transport

Mid-stream SSL alerts (bad_record_mac, tls_alert_internal_error, handshake failures) previously fell through the classifier pipeline to the 'unknown' bucket because: - ssl.SSLError type names weren't in _TRANSPORT_ERROR_TYPES (the isinstance(OSError) catch picks up some but not all SDK-wrapped forms) - the message-pattern list had no SSL alert substrings. The 'unknown' bucket is still retryable, but: (a) logs tell the user 'unknown' instead of identifying the cause, (b) it bypasses the transport-specific backoff/fallback logic, and (c) if the SSL error happens on a large session with a generic 'connection closed' wrapper, the existing disconnect-on-large-session heuristic would incorrectly trigger context compression — expensive, and never fixes a transport hiccup. Changes: - Add ssl.SSLError and its subclass type names to _TRANSPORT_ERROR_TYPES - New _SSL_TRANSIENT_PATTERNS list (separate from _SERVER_DISCONNECT_PATTERNS so SSL alerts route to timeout, not context_overflow+compress) - New step 5 in the classifier pipeline: SSL pattern check runs BEFORE the disconnect check to pre-empt the large-session-compress path. Patterns cover both space-separated ('ssl alert', 'bad record mac') and underscore-separated ('ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC') forms. This is load-bearing because OpenSSL 3.x changed the error-code prefix from SSLV3 to SSL/TLS (e.g. SSLV3_ALERT_BAD_RECORD_MAC → SSL/TLS_ALERT_BAD_RECORD_MAC) and the format will likely churn again — matching on stable alert reason substrings survives future format changes. Tests (8 new): - BAD_RECORD_MAC in Python ssl.c format - OpenSSL 3.x underscore format - TLSV1_ALERT_INTERNAL_ERROR - ssl handshake failure - [SSL: ...] prefix fallback - Real ssl.SSLError instance - REGRESSION GUARD: SSL on large session does NOT compress - REGRESSION GUARD: plain disconnect on large session STILL compresses
fix(gateway): also unlink stale PID + lock files on cleanup
2026-04-22 17:07:12 -07:00 · 2026-04-22 16:33:46 -07:00 · 2026-04-22 16:33:46 -07:00 · 2026-04-22 16:33:36 -07:00 · 2026-04-22 16:33:25 -07:00 · 2026-04-22 16:33:25 -07:00
84 changed files with 9703 additions and 4407 deletions
@@ -64,6 +64,47 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -1144,10 +1185,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1191,12 +1235,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -220,12 +220,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be a
+# transient transport hiccup OR a server-side context-overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +249,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# OpenSSL builds each error-code token by prepending a protocol prefix to
+# the uppercased alert reason; OpenSSL 3.x changed that prefix
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently breaks any matcher keyed on the full token.  Matching on
+# the stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +302,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -388,7 +436,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +464,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -4,6 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
 import re
 import time
@@ -51,6 +52,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -125,6 +133,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -177,6 +187,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "mimo-v2-pro": 1000000,
    "mimo-v2-omni": 256000,
    "mimo-v2-flash": 256000,
+    "mimo-v2.5-pro": 1000000,
+    "mimo-v2.5": 1000000,
    "zai-org/GLM-5": 202752,
 }

@@ -283,7 +295,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -298,14 +318,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -316,6 +339,8 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False
@@ -87,14 +87,20 @@ class AnthropicTransport(ProviderTransport):
        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid."""
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return False
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -135,9 +135,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +164,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +172,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -2129,10 +2129,42 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2179,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2199,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -35,7 +35,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -73,7 +73,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -234,6 +236,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -277,12 +281,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -292,7 +320,7 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)

@@ -505,14 +533,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -523,7 +554,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -532,7 +566,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -584,7 +617,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -602,19 +635,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -652,8 +687,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -663,7 +697,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -671,17 +705,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -692,31 +726,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -724,6 +775,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -735,6 +787,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1009,8 +1062,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1029,6 +1094,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Resolve a mention to its (open_id, user_id) pair; an unknown id is "".
+    # Two shapes occur: im.v1.message.get gives a string ``id`` plus an
+    # ``id_type`` discriminator; event payloads give a nested UserId object.
+    raw_id = getattr(mention, "id", None)
+    if raw_id is None:
+        return "", ""
+    if not isinstance(raw_id, str):
+        return (
+            str(getattr(raw_id, "open_id", "") or ""),
+            str(getattr(raw_id, "user_id", "") or ""),
+        )
+    kind = str(getattr(mention, "id_type", "") or "").lower()
+    if kind == "open_id":
+        return raw_id, ""
+    if kind == "user_id":
+        return "", raw_id
+    return "", ""
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    # Index mention objects by their placeholder ``key``; entries without a
+    # key are ignored, and a later duplicate key overwrites an earlier one.
+    by_key: Dict[str, FeishuMentionRef] = {}
+    for item in mentions or []:
+        placeholder = str(getattr(item, "key", "") or "")
+        if placeholder == "@_all":
+            by_key[placeholder] = FeishuMentionRef(is_all=True)
+        elif placeholder:
+            open_id, user_id = _extract_mention_ids(item)
+            label = str(getattr(item, "name", "") or "").strip()
+            # ``bot.matches`` decides whether this mention targets the bot.
+            is_self = bot.matches(open_id=open_id, user_id=user_id, name=label)
+            by_key[placeholder] = FeishuMentionRef(
+                name=label, open_id=open_id, is_self=is_self
+            )
+    return by_key
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    # Summarise who was mentioned as "[Mentioned: a, b]"; self-mentions and
+    # repeats are left out, and "" is returned when nothing remains.
+    labels: Dict[tuple, str] = {}
+    for ref in mentions:
+        dedupe_key = (ref.is_all, ref.open_id, ref.name)
+        if ref.is_self or dedupe_key in labels:
+            continue
+        shown = ref.name or "unknown"
+        if ref.is_all:
+            labels[dedupe_key] = "@all"
+        elif ref.open_id:
+            labels[dedupe_key] = f"{shown} (open_id={ref.open_id})"
+        else:
+            labels[dedupe_key] = shown
+    # Dict insertion order keeps the labels in first-seen order.
+    return f"[Mentioned: {', '.join(labels.values())}]" if labels else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Strip the bot's own "@name" tokens from the start and end of *text*.
+    # Leading: consecutive self-mentions are removed, each only when followed
+    # by end-of-text or a _MENTION_BOUNDARY_CHARS char (so @Al can't eat @Alice).
+    # Trailing: removed only when nothing but _TRAILING_TERMINAL_PUNCT follows.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()  # leading whitespace is dropped even if nothing matches
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:  # no self-mention matched at the front this pass
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]  # trailing punctuation run, re-attached after stripping
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:  # nothing left to strip at the end
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -2470,13 +2646,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2935,14 +3120,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2959,7 +3150,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3308,15 +3499,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3386,10 +3593,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3409,30 +3616,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)  # True if any ref is flagged as the bot
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        # Package the adapter's known bot identifiers for the module-level
+        # normalizer, which receives them through its ``bot`` argument.
+        open_id, user_id = self._bot_open_id, self._bot_user_id
+        name = self._bot_name
+        return _FeishuBotIdentity(open_id=open_id, user_id=user_id, name=name)

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3457,14 +3671,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4232,12 +4447,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4246,13 +4461,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -2687,8 +2687,9 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file
+            from gateway.status import remove_pid_file, release_gateway_runtime_lock
            remove_pid_file()
+            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -3485,23 +3486,73 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-        
-        # Emit command:* hook for any recognized slash command.
-        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
-        # in hermes_cli/commands.py — no hardcoded set to maintain here.
-        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
-        if command and command in GATEWAY_KNOWN_COMMANDS:
-            await self.hooks.emit(f"command:{command}", {
-                "platform": source.platform.value if source.platform else "",
-                "user_id": source.user_id,
-                "command": command,
-                "args": event.get_command_args().strip(),
-            })

-        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        from hermes_cli.commands import (
+            GATEWAY_KNOWN_COMMANDS,
+            is_gateway_known_command,
+            resolve_command as _resolve_cmd,
+        )
+
+        # Resolve aliases to canonical name so dispatch and hook names
+        # don't depend on the exact alias the user typed.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command

+        # Fire the ``command:<canonical>`` hook for any recognized slash
+        # command — built-in OR plugin-registered. Handlers can return a
+        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
+        # to intercept dispatch before core handling runs. This replaces
+        # the previous fire-and-forget emit(): return values are now
+        # honored, but handlers that return nothing behave exactly as
+        # before (telemetry-style hooks keep working).
+        if command and is_gateway_known_command(canonical):
+            raw_args = event.get_command_args().strip()
+            hook_ctx = {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "command": canonical,
+                "raw_command": command,
+                "args": raw_args,
+                "raw_args": raw_args,
+            }
+            try:
+                hook_results = await self.hooks.emit_collect(
+                    f"command:{canonical}", hook_ctx
+                )
+            except Exception as _hook_err:
+                logger.debug(
+                    "command:%s hook dispatch failed (non-fatal): %s",
+                    canonical, _hook_err,
+                )
+                hook_results = []
+
+            for hook_result in hook_results:
+                if not isinstance(hook_result, dict):
+                    continue
+                decision = str(hook_result.get("decision", "")).strip().lower()
+                if not decision or decision == "allow":
+                    continue
+                if decision == "deny":
+                    message = hook_result.get("message")
+                    if isinstance(message, str) and message:
+                        return message
+                    return f"Command `/{command}` was blocked by a hook."
+                if decision == "handled":
+                    message = hook_result.get("message")
+                    return message if isinstance(message, str) and message else None
+                if decision == "rewrite":
+                    new_command = str(
+                        hook_result.get("command_name", "")
+                    ).strip().lstrip("/")
+                    if not new_command:
+                        continue
+                    new_args = str(hook_result.get("raw_args", "")).strip()
+                    event.text = f"/{new_command} {new_args}".strip()
+                    command = event.get_command()
+                    _cmd_def = _resolve_cmd(command) if command else None
+                    canonical = _cmd_def.name if _cmd_def else command
+                    break
+
        if canonical == "new":
            return await self._handle_reset_command(event)
        
@@ -7645,13 +7696,14 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE,
+            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
+            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -10794,7 +10846,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
+    from gateway.status import (
+        acquire_gateway_runtime_lock,
+        get_running_pid,
+        release_gateway_runtime_lock,
+        remove_pid_file,
+        terminate_pid,
+    )
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
@@ -11007,14 +11065,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            "Exiting to avoid double-running.", _current_pid
        )
        return False
+    if not acquire_gateway_runtime_lock():
+        logger.error(
+            "Gateway runtime lock is already held by another instance. Exiting."
+        )
+        return False
    try:
        write_pid_file()
    except FileExistsError:
+        release_gateway_runtime_lock()
        logger.error(
            "PID file race lost to another gateway instance. Exiting."
        )
        return False
    atexit.register(remove_pid_file)
+    atexit.register(release_gateway_runtime_lock)

    # Start the gateway
    success = await runner.start()
@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Locate the gateway lock file: beside *pid_path*, else in the Hermes home."""
+    if pid_path is None:
+        # No explicit PID path: the lock lives directly under the Hermes home.
+        return get_hermes_home() / _GATEWAY_LOCK_FILENAME
+    return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -212,16 +227,135 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())  # parsed with the PID-file reader
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    """Return the record's ``pid`` as an int, or None if absent or not int-like."""
+    # An empty or missing record short-circuits before the lookup happens.
+    try:
+        return int(record["pid"]) if record else None
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
    except Exception:
        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
+    except Exception:
+        pass
+
+
+def _write_gateway_lock_record(handle) -> None:  # overwrite the file with the current PID record
+    handle.seek(0)
+    handle.truncate()  # truncates at the current position (0), discarding old contents
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())  # best-effort push to disk
+    except OSError:
+        pass  # an fsync failure is deliberately ignored
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if not _IS_WINDOWS:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            return True
+        handle.seek(0, os.SEEK_END)
+        if handle.tell() == 0:  # empty file; presumably so the byte locked below exists
+            handle.write("\n")
+            handle.flush()
+        handle.seek(0)
+        msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        return True
+    except OSError:  # also covers BlockingIOError, which subclasses OSError
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if not _IS_WINDOWS:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+        else:
+            handle.seek(0)  # unlock the same 1-byte region at offset 0
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+    except OSError:
+        pass  # best-effort: a failed unlock is ignored
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process gateway lock; False if another process holds it.
+
+    The lock is tied to this live process, so the OS drops it if we die.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _write_gateway_lock_record(handle)
+            _gateway_lock_handle = handle
+    finally:
+        if _gateway_lock_handle is not handle:  # contended, or the write raised
+            handle.close()  # closing the file also gives up any lock taken
+    return _gateway_lock_handle is handle
+
+
+def release_gateway_runtime_lock() -> None:
+    """Drop the gateway runtime lock if this process holds it; else a no-op."""
+    global _gateway_lock_handle
+    held, _gateway_lock_handle = _gateway_lock_handle, None
+    if held is not None:
+        # The module-level handle is already cleared above; now unlock and
+        # close the file. An OSError raised while closing is ignored.
+        _release_file_lock(held)
+        try:
+            held.close()
+        except OSError:
+            pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Report whether the gateway runtime lock is currently held.
+
+    Probes the lock file with a non-blocking acquire that is released at once.
+    """
+    target = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and target == _get_gateway_lock_path():
+        return True
+    if not target.exists():
+        return False
+    probe = open(target, "a+", encoding="utf-8")
+    try:
+        obtained = _try_acquire_file_lock(probe)
+        if obtained:
+            _release_file_lock(probe)
+        return not obtained
+    finally:
+        try:
+            probe.close()
+        except OSError:
+            pass


 def write_pid_file() -> None:
@@ -583,35 +717,42 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


+def is_gateway_known_command(name: str | None) -> bool:
+    """Tell whether ``name`` is a slash command the gateway can dispatch.
+
+    Built-in commands are checked first via ``GATEWAY_KNOWN_COMMANDS``
+    (derived from ``COMMAND_REGISTRY``). Plugin-registered commands are
+    consulted only afterwards and looked up lazily, so importing this module
+    never forces plugin discovery. Gateway code relies on this to decide
+    whether to emit ``command:<name>`` hooks, which gives plugin commands the
+    same lifecycle events as built-ins. Empty or ``None`` names are unknown.
+    """
+    if not name:
+        return False
+    if name in GATEWAY_KNOWN_COMMANDS:
+        return True
+    return any(
+        plugin_name == name
+        for plugin_name, _description, _args_hint in _iter_plugin_command_entries()
+    )
+
+
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]:
    return lines


+def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
+    """Return (name, description, args_hint) tuples for all plugin slash commands.
+
+    Plugin commands are registered via
+    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
+    like ``CommandDef`` entries for gateway surfacing: they appear in the
+    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
+    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    Discord's native slash command picker.
+
+    Lookup is lazy so importing this module never forces plugin discovery
+    (which can trigger filesystem scans and environment-dependent
+    behavior). If the plugin module cannot be imported or queried, the
+    result is an empty list; malformed registry entries are skipped.
+    """
+    try:
+        from hermes_cli.plugins import get_plugin_commands
+
+        registered = get_plugin_commands() or {}
+    except Exception:
+        # Plugin surfacing is optional; degrade to "no plugin commands".
+        return []
+
+    entries: list[tuple[str, str, str]] = []
+    for name, meta in registered.items():
+        if isinstance(name, str) and isinstance(meta, dict):
+            description = str(meta.get("description") or f"Run /{name}")
+            args_hint = str(meta.get("args_hint") or "").strip()
+            entries.append((name, description, args_hint))
+    return entries
+
+
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
+
+    Plugin-registered slash commands are included so plugins get native
+    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
+    for name, description, _args_hint in _iter_plugin_command_entries():
+        tg_name = _sanitize_telegram_name(name)
+        if tg_name:
+            result.append((tg_name, description))
    return result


@@ -750,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
+
+    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
+    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -759,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
+    for name, _description, _args_hint in _iter_plugin_command_entries():
+        if name not in mapping:
+            mapping[name] = f"/{name}"
    return mapping


@@ -840,6 +840,7 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
+        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -13,6 +13,7 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


+def _best_effort_sweep_expired_pastes() -> None:
+    """Attempt pending-paste cleanup without letting /debug fail offline."""
+    try:
+        _sweep_expired_pastes()
+    except Exception:
+        pass  # deliberate: any sweep failure is ignored so the caller proceeds
+
+
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the path if found, or None.
-    """
+@dataclass
+class LogSnapshot:
+    """Single-read snapshot of a log file used by debug-share."""
+
+    path: Optional[Path]  # resolved log file; None when no non-empty log was found
+    tail_text: str  # last lines for the summary report, or a "(...)" placeholder
+    full_text: Optional[str]  # content for the standalone upload; None if unavailable
+
+
+def _primary_log_path(log_name: str) -> Optional[Path]:
+    """Where *log_name* would live if present. Doesn't check existence."""
    from hermes_cli.logs import LOG_FILES

    filename = LOG_FILES.get(log_name)
-    if not filename:
+    return (get_hermes_home() / "logs" / filename) if filename else None
+
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    Returns the first non-empty candidate (primary, then .1), or None.
+    Callers distinguish 'empty primary' from 'truly missing' via
+    :func:`_primary_log_path`.
+    """
+    primary = _primary_log_path(log_name)
+    if primary is None:
        return None

-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
+    rotated = primary.parent / f"{primary.name}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
+def _capture_log_snapshot(
+    log_name: str,
+    *,
+    tail_lines: int,
+    max_bytes: int = _MAX_LOG_BYTES,
+) -> LogSnapshot:
+    """Capture a log once and derive summary/full-log views from it.

-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
+    The report tail and standalone log upload must come from the same file
+    snapshot. Otherwise a rotation/truncate between reads can make the report
+    look newer than the uploaded ``agent.log`` paste.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        return None
+        primary = _primary_log_path(log_name)
+        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
+        return LogSnapshot(path=None, tail_text=tail, full_text=None)

    try:
        size = log_path.stat().st_size
        if size == 0:
-            return None
+            # race: file was truncated between _resolve_log_path and stat
+            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)

-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
+            if size <= max_bytes:
+                raw = f.read()
+                truncated = False
+            else:
+                # Read from the end until we have enough bytes for the
+                # standalone upload and enough newline context to render the
+                # summary tail from the same snapshot.
+                chunk_size = 8192
+                pos = size
+                chunks: list[bytes] = []
+                total = 0
+                newline_count = 0
+
+                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
+                    read_size = min(chunk_size, pos)
+                    pos -= read_size
+                    f.seek(pos)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total += len(chunk)
+                    newline_count += chunk.count(b"\n")
+                    chunk_size = min(chunk_size * 2, 65536)
+
+                raw = b"".join(chunks)
+                truncated = pos > 0
+
+        full_raw = raw
+        if truncated and len(full_raw) > max_bytes:
+            cut = len(full_raw) - max_bytes
+            # Check whether the cut lands exactly on a line boundary.  If the
+            # byte just before the cut position is a newline the first retained
+            # byte starts a complete line and we should keep it.  Only drop a
+            # partial first line when we're genuinely mid-line.
+            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
+            full_raw = full_raw[cut:]
+            if not on_boundary and b"\n" in full_raw:
+                full_raw = full_raw.split(b"\n", 1)[1]
+
+        all_text = raw.decode("utf-8", errors="replace")
+        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
+
+        full_text = full_raw.decode("utf-8", errors="replace")
+        if truncated:
+            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
+
+        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
+    except Exception as exc:
+        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
+
+
+def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
+    """Snapshot the agent, errors and gateway logs once each for debug-share."""
+    capped = min(log_lines, 100)  # errors/gateway tails never exceed 100 lines
+    tail_budget = {"agent": log_lines, "errors": capped, "gateway": capped}
+    return {
+        log_name: _capture_log_snapshot(log_name, tail_lines=lines)
+        for log_name, lines in tail_budget.items()
+    }


 # ---------------------------------------------------------------------------
@@ -405,7 +470,12 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+def collect_debug_report(
+    *,
+    log_lines: int = 200,
+    dump_text: str = "",
+    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
+) -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
        dump_text = _capture_dump()
    buf.write(dump_text)

+    if log_snapshots is None:
+        log_snapshots = _capture_default_log_snapshots(log_lines)
+
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
+    buf.write(log_snapshots["agent"].tail_text)
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
+    buf.write(log_snapshots["errors"].tail_text)
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
+    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n")

    return buf.getvalue()
@@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
+    _best_effort_sweep_expired_pastes()
+
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -459,10 +534,15 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
+    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
+    report = collect_debug_report(
+        log_lines=log_lines,
+        dump_text=dump_text,
+        log_snapshots=log_snapshots,
+    )
+    agent_log = log_snapshots["agent"].full_text
+    gateway_log = log_snapshots["gateway"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -333,6 +333,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


+def _read_systemd_unit_properties(
+    system: bool = False,
+    properties: tuple[str, ...] = (
+        "ActiveState",
+        "SubState",
+        "Result",
+        "ExecMainStatus",
+    ),
+) -> dict[str, str]:
+    """Return selected ``systemctl show`` properties for the gateway unit.
+
+    ``properties`` names the unit properties to request; they are handed to
+    ``systemctl show --property`` as one comma-separated list. The result
+    maps each key of the ``Key=Value`` output to its stripped value.
+
+    An empty dict is returned when the systemctl call raises
+    ``RuntimeError``/``OSError``, times out, or exits non-zero.
+    """
+    # Resolve user vs. system scope first; this call is not guarded.
+    scope = _select_systemd_scope(system)
+    try:
+        show_args = ["show", get_service_name(), "--no-pager"]
+        show_args += ["--property", ",".join(properties)]
+        completed = _run_systemctl(
+            show_args, system=scope, capture_output=True, text=True, timeout=10
+        )
+    except (RuntimeError, subprocess.TimeoutExpired, OSError):
+        return {}
+
+    if completed.returncode != 0:
+        return {}
+
+    # One ``Key=Value`` pair per line; split on the first "=" only so values
+    # that themselves contain "=" stay intact. Lines without "=" are ignored.
+    pairs = (
+        line.split("=", 1) for line in completed.stdout.splitlines() if "=" in line
+    )
+    return {key: value.strip() for key, value in pairs}
+
+
+def _wait_for_systemd_service_restart(
+    *,
+    system: bool = False,
+    previous_pid: int | None = None,
+    timeout: float = 60.0,
+) -> bool:
+    """Poll until the gateway unit is active again after a restart handoff.
+
+    Prints the outcome and returns True on success, or False once ``timeout``
+    seconds elapse. With ``previous_pid`` set, a different live PID is required.
+    """
+    import time
+
+    svc = get_service_name()
+    scope_label = _service_scope_label(system).capitalize()
+    deadline = time.time() + timeout
+
+    while time.time() < deadline:
+        props = _read_systemd_unit_properties(system=system)
+        unit_state = (props.get("ActiveState", ""), props.get("SubState", ""))
+        try:
+            from gateway.status import get_running_pid
+
+            new_pid = get_running_pid()
+        except Exception:
+            new_pid = None  # PID lookup failed; treat as "no PID known yet"
+
+        if unit_state[0] == "active":
+            if new_pid and new_pid != previous_pid:
+                print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                return True
+            if previous_pid is None:
+                print(f"✓ {scope_label} service restarted")
+                return True
+
+        # Poll faster while systemd reports a pending auto-restart.
+        pending_relaunch = unit_state == ("activating", "auto-restart")
+        time.sleep(1 if pending_relaunch else 2)
+
+    print(
+        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
+        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
+    )
+    return False
+
+
+def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
+    """Finish a planned gateway restart that systemd has left hanging.
+
+    Nothing is done unless unit properties are readable and
+    ``read_runtime_status()`` reports ``restart_requested``. Two stuck states
+    are handled:
+
+    * ``activating``/``auto-restart`` -- just wait for the relaunch;
+    * ``failed`` with ``ExecMainStatus`` equal to
+      ``GATEWAY_SERVICE_RESTART_EXIT_CODE`` or ``Result`` equal to
+      ``exit-code`` -- ``reset-failed`` and ``start`` the unit, then wait.
+
+    Returns the wait outcome when recovery was attempted, otherwise False.
+    """
+    props = _read_systemd_unit_properties(system=system)
+    if not props:
+        return False
+
+    try:
+        from gateway.status import read_runtime_status
+    except Exception:
+        return False
+    # A falsy status is treated the same as "no restart requested".
+    if not (read_runtime_status() or {}).get("restart_requested"):
+        return False
+
+    active_state = props.get("ActiveState", "")
+    if active_state == "activating":
+        if props.get("SubState", "") != "auto-restart":
+            return False
+        print("⏳ Service restart already pending — waiting for systemd relaunch...")
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    if active_state != "failed":
+        return False
+    planned_exit = props.get("ExecMainStatus", "") == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
+    if not planned_exit and props.get("Result", "") != "exit-code":
+        return False
+
+    svc = get_service_name()
+    scope_label = _service_scope_label(system).capitalize()
+    print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
+    # reset-failed first, then start; each call gets its own timeout.
+    for verb, budget in (("reset-failed", 30), ("start", 90)):
+        _run_systemctl([verb, svc], system=system, check=False, timeout=budget)
+    return _wait_for_systemd_service_restart(
+        system=system,
+        previous_pid=previous_pid,
+    )
+
+
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -470,7 +611,8 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    remove_pid_file()
+    if get_running_pid() is None:
+        remove_pid_file()
    return True


@@ -1505,14 +1647,9 @@ def systemd_restart(system: bool = False):

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        # SIGUSR1 sent — the gateway will drain active agents, exit with
-        # code 75, and systemd will restart it after RestartSec (30s).
-        # Wait for the old process to die and the new one to become active
-        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
-        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1526,48 +1663,41 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # Phase 2: wait for systemd to start the new process
-        print(f"⏳ Waiting for {svc} to restart...")
-        deadline = time.time() + 60
-        while time.time() < deadline:
-            try:
-                result = subprocess.run(
-                    scope_cmd + ["is-active", svc],
-                    capture_output=True, text=True, timeout=5,
-                )
-                if result.stdout.strip() == "active":
-                    # Verify it's a NEW process, not the old one somehow
-                    new_pid = get_running_pid()
-                    if new_pid and new_pid != pid:
-                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                        return
-            except (subprocess.TimeoutExpired, FileNotFoundError):
-                pass
-            time.sleep(2)
-
-        # Timed out — check final state
-        try:
-            result = subprocess.run(
-                scope_cmd + ["is-active", svc],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip() == "active":
-                print(f"✓ {scope_label} service restarted")
-                return
-        except Exception:
-            pass
-        print(
-            f"⚠ {scope_label} service did not become active within 60s.\n"
-            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        # The gateway exits with code 75 for a planned service restart.
+        # systemd can sit in the RestartSec window or even wedge itself into a
+        # failed/rate-limited state if the operator asks for another restart in
+        # the middle of that handoff. Clear any stale failed state and kick the
+        # unit immediately so `hermes gateway restart` behaves idempotently.
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
        return
+
+    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        return
+
+    _run_systemctl(
+        ["reset-failed", get_service_name()],
+        system=system,
+        check=False,
+        timeout=30,
+    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False):
+def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1590,8 +1720,12 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

+    status_cmd = ["status", get_service_name(), "--no-pager"]
+    if full:
+        status_cmd.append("-l")
+
    _run_systemctl(
-        ["status", get_service_name(), "--no-pager"],
+        status_cmd,
        system=system,
        capture_output=False,
        timeout=10,
@@ -1624,6 +1758,19 @@ def systemd_status(deep: bool = False, system: bool = False):
        for line in runtime_lines:
            print(f"  {line}")

+    unit_props = _read_systemd_unit_properties(system=system)
+    active_state = unit_props.get("ActiveState", "")
+    sub_state = unit_props.get("SubState", "")
+    exec_main_status = unit_props.get("ExecMainStatus", "")
+    result_code = unit_props.get("Result", "")
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
+    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
+        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
+        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
+    elif active_state == "failed" and result_code:
+        print(f"  ⚠ Systemd unit result: {result_code}")
+
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1639,7 +1786,10 @@ def systemd_status(deep: bool = False, system: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
+        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
+        if full:
+            log_cmd.append("-l")
+        subprocess.run(log_cmd, timeout=10)


 # =============================================================================
@@ -3762,12 +3912,13 @@ def gateway_command(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
+        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system)
+            systemd_status(deep, system=system, full=full)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -6888,6 +6888,12 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+    gateway_status.add_argument(
+        "-l",
+        "--full",
+        action="store_true",
+        help="Show full, untruncated service/log output where supported",
+    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -42,7 +42,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2-pro",               ""),
+    ("xiaomi/mimo-v2.5-pro",             ""),
+    ("xiaomi/mimo-v2.5",                 ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -108,7 +109,8 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2-pro",
+        "xiaomi/mimo-v2.5-pro",
+        "xiaomi/mimo-v2.5",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -283,6 +283,7 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
+        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -293,6 +294,13 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

+        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
+        ``"dias:7 formato:json"``) used by gateway adapters to surface the
+        command with an argument field — for example Discord's native slash
+        command picker. Plugin commands without ``args_hint`` register as
+        parameterless in Discord and still accept trailing text when invoked
+        as free-form chat.
+
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -320,6 +328,7 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
+            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      prompt: "#FFF8DC"                  # Prompt text color
      input_rule: "#CD7F32"              # Input area horizontal rule
      response_border: "#FFD700"         # Response box border (ANSI)
+      status_bar_bg: "#1a1a2e"           # Status bar background
+      status_bar_text: "#C0C0C0"         # Status bar default text
+      status_bar_strong: "#FFD700"       # Status bar highlighted text
+      status_bar_dim: "#8B8682"          # Status bar separators/muted text
+      status_bar_good: "#8FBC8F"         # Healthy context usage
+      status_bar_warn: "#FFD700"         # Warning context usage
+      status_bar_bad: "#FF8C00"          # High context usage
+      status_bar_critical: "#FF6B6B"     # Critical context usage
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
@@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF8DC",
            "input_rule": "#CD7F32",
            "response_border": "#FFD700",
+            "status_bar_bg": "#1a1a2e",
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
@@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F1E6CF",
            "input_rule": "#9F1C1C",
            "response_border": "#C7A96B",
+            "status_bar_bg": "#2A1212",
+            "status_bar_text": "#F1E6CF",
+            "status_bar_strong": "#C7A96B",
+            "status_bar_dim": "#6E584B",
+            "status_bar_good": "#7BC96F",
+            "status_bar_warn": "#C7A96B",
+            "status_bar_bad": "#DD4A3A",
+            "status_bar_critical": "#EF5350",
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
@@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#444444",
            "response_border": "#aaaaaa",
+            "status_bar_bg": "#1F1F1F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#E6EDF3",
+            "status_bar_dim": "#777777",
+            "status_bar_good": "#B5B5B5",
+            "status_bar_warn": "#AAAAAA",
+            "status_bar_bad": "#D0D0D0",
+            "status_bar_critical": "#F0F0F0",
            "session_label": "#888888",
            "session_border": "#555555",
        },
@@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#4169e1",
            "response_border": "#7eb8f6",
+            "status_bar_bg": "#151C2F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#7EB8F6",
+            "status_bar_dim": "#4B5563",
+            "status_bar_good": "#63D0A6",
+            "status_bar_warn": "#E6A855",
+            "status_bar_bad": "#F7A072",
+            "status_bar_critical": "#FF7A7A",
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
@@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#EAF7FF",
            "input_rule": "#2A6FB9",
            "response_border": "#5DB8F5",
+            "status_bar_bg": "#0F2440",
+            "status_bar_text": "#EAF7FF",
+            "status_bar_strong": "#A9DFFF",
+            "status_bar_dim": "#496884",
+            "status_bar_good": "#6ED7B0",
+            "status_bar_warn": "#5DB8F5",
+            "status_bar_bad": "#2A6FB9",
+            "status_bar_critical": "#D94F4F",
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
@@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F5F5F5",
            "input_rule": "#656565",
            "response_border": "#B7B7B7",
+            "status_bar_bg": "#202020",
+            "status_bar_text": "#D3D3D3",
+            "status_bar_strong": "#F5F5F5",
+            "status_bar_dim": "#656565",
+            "status_bar_good": "#B7B7B7",
+            "status_bar_warn": "#D3D3D3",
+            "status_bar_bad": "#E7E7E7",
+            "status_bar_critical": "#F5F5F5",
            "session_label": "#919191",
            "session_border": "#656565",
        },
@@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF0D4",
            "input_rule": "#C75B1D",
            "response_border": "#F29C38",
+            "status_bar_bg": "#2B160E",
+            "status_bar_text": "#FFF0D4",
+            "status_bar_strong": "#FFD39A",
+            "status_bar_dim": "#6C4724",
+            "status_bar_good": "#6BCB77",
+            "status_bar_warn": "#F29C38",
+            "status_bar_bad": "#E2832B",
+            "status_bar_critical": "#EF5350",
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
@@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
+    status_text = skin.get_color("status_bar_text", text)
+    status_strong = skin.get_color("status_bar_strong", title)
+    status_dim = skin.get_color("status_bar_dim", dim)
+    status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
+    status_warn = skin.get_color("status_bar_warn", warn)
+    status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
+    status_critical = skin.get_color("status_bar_critical", error)
    voice_bg = skin.get_color("voice_status_bg", status_bg)
    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
-        "status-bar": f"bg:{status_bg} {text}",
-        "status-bar-strong": f"bg:{status_bg} {title} bold",
-        "status-bar-dim": f"bg:{status_bg} {dim}",
-        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
-        "status-bar-warn": f"bg:{status_bg} {warn} bold",
-        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
-        "status-bar-critical": f"bg:{status_bg} {error} bold",
+        "status-bar": f"bg:{status_bg} {status_text}",
+        "status-bar-strong": f"bg:{status_bg} {status_strong} bold",
+        "status-bar-dim": f"bg:{status_bg} {status_dim}",
+        "status-bar-good": f"bg:{status_bg} {status_good} bold",
+        "status-bar-warn": f"bg:{status_bg} {status_warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {status_bad} bold",
+        "status-bar-critical": f"bg:{status_bg} {status_critical} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
        "completion-menu": f"bg:{menu_bg} {text}",
@@ -108,9 +108,15 @@ def _run_async(coro):
    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, coro)
+        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        future = pool.submit(asyncio.run, coro)
+        try:
            return future.result(timeout=300)
+        except concurrent.futures.TimeoutError:
+            future.cancel()
+            raise
+        finally:
+            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -16,8 +16,8 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "agent-browser": "^0.13.0",
-    "@askjo/camofox-browser": "^1.5.2"
+    "@askjo/camofox-browser": "^1.5.2",
+    "agent-browser": "^0.26.0"
  },
  "overrides": {
    "lodash": "4.18.1"
@@ -5826,16 +5826,6 @@ class AIAgent:
                            result["response"] = _call_chat_completions()
                        return  # success
                    except Exception as e:
-                        if deltas_were_sent["yes"]:
-                            # Streaming failed AFTER some tokens were already
-                            # delivered.  Don't retry or fall back — partial
-                            # content already reached the user.
-                            logger.warning(
-                                "Streaming failed after partial delivery, not retrying: %s", e
-                            )
-                            result["error"] = e
-                            return
-
                        _is_timeout = isinstance(
                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
                        )
@@ -5843,6 +5833,123 @@ class AIAgent:
                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
                        )

+                        # If the stream died AFTER some tokens were delivered:
+                        # normally we don't retry (the user already saw text,
+                        # retrying would duplicate it).  BUT: if a tool call
+                        # was in-flight when the stream died, silently aborting
+                        # discards the tool call entirely.  In that case we
+                        # prefer to retry — the user sees a brief
+                        # "reconnecting" marker + duplicated preamble text,
+                        # which is strictly better than a failed action with
+                        # a "retry manually" message.  Limit this to transient
+                        # timeouts/connection errors (Clawdbot-style narrow gate):
+                        # no tool has executed yet within this API call, so a
+                        # silent retry is safe with respect to side effects.
+                        if deltas_were_sent["yes"]:
+                            _partial_tool_in_flight = bool(
+                                result.get("partial_tool_names")
+                            )
+                            _is_sse_conn_err_preview = False
+                            if not _is_timeout and not _is_conn_err:
+                                from openai import APIError as _APIError
+                                if isinstance(e, _APIError) and not getattr(e, "status_code", None):
+                                    _err_lower_preview = str(e).lower()
+                                    _SSE_PREVIEW_PHRASES = (
+                                        "connection lost",
+                                        "connection reset",
+                                        "connection closed",
+                                        "connection terminated",
+                                        "network error",
+                                        "network connection",
+                                        "terminated",
+                                        "peer closed",
+                                        "broken pipe",
+                                        "upstream connect error",
+                                    )
+                                    _is_sse_conn_err_preview = any(
+                                        phrase in _err_lower_preview
+                                        for phrase in _SSE_PREVIEW_PHRASES
+                                    )
+                            _is_transient = (
+                                _is_timeout or _is_conn_err or _is_sse_conn_err_preview
+                            )
+                            _can_silent_retry = (
+                                _partial_tool_in_flight
+                                and _is_transient
+                                and _stream_attempt < _max_stream_retries
+                            )
+                            if not _can_silent_retry:
+                                # Either no tool call was in-flight (so the
+                                # turn was a pure text response — current
+                                # stub-with-recovered-text behaviour is
+                                # correct), or retries are exhausted, or the
+                                # error isn't transient.  Record the error and
+                                # return without retrying (the stub path).
+                                logger.warning(
+                                    "Streaming failed after partial delivery, not retrying: %s", e
+                                )
+                                result["error"] = e
+                                return
+                            # Tool call was in-flight AND error is transient:
+                            # retry silently.  Clear per-attempt state so the
+                            # next stream starts clean.  Fire a "reconnecting"
+                            # marker so the user sees why the preamble is
+                            # about to be re-streamed.
+                            logger.info(
+                                "Streaming attempt %s/%s died mid tool-call "
+                                "(%s: %s) after user-visible text; retrying "
+                                "silently to avoid losing the action. "
+                                "Preamble will re-stream.",
+                                _stream_attempt + 1,
+                                _max_stream_retries + 1,
+                                type(e).__name__,
+                                e,
+                            )
+                            try:
+                                self._fire_stream_delta(
+                                    "\n\n⚠ Connection dropped mid tool-call; "
+                                    "reconnecting…\n\n"
+                                )
+                            except Exception:
+                                pass
+                            # Reset the streamed-text buffer so the retry's
+                            # fresh preamble doesn't get double-recorded in
+                            # _current_streamed_assistant_text (which would
+                            # pollute the interim-visible-text comparison).
+                            try:
+                                self._reset_stream_delivery_tracking()
+                            except Exception:
+                                pass
+                            # Reset in-memory accumulators so the next
+                            # attempt's chunks don't concat onto the dead
+                            # stream's partial JSON.
+                            result["partial_tool_names"] = []
+                            deltas_were_sent["yes"] = False
+                            first_delta_fired["done"] = False
+                            self._emit_status(
+                                f"⚠️ Connection dropped mid tool-call "
+                                f"({type(e).__name__}). Reconnecting… "
+                                f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
+                            )
+                            self._touch_activity(
+                                f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} "
+                                f"mid tool-call after {type(e).__name__}"
+                            )
+                            stale = request_client_holder.get("client")
+                            if stale is not None:
+                                self._close_request_openai_client(
+                                    stale, reason="stream_mid_tool_retry_cleanup"
+                                )
+                                request_client_holder["client"] = None
+                            try:
+                                self._replace_primary_openai_client(
+                                    reason="stream_mid_tool_retry_pool_cleanup"
+                                )
+                            except Exception:
+                                pass
+                            self._emit_status("🔄 Reconnected — resuming…")
+                            continue
+
                        # SSE error events from proxies (e.g. OpenRouter sends
                        # {"error":{"message":"Network connection lost."}}) are
                        # raised as APIError by the OpenAI SDK.  These are
@@ -6153,6 +6260,10 @@ class AIAgent:
            # falling through to OpenRouter defaults.
            fb_base_url_hint = (fb.get("base_url") or "").strip() or None
            fb_api_key_hint = (fb.get("api_key") or "").strip() or None
+            if not fb_api_key_hint:
+                fb_key_env = (fb.get("key_env") or "").strip()
+                if fb_key_env:
+                    fb_api_key_hint = os.getenv(fb_key_env, "").strip() or None
            # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
            # when no explicit key is in the fallback config. Host match
            # (not substring) — see GHSA-76xc-57q6-vm5m.
@@ -58,6 +58,7 @@ AUTHOR_MAP = {
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
+    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
    "valdi.jorge@gmail.com": "jvcl",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
@@ -181,6 +182,7 @@ AUTHOR_MAP = {
    "adavyasharma@gmail.com": "adavyas",
    "acaayush1111@gmail.com": "aayushchaudhary",
    "jason@outland.art": "jasonoutland",
+    "73175452+Magaav@users.noreply.github.com": "Magaav",
    "mrflu1918@proton.me": "SPANISHFLU",
    "morganemoss@gmai.com": "mormio",
    "kopjop926@gmail.com": "cesareth",
@@ -285,6 +287,7 @@ AUTHOR_MAP = {
    "srhtsrht17@gmail.com": "Sertug17",
    "stephenschoettler@gmail.com": "stephenschoettler",
    "tanishq231003@gmail.com": "yyovil",
+    "taosiyuan163@153.com": "taosiyuan163",
    "tesseracttars@gmail.com": "tesseracttars-creator",
    "tianliangjay@gmail.com": "xingkongliang",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
@@ -341,6 +344,7 @@ AUTHOR_MAP = {
    "shalompmc0505@naver.com": "pinion05",
    "105142614+VTRiot@users.noreply.github.com": "VTRiot",
    "vivien000812@gmail.com": "iamagenius00",
+    "89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
 }


@@ -253,6 +253,35 @@ class TestSummaryPrefixNormalization:


 class TestCompressWithClient:
+    def test_system_content_list_gets_compression_note_without_crashing(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [
+            {"role": "system", "content": [{"type": "text", "text": "system prompt"}]},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        assert isinstance(result[0]["content"], list)
+        assert any(
+            isinstance(block, dict)
+            and "compacted into a handoff summary" in block.get("text", "")
+            for block in result[0]["content"]
+        )
+
    def test_summarization_path(self):
        mock_client = MagicMock()
        mock_response = MagicMock()
@@ -460,6 +489,41 @@ class TestCompressWithClient:
        assert len(first_tail) == 1
        assert "summary text" in first_tail[0]["content"]

+    def test_double_collision_merges_summary_into_list_tail_content(self):
+        """Structured tail content should accept a merged summary without TypeError."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
+
+        msgs = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "user", "content": [{"type": "text", "text": "msg 6"}]},
+            {"role": "assistant", "content": "msg 7"},
+            {"role": "user", "content": "msg 8"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        merged_tail = next(
+            m for m in result
+            if m.get("role") == "user" and isinstance(m.get("content"), list)
+        )
+        assert isinstance(merged_tail["content"], list)
+        assert "summary text" in merged_tail["content"][0]["text"]
+        assert any(
+            isinstance(block, dict) and block.get("text") == "msg 6"
+            for block in merged_tail["content"]
+        )
+
    def test_double_collision_user_head_assistant_tail(self):
        """Reverse double collision: head ends with 'user', tail starts with 'assistant'.
        summary='assistant' collides with tail, 'user' collides with head → merge."""
@@ -949,3 +949,94 @@ class TestAdversarialEdgeCases:
        e = MockAPIError("server error", status_code=500, body={"message": None})
        result = classify_api_error(e)
        assert result is not None
+
+
+# ── Test: SSL/TLS transient errors ─────────────────────────────────────
+
+class TestSSLTransientPatterns:
+    """SSL/TLS alerts mid-stream should retry as timeout, not unknown, and
+    should NOT trigger context compression even on a large session.
+
+    Motivation: OpenSSL 3.x changed TLS alert error code format
+    (`SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+    breaking string-exact matching in downstream retry logic.  We match
+    stable substrings instead.
+    """
+
+    def test_bad_record_mac_classifies_as_timeout(self):
+        """Mid-stream bad record mac alert in Python's ssl module (_ssl.c) format."""
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_openssl_3x_format_classifies_as_timeout(self):
+        """New format `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` still matches
+        because we key on both space- and underscore-separated forms of
+        the stable `bad_record_mac` token."""
+        e = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_tls_alert_internal_error_classifies_as_timeout(self):
+        e = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_ssl_handshake_failure_classifies_as_timeout(self):
+        e = Exception("ssl handshake failure during mid-stream")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_prefix_classifies_as_timeout(self):
+        """Python's generic '[SSL: XYZ]' prefix from the ssl module."""
+        e = Exception("[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_alert_on_large_session_does_not_compress(self):
+        """Critical: SSL alerts on big contexts must NOT trigger context
+        compression — compression is expensive and won't fix a transport
+        hiccup.  This is why _SSL_TRANSIENT_PATTERNS is separate from
+        _SERVER_DISCONNECT_PATTERNS.
+        """
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,      # 90% of a 200k-context window
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.timeout
+        assert result.should_compress is False
+
+    def test_plain_disconnect_on_large_session_still_compresses(self):
+        """Regression guard: the context-overflow-via-disconnect path
+        (non-SSL disconnects on large sessions) must still trigger
+        compression.  Only SSL-specific disconnects skip it.
+        """
+        e = Exception("Server disconnected without sending a response")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.context_overflow
+        assert result.should_compress is True
+
+    def test_real_ssl_error_type_classifies_as_timeout(self):
+        """Real ssl.SSLError instance — the type name alone (not message)
+        should route to the transport bucket."""
+        import ssl
+        e = ssl.SSLError("arbitrary ssl error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
@@ -106,3 +106,25 @@ class TestIsLocalEndpoint:
    ])
    def test_remote_endpoints(self, url):
        assert is_local_endpoint(url) is False
+
+    @pytest.mark.parametrize("url", [
+        "http://100.64.0.0:11434",            # lower bound of CGNAT block
+        "http://100.64.0.1:11434/v1",         # lower bound +1
+        "http://100.77.243.5:11434",          # representative Tailscale host
+        "https://100.100.100.100:443",        # Tailscale MagicDNS anchor
+        "https://100.127.255.254:443",        # upper bound -1
+        "http://100.127.255.255:11434",       # upper bound of CGNAT block
+    ])
+    def test_tailscale_cgnat_is_local(self, url):
+        """Tailscale 100.64.0.0/10 should be treated as local for timeout bumps."""
+        assert is_local_endpoint(url) is True
+
+    @pytest.mark.parametrize("url", [
+        "http://100.63.255.255:11434",        # just below CGNAT block
+        "http://100.128.0.1:11434",           # just above CGNAT block
+        "http://100.200.0.1:11434",           # well outside CGNAT
+        "http://99.64.0.1:11434",             # first octet wrong
+    ])
+    def test_near_but_not_cgnat_is_remote(self, url):
+        """Hosts adjacent to but outside 100.64.0.0/10 must not match."""
+        assert is_local_endpoint(url) is False
@@ -114,6 +114,14 @@ class TestAnthropicTransport:
        r = SimpleNamespace(content=[])
        assert transport.validate_response(r) is False

+    def test_validate_response_empty_content_with_end_turn_is_valid(self, transport):
+        r = SimpleNamespace(content=[], stop_reason="end_turn")
+        assert transport.validate_response(r) is True
+
+    def test_validate_response_empty_content_with_tool_use_is_invalid(self, transport):
+        r = SimpleNamespace(content=[], stop_reason="tool_use")
+        assert transport.validate_response(r) is False
+
    def test_validate_response_valid(self, transport):
        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(r) is True
@@ -0,0 +1,60 @@
+"""Tests for the gateway /debug command."""
+
+from unittest.mock import patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource
+
+
+def _make_event(text="/debug", platform=Platform.TELEGRAM,
+                user_id="12345", chat_id="67890"):
+    source = SessionSource(
+        platform=platform,
+        user_id=user_id,
+        chat_id=chat_id,
+        user_name="testuser",
+    )
+    return MessageEvent(text=text, source=source)
+
+
+def _make_runner():
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig()
+    runner.adapters = {}
+    return runner
+
+
+class TestHandleDebugCommand:
+    @pytest.mark.asyncio
+    async def test_debug_sweeps_expired_pastes_before_upload(self):
+        runner = _make_runner()
+        event = _make_event()
+
+        with patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)) as mock_sweep, \
+             patch("hermes_cli.debug._capture_dump", return_value="dump"), \
+             patch("hermes_cli.debug.collect_debug_report", return_value="report"), \
+             patch("hermes_cli.debug.upload_to_pastebin", return_value="https://paste.rs/report"), \
+             patch("hermes_cli.debug._schedule_auto_delete"):
+            result = await runner._handle_debug_command(event)
+
+        mock_sweep.assert_called_once()
+        assert "https://paste.rs/report" in result
+
+    @pytest.mark.asyncio
+    async def test_debug_survives_sweep_failure(self):
+        runner = _make_runner()
+        event = _make_event()
+
+        with patch("hermes_cli.debug._sweep_expired_pastes", side_effect=RuntimeError("offline")), \
+             patch("hermes_cli.debug._capture_dump", return_value="dump"), \
+             patch("hermes_cli.debug.collect_debug_report", return_value="report"), \
+             patch("hermes_cli.debug.upload_to_pastebin", return_value="https://paste.rs/report"), \
+             patch("hermes_cli.debug._schedule_auto_delete"):
+            result = await runner._handle_debug_command(event)
+
+        assert "https://paste.rs/report" in result
@@ -199,6 +199,89 @@ async def test_auto_registered_command_with_args(adapter):
    )


+@pytest.mark.asyncio
+async def test_auto_registers_plugin_commands_for_discord(adapter):
+    """Plugin slash commands should appear as native Discord app commands."""
+    adapter._run_simple_slash = AsyncMock()
+
+    with patch(
+        "hermes_cli.plugins.get_plugin_commands",
+        return_value={
+            "metricas": {
+                "handler": lambda _a: "ok",
+                "description": "Metrics dashboard",
+                "args_hint": "dias:7 formato:json",
+                "plugin": "metrics-plugin",
+            }
+        },
+    ):
+        adapter._register_slash_commands()
+
+    tree_names = set(adapter._client.tree.commands.keys())
+    assert "metricas" in tree_names
+
+    metricas_cmd = adapter._client.tree.commands["metricas"]
+    interaction = SimpleNamespace()
+    await metricas_cmd.callback(interaction, args="dias:7 formato:json")
+    adapter._run_simple_slash.assert_awaited_once_with(
+        interaction, "/metricas dias:7 formato:json"
+    )
+
+
+@pytest.mark.asyncio
+async def test_auto_registered_plugin_command_without_args_hint(adapter):
+    """Plugin commands without args_hint should register as parameterless."""
+    adapter._run_simple_slash = AsyncMock()
+
+    with patch(
+        "hermes_cli.plugins.get_plugin_commands",
+        return_value={
+            "ping": {
+                "handler": lambda _a: "pong",
+                "description": "Ping the plugin",
+                "args_hint": "",
+                "plugin": "ping-plugin",
+            }
+        },
+    ):
+        adapter._register_slash_commands()
+
+    assert "ping" in adapter._client.tree.commands
+    ping_cmd = adapter._client.tree.commands["ping"]
+    interaction = SimpleNamespace()
+    await ping_cmd.callback(interaction)
+    adapter._run_simple_slash.assert_awaited_once_with(interaction, "/ping")
+
+
+@pytest.mark.asyncio
+async def test_plugin_command_name_conflict_skipped(adapter):
+    """A plugin command that collides with a built-in must not override it."""
+    adapter._run_simple_slash = AsyncMock()
+
+    with patch(
+        "hermes_cli.plugins.get_plugin_commands",
+        return_value={
+            "status": {
+                "handler": lambda _a: "plugin-status",
+                "description": "Plugin status",
+                "args_hint": "",
+                "plugin": "shadow-plugin",
+            }
+        },
+    ):
+        adapter._register_slash_commands()
+
+    # Built-ins are registered via @tree.command as plain functions. A
+    # plugin-registered override would install a _FakeCommand instance
+    # (has .callback) via tree.add_command. If the conflict-skip logic
+    # fires, the slot remains a bare function.
+    status_entry = adapter._client.tree.commands["status"]
+    assert callable(status_entry) and not hasattr(status_entry, "callback"), (
+        "plugin registration overrode the built-in /status command — "
+        "the already_registered skip must prevent this"
+    )
+
+
 # ------------------------------------------------------------------
 # _handle_thread_create_slash — success, session dispatch, failure
 # ------------------------------------------------------------------
@@ -220,3 +220,99 @@ class TestEmit:

        await reg.emit("agent:start")  # no context arg
        assert captured[0] == {}
+
+
+class TestEmitCollect:
+    """Tests for emit_collect() — returns handler return values for decision-style hooks."""
+
+    @pytest.mark.asyncio
+    async def test_collects_sync_return_values(self):
+        reg = HookRegistry()
+        reg._handlers["command:status"] = [
+            lambda _e, _c: {"decision": "allow"},
+            lambda _e, _c: {"decision": "deny", "message": "nope"},
+        ]
+
+        results = await reg.emit_collect("command:status", {})
+
+        assert results == [
+            {"decision": "allow"},
+            {"decision": "deny", "message": "nope"},
+        ]
+
+    @pytest.mark.asyncio
+    async def test_collects_async_return_values(self):
+        reg = HookRegistry()
+
+        async def _async_handler(_event_type, _ctx):
+            return {"decision": "handled", "message": "done"}
+
+        reg._handlers["command:ping"] = [_async_handler]
+
+        results = await reg.emit_collect("command:ping", {})
+
+        assert results == [{"decision": "handled", "message": "done"}]
+
+    @pytest.mark.asyncio
+    async def test_drops_none_return_values(self):
+        reg = HookRegistry()
+        reg._handlers["command:x"] = [
+            lambda _e, _c: None,  # fire-and-forget, returns nothing
+            lambda _e, _c: {"decision": "deny"},
+            lambda _e, _c: None,
+        ]
+
+        results = await reg.emit_collect("command:x", {})
+
+        assert results == [{"decision": "deny"}]
+
+    @pytest.mark.asyncio
+    async def test_handler_exception_does_not_abort_chain(self):
+        reg = HookRegistry()
+
+        def _raises(_e, _c):
+            raise ValueError("boom")
+
+        reg._handlers["command:x"] = [
+            _raises,
+            lambda _e, _c: {"decision": "allow"},
+        ]
+
+        results = await reg.emit_collect("command:x", {})
+
+        # First handler's exception is swallowed; second handler's value still collected.
+        assert results == [{"decision": "allow"}]
+
+    @pytest.mark.asyncio
+    async def test_wildcard_match_also_collected(self):
+        reg = HookRegistry()
+        reg._handlers["command:*"] = [lambda _e, _c: {"decision": "allow"}]
+        reg._handlers["command:reset"] = [lambda _e, _c: {"decision": "deny"}]
+
+        results = await reg.emit_collect("command:reset", {})
+
+        # Exact match fires first, then wildcard.
+        assert results == [{"decision": "deny"}, {"decision": "allow"}]
+
+    @pytest.mark.asyncio
+    async def test_no_handlers_returns_empty_list(self):
+        reg = HookRegistry()
+
+        results = await reg.emit_collect("unknown:event", {})
+
+        assert results == []
+
+    @pytest.mark.asyncio
+    async def test_default_context(self):
+        reg = HookRegistry()
+        captured = []
+
+        def _handler(event_type, context):
+            captured.append((event_type, context))
+            return None
+
+        reg._handlers["agent:start"] = [_handler]
+
+        await reg.emit_collect("agent:start")  # no context arg
+
+        assert captured == [("agent:start", {})]
@@ -65,7 +65,11 @@ class TestGatewayPidState:
        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)

-        assert status.get_running_pid() == os.getpid()
+        assert status.acquire_gateway_runtime_lock() is True
+        try:
+            assert status.get_running_pid() == os.getpid()
+        finally:
+            status.release_gateway_runtime_lock()

    def test_get_running_pid_accepts_script_style_gateway_cmdline(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -85,7 +89,11 @@ class TestGatewayPidState:
            lambda pid: "/venv/bin/python /repo/hermes_cli/main.py gateway run --replace",
        )

-        assert status.get_running_pid() == os.getpid()
+        assert status.acquire_gateway_runtime_lock() is True
+        try:
+            assert status.get_running_pid() == os.getpid()
+        finally:
+            status.release_gateway_runtime_lock()

    def test_get_running_pid_accepts_explicit_pid_path_without_cleanup(self, tmp_path, monkeypatch):
        other_home = tmp_path / "profile-home"
@@ -102,9 +110,116 @@ class TestGatewayPidState:
        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)

+        lock_path = other_home / "gateway.lock"
+        lock_path.write_text(json.dumps({
+            "pid": os.getpid(),
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+        monkeypatch.setattr(status, "is_gateway_runtime_lock_active", lambda lock_path=None: True)
+
        assert status.get_running_pid(pid_path, cleanup_stale=False) == os.getpid()
        assert pid_path.exists()

+    def test_runtime_lock_claims_and_releases_liveness(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+        assert status.is_gateway_runtime_lock_active() is False
+        assert status.acquire_gateway_runtime_lock() is True
+        assert status.is_gateway_runtime_lock_active() is True
+
+        status.release_gateway_runtime_lock()
+
+        assert status.is_gateway_runtime_lock_active() is False
+
+    def test_get_running_pid_treats_pid_file_as_stale_without_runtime_lock(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        pid_path = tmp_path / "gateway.pid"
+        pid_path.write_text(json.dumps({
+            "pid": os.getpid(),
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+
+        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
+        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)
+
+        assert status.get_running_pid() is None
+        assert not pid_path.exists()
+
+    def test_get_running_pid_cleans_stale_metadata_from_dead_foreign_pid(self, tmp_path, monkeypatch):
+        """Stale PID file from a *different* PID (crashed process) must still be cleaned.
+
+        Regression for: ``remove_pid_file()`` defensively refuses to delete a
+        PID file whose pid != ``os.getpid()`` to protect ``--replace``
+        handoffs.  Stale-cleanup must not go through that path or real
+        crashed-process PID files never get removed.
+        """
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        pid_path = tmp_path / "gateway.pid"
+        lock_path = tmp_path / "gateway.lock"
+
+        # PID that is very unlikely to be alive (not strictly guaranteed) and is not our own.
+        dead_foreign_pid = 999999
+        assert dead_foreign_pid != os.getpid()
+
+        pid_path.write_text(json.dumps({
+            "pid": dead_foreign_pid,
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+        lock_path.write_text(json.dumps({
+            "pid": dead_foreign_pid,
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+
+        # No live lock holder → get_running_pid should clean both files.
+        assert status.get_running_pid() is None
+        assert not pid_path.exists()
+        assert not lock_path.exists()
+
+    def test_get_running_pid_falls_back_to_live_lock_record(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        pid_path = tmp_path / "gateway.pid"
+        pid_path.write_text(json.dumps({
+            "pid": 99999,
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
+        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)
+        monkeypatch.setattr(
+            status,
+            "_build_pid_record",
+            lambda: {
+                "pid": os.getpid(),
+                "kind": "hermes-gateway",
+                "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+                "start_time": 123,
+            },
+        )
+        assert status.acquire_gateway_runtime_lock() is True
+
+        def fake_kill(pid, sig):
+            if pid == 99999:
+                raise ProcessLookupError
+            return None
+
+        monkeypatch.setattr(status.os, "kill", fake_kill)
+
+        try:
+            assert status.get_running_pid() == os.getpid()
+        finally:
+            status.release_gateway_runtime_lock()
+

 class TestGatewayRuntimeStatus:
    def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch):
@@ -41,7 +41,11 @@ def _make_runner():
    adapter.send = AsyncMock()
    runner.adapters = {Platform.TELEGRAM: adapter}
    runner._voice_mode = {}
-    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.hooks = SimpleNamespace(
+        emit=AsyncMock(),
+        emit_collect=AsyncMock(return_value=[]),
+        loaded_hooks=False,
+    )

    session_entry = SessionEntry(
        session_key=build_session_key(_make_source()),
@@ -164,3 +168,206 @@ async def test_underscored_alias_for_hyphenated_builtin_not_flagged(monkeypatch)
    # Whatever /reload_mcp returns, it must not be the unknown-command guard.
    if result is not None:
        assert "Unknown command" not in result
+
+
+# ------------------------------------------------------------------
+# command:<name> decision hook — deny / handled / rewrite
+# ------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_command_hook_can_deny_before_dispatch(monkeypatch):
+    """A handler returning {"decision": "deny"} blocks a slash command early."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._run_agent = AsyncMock(
+        side_effect=AssertionError("denied slash command leaked to the agent")
+    )
+    runner._handle_status_command = AsyncMock(
+        side_effect=AssertionError("denied slash command reached its handler")
+    )
+    runner.hooks.emit_collect = AsyncMock(
+        return_value=[{"decision": "deny", "message": "Blocked by ACL"}]
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result == "Blocked by ACL"
+    runner._run_agent.assert_not_called()
+    # The emit_collect call should use the canonical command name.
+    call_args = runner.hooks.emit_collect.await_args
+    assert call_args.args[0] == "command:status"
+
+
+@pytest.mark.asyncio
+async def test_command_hook_deny_without_message_uses_default(monkeypatch):
+    """A deny decision with no message falls back to a generic blocked string."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._handle_status_command = AsyncMock(
+        side_effect=AssertionError("denied slash command reached its handler")
+    )
+    runner.hooks.emit_collect = AsyncMock(return_value=[{"decision": "deny"}])
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result is not None
+    assert "blocked" in result.lower()
+
+
+@pytest.mark.asyncio
+async def test_command_hook_can_mark_command_as_handled(monkeypatch):
+    """A handled decision short-circuits dispatch cleanly with a custom reply."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._handle_status_command = AsyncMock(
+        side_effect=AssertionError("handled slash command reached its handler")
+    )
+    runner.hooks.emit_collect = AsyncMock(
+        return_value=[{"decision": "handled", "message": "Already handled upstream"}]
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result == "Already handled upstream"
+
+
+@pytest.mark.asyncio
+async def test_command_hook_allow_decision_is_passthrough(monkeypatch):
+    """A handler returning {"decision": "allow"} must NOT prevent normal dispatch."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._handle_status_command = AsyncMock(return_value="status: ok")
+    runner.hooks.emit_collect = AsyncMock(
+        return_value=[{"decision": "allow"}]
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result == "status: ok"
+    runner._handle_status_command.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_command_hook_non_dict_return_values_ignored(monkeypatch):
+    """Hook return values that aren't dicts must not break dispatch."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._handle_status_command = AsyncMock(return_value="status: ok")
+    runner.hooks.emit_collect = AsyncMock(
+        return_value=["some string", 42, None, {}]
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result == "status: ok"
+
+
+@pytest.mark.asyncio
+async def test_command_hook_fires_for_plugin_registered_command(monkeypatch):
+    """Plugin-registered slash commands should also trigger command:<name> hooks."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._run_agent = AsyncMock(
+        side_effect=AssertionError("plugin command leaked to the agent")
+    )
+    runner.hooks.emit_collect = AsyncMock(
+        return_value=[{"decision": "handled", "message": "intercepted"}]
+    )
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+    # Stub plugin command lookup so is_gateway_known_command() recognizes /metricas.
+    from hermes_cli import plugins as _plugins_mod
+
+    monkeypatch.setattr(
+        _plugins_mod,
+        "get_plugin_commands",
+        lambda: {"metricas": {"description": "Metrics", "args_hint": "dias:7"}},
+    )
+
+    result = await runner._handle_message(_make_event("/metricas dias:7"))
+
+    assert result == "intercepted"
+    # Hook event name uses the plugin command as canonical.
+    call_args = runner.hooks.emit_collect.await_args
+    assert call_args.args[0] == "command:metricas"
+    # Args are said to be passed in both "args" and "raw_args"; only "raw_args" is asserted here.
+    ctx = call_args.args[1]
+    assert ctx["raw_args"] == "dias:7"
+
+
+@pytest.mark.asyncio
+async def test_command_hook_rewrite_routes_to_plugin(monkeypatch):
+    """A rewrite decision should re-resolve the command and route to the new one."""
+    import gateway.run as gateway_run
+
+    runner = _make_runner()
+    runner._run_agent = AsyncMock(
+        side_effect=AssertionError("rewritten command leaked to the agent")
+    )
+
+    call_log = []
+
+    async def _emit_collect(event_type, ctx):
+        call_log.append(event_type)
+        if event_type == "command:status":
+            return [
+                {
+                    "decision": "rewrite",
+                    "command_name": "metricas",
+                    "raw_args": "dias:7",
+                }
+            ]
+        return []
+
+    runner.hooks.emit_collect = AsyncMock(side_effect=_emit_collect)
+
+    monkeypatch.setattr(
+        gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}
+    )
+    from hermes_cli import plugins as _plugins_mod
+
+    monkeypatch.setattr(
+        _plugins_mod,
+        "get_plugin_commands",
+        lambda: {"metricas": {"description": "Metrics", "args_hint": "dias:7"}},
+    )
+    monkeypatch.setattr(
+        _plugins_mod,
+        "get_plugin_command_handler",
+        lambda name: (lambda args: f"metrics {args}") if name == "metricas" else None,
+    )
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert result == "metrics dias:7"
+    # First emit_collect fires on the original command; after rewrite the
+    # dispatcher does NOT re-fire for the new command (one decision per turn).
+    assert call_log == ["command:status"]
@@ -1208,3 +1208,119 @@ class TestDiscordSkillCommandsByCategory:
        assert "axolotl" in names
        assert "vllm" in names
        assert len(uncategorized) == 0
+
+
+# ---------------------------------------------------------------------------
+# Plugin slash command integration
+# ---------------------------------------------------------------------------
+
+class TestPluginCommandEnumeration:
+    """Plugin commands registered via ctx.register_command() must be surfaced
+    by every gateway enumerator (Telegram menu, Slack subcommand map, etc.).
+    """
+
+    def _patch_plugin_commands(self, monkeypatch, commands):
+        """Monkeypatch hermes_cli.plugins.get_plugin_commands() to a fixed dict."""
+        from hermes_cli import plugins as _plugins_mod
+
+        monkeypatch.setattr(
+            _plugins_mod, "get_plugin_commands", lambda: dict(commands)
+        )
+
+    def test_plugin_command_appears_in_telegram_menu(self, monkeypatch):
+        """/metricas registered by a plugin must appear in Telegram BotCommand menu."""
+        self._patch_plugin_commands(monkeypatch, {
+            "metricas": {
+                "handler": lambda _a: "ok",
+                "description": "Metrics dashboard",
+                "args_hint": "dias:7",
+                "plugin": "metrics-plugin",
+            }
+        })
+        names = {name for name, _desc in telegram_bot_commands()}
+        assert "metricas" in names
+
+    def test_plugin_command_appears_in_slack_subcommand_map(self, monkeypatch):
+        """/hermes metricas must route through the Slack subcommand map."""
+        self._patch_plugin_commands(monkeypatch, {
+            "metricas": {
+                "handler": lambda _a: "ok",
+                "description": "Metrics",
+                "args_hint": "",
+                "plugin": "metrics-plugin",
+            }
+        })
+        mapping = slack_subcommand_map()
+        assert mapping.get("metricas") == "/metricas"
+
+    def test_plugin_command_does_not_shadow_builtin_in_slack(self, monkeypatch):
+        """If a plugin registers a name that collides with a built-in, the built-in mapping wins."""
+        self._patch_plugin_commands(monkeypatch, {
+            "status": {
+                "handler": lambda _a: "plugin-status",
+                "description": "Plugin status",
+                "args_hint": "",
+                "plugin": "shadow-plugin",
+            }
+        })
+        mapping = slack_subcommand_map()
+        # /status must still be present. NOTE(review): a plugin entry may map to "/status" too — confirm this detects overwrites.
+        assert mapping.get("status") == "/status"
+
+    def test_plugin_command_with_hyphens_sanitized_for_telegram(self, monkeypatch):
+        """Plugin names containing hyphens must be underscore-normalized for Telegram."""
+        self._patch_plugin_commands(monkeypatch, {
+            "my-plugin-cmd": {
+                "handler": lambda _a: "ok",
+                "description": "desc",
+                "args_hint": "",
+                "plugin": "p",
+            }
+        })
+        names = {name for name, _desc in telegram_bot_commands()}
+        assert "my_plugin_cmd" in names
+        assert "my-plugin-cmd" not in names
+
+    def test_is_gateway_known_command_recognizes_plugin_commands(self, monkeypatch):
+        """is_gateway_known_command() must return True for plugin commands."""
+        from hermes_cli.commands import is_gateway_known_command
+
+        self._patch_plugin_commands(monkeypatch, {
+            "metricas": {
+                "handler": lambda _a: "ok",
+                "description": "Metrics",
+                "args_hint": "",
+                "plugin": "p",
+            }
+        })
+        assert is_gateway_known_command("metricas") is True
+        assert is_gateway_known_command("definitely-not-registered") is False
+
+    def test_is_gateway_known_command_still_recognizes_builtins(self, monkeypatch):
+        """Built-ins stay known when plugin discovery fails; None and the empty string are never known."""
+        from hermes_cli import plugins as _plugins_mod
+        from hermes_cli.commands import is_gateway_known_command
+
+        def _boom():
+            raise RuntimeError("plugin system down")
+
+        monkeypatch.setattr(_plugins_mod, "get_plugin_commands", _boom)
+
+        assert is_gateway_known_command("status") is True
+        assert is_gateway_known_command(None) is False
+        assert is_gateway_known_command("") is False
+
+    def test_plugin_enumerator_handles_missing_plugin_manager(self, monkeypatch):
+        """Enumerators must never raise when plugin discovery raises."""
+        from hermes_cli import plugins as _plugins_mod
+
+        def _boom():
+            raise RuntimeError("plugin system down")
+
+        monkeypatch.setattr(_plugins_mod, "get_plugin_commands", _boom)
+
+        # Both calls should succeed and just return the built-in set.
+        tg_names = {name for name, _desc in telegram_bot_commands()}
+        slack_names = set(slack_subcommand_map())
+        assert "status" in tg_names
+        assert "status" in slack_names
@@ -137,50 +137,105 @@ class TestUploadToPastebin:
 # Log reading
 # ---------------------------------------------------------------------------

-class TestReadFullLog:
-    """Test _read_full_log for standalone log uploads."""
+class TestCaptureLogSnapshot:
+    """Test _capture_log_snapshot for log reading and truncation."""

    def test_reads_small_file(self, hermes_home):
-        from hermes_cli.debug import _read_full_log
+        from hermes_cli.debug import _capture_log_snapshot

-        content = _read_full_log("agent")
-        assert content is not None
-        assert "session started" in content
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+        assert snap.full_text is not None
+        assert "session started" in snap.full_text
+        assert "session started" in snap.tail_text

    def test_returns_none_for_missing(self, tmp_path, monkeypatch):
        home = tmp_path / ".hermes"
        home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(home))

-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("agent") is None
+        from hermes_cli.debug import _capture_log_snapshot
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+        assert snap.full_text is None
+        assert snap.tail_text == "(file not found)"

-    def test_returns_none_for_empty(self, hermes_home):
-        # Truncate agent.log to empty
+    def test_empty_primary_reports_file_empty(self, hermes_home):
+        """Empty primary (no .1 fallback) surfaces as '(file empty)', not missing."""
        (hermes_home / "logs" / "agent.log").write_text("")

-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("agent") is None
+        from hermes_cli.debug import _capture_log_snapshot
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+        assert snap.full_text is None
+        assert snap.tail_text == "(file empty)"
+
+    def test_race_truncate_after_resolve_reports_empty(self, hermes_home, monkeypatch):
+        """If the log is truncated between resolve and stat, say 'empty', not 'missing'."""
+        log_path = hermes_home / "logs" / "agent.log"
+        from hermes_cli import debug
+
+        monkeypatch.setattr(debug, "_resolve_log_path", lambda _name: log_path)
+        log_path.write_text("")
+
+        snap = debug._capture_log_snapshot("agent", tail_lines=10)
+        assert snap.path == log_path
+        assert snap.full_text is None
+        assert snap.tail_text == "(file empty)"

    def test_truncates_large_file(self, hermes_home):
        """Files larger than max_bytes get tail-truncated."""
-        from hermes_cli.debug import _read_full_log
+        from hermes_cli.debug import _capture_log_snapshot

        # Write a file larger than 1KB
        big_content = "x" * 100 + "\n"
        (hermes_home / "logs" / "agent.log").write_text(big_content * 200)

-        content = _read_full_log("agent", max_bytes=1024)
-        assert content is not None
-        assert "truncated" in content
+        snap = _capture_log_snapshot("agent", tail_lines=10, max_bytes=1024)
+        assert snap.full_text is not None
+        assert "truncated" in snap.full_text
+
+    def test_keeps_first_line_when_truncation_on_boundary(self, hermes_home):
+        """When truncation lands on a line boundary, keep the first full line."""
+        from hermes_cli.debug import _capture_log_snapshot
+
+        # File must exceed the initial chunk_size (8192) used by the
+        # backward-reading loop so the truncation path actually fires.
+        line = "A" * 99 + "\n"  # 100 bytes per line
+        num_lines = 200  # 20000 bytes
+        (hermes_home / "logs" / "agent.log").write_text(line * num_lines)
+
+        # max_bytes = 1000 = 100 * 10 → cut at byte 20000 - 1000 = 19000,
+        # and byte 19000 - 1 is '\n'.  Boundary hit → keep all 10 lines.
+        snap = _capture_log_snapshot("agent", tail_lines=5, max_bytes=1000)
+        assert snap.full_text is not None
+        assert "truncated" in snap.full_text
+        raw = snap.full_text.split("\n", 1)[1]  # drop the first line (presumably the truncation notice)
+        kept = [row for row in raw.strip().splitlines() if row.startswith("A")]
+        assert len(kept) == 10
+
+    def test_drops_partial_when_truncation_mid_line(self, hermes_home):
+        """When truncation lands mid-line, drop the partial fragment."""
+        from hermes_cli.debug import _capture_log_snapshot
+
+        line = "A" * 99 + "\n"  # 100 bytes per line
+        num_lines = 200  # 20000 bytes
+        (hermes_home / "logs" / "agent.log").write_text(line * num_lines)
+
+        # max_bytes = 950 is not a multiple of the 100-byte line length → mid-line cut.
+        snap = _capture_log_snapshot("agent", tail_lines=5, max_bytes=950)
+        assert snap.full_text is not None
+        assert "truncated" in snap.full_text
+        raw = snap.full_text.split("\n", 1)[1]  # drop the first line (presumably the truncation notice)
+        kept = [row for row in raw.strip().splitlines() if row.startswith("A")]
+        # 950 / 100 = 9.5 → 9 complete lines after dropping partial
+        assert len(kept) == 9

    def test_unknown_log_returns_none(self, hermes_home):
-        from hermes_cli.debug import _read_full_log
-        assert _read_full_log("nonexistent") is None
+        from hermes_cli.debug import _capture_log_snapshot
+        snap = _capture_log_snapshot("nonexistent", tail_lines=10)
+        assert snap.full_text is None

    def test_falls_back_to_rotated_file(self, hermes_home):
        """When gateway.log doesn't exist, falls back to gateway.log.1."""
-        from hermes_cli.debug import _read_full_log
+        from hermes_cli.debug import _capture_log_snapshot

        logs_dir = hermes_home / "logs"
        # Remove the primary (if any) and create a .1 rotation
@@ -189,33 +244,33 @@ class TestReadFullLog:
            "2026-04-12 10:00:00 INFO gateway.run: rotated content\n"
        )

-        content = _read_full_log("gateway")
-        assert content is not None
-        assert "rotated content" in content
+        snap = _capture_log_snapshot("gateway", tail_lines=10)
+        assert snap.full_text is not None
+        assert "rotated content" in snap.full_text

    def test_prefers_primary_over_rotated(self, hermes_home):
        """Primary log is used when it exists, even if .1 also exists."""
-        from hermes_cli.debug import _read_full_log
+        from hermes_cli.debug import _capture_log_snapshot

        logs_dir = hermes_home / "logs"
        (logs_dir / "gateway.log").write_text("primary content\n")
        (logs_dir / "gateway.log.1").write_text("rotated content\n")

-        content = _read_full_log("gateway")
-        assert "primary content" in content
-        assert "rotated" not in content
+        snap = _capture_log_snapshot("gateway", tail_lines=10)
+        assert "primary content" in snap.full_text
+        assert "rotated" not in snap.full_text

    def test_falls_back_when_primary_empty(self, hermes_home):
        """Empty primary log falls back to .1 rotation."""
-        from hermes_cli.debug import _read_full_log
+        from hermes_cli.debug import _capture_log_snapshot

        logs_dir = hermes_home / "logs"
        (logs_dir / "agent.log").write_text("")
        (logs_dir / "agent.log.1").write_text("rotated agent data\n")

-        content = _read_full_log("agent")
-        assert content is not None
-        assert "rotated agent data" in content
+        snap = _capture_log_snapshot("agent", tail_lines=10)
+        assert snap.full_text is not None
+        assert "rotated agent data" in snap.full_text


 # ---------------------------------------------------------------------------
@@ -283,6 +338,44 @@ class TestCollectDebugReport:
 class TestRunDebugShare:
    """Test the run_debug_share CLI handler."""

+    def test_share_sweeps_expired_pastes(self, hermes_home, capsys):
+        """Slash-command path should sweep old pending deletes before uploading."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)) as mock_sweep, \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    return_value="https://paste.rs/test"):
+            run_debug_share(args)
+
+        mock_sweep.assert_called_once()
+        assert "Debug report uploaded" in capsys.readouterr().out
+
+    def test_share_survives_sweep_failure(self, hermes_home, capsys):
+        """Expired-paste cleanup is best-effort and must not block sharing."""
+        from hermes_cli.debug import run_debug_share
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch(
+                 "hermes_cli.debug._sweep_expired_pastes",
+                 side_effect=RuntimeError("offline"),
+             ), \
+             patch("hermes_cli.debug.upload_to_pastebin",
+                    return_value="https://paste.rs/test"):
+            run_debug_share(args)
+
+        assert "https://paste.rs/test" in capsys.readouterr().out
+
    def test_local_flag_prints_full_logs(self, hermes_home, capsys):
        """--local prints the report plus full log contents."""
        from hermes_cli.debug import run_debug_share
@@ -340,6 +433,55 @@ class TestRunDebugShare:
        assert "--- hermes dump ---" in gateway_paste
        assert "--- full gateway.log ---" in gateway_paste

+    def test_share_keeps_report_and_full_log_on_same_snapshot(self, hermes_home, capsys):
+        """A mid-run rotation must not make the full agent.log upload older than the report."""
+        from hermes_cli.debug import run_debug_share, collect_debug_report as real_collect_debug_report
+
+        logs_dir = hermes_home / "logs"
+        (logs_dir / "agent.log").write_text(
+            "2026-04-22 12:00:00 INFO agent: newest line\n"
+        )
+        (logs_dir / "agent.log.1").write_text(
+            "2026-04-10 12:00:00 INFO agent: old rotated line\n"
+        )
+
+        args = MagicMock()
+        args.lines = 50
+        args.expire = 7
+        args.local = False
+
+        uploaded_content = []
+
+        def _mock_upload(content, expiry_days=7):
+            uploaded_content.append(content)
+            return f"https://paste.rs/paste{len(uploaded_content)}"
+
+        def _wrapped_collect_debug_report(*, log_lines=200, dump_text="", log_snapshots=None):
+            report = real_collect_debug_report(
+                log_lines=log_lines,
+                dump_text=dump_text,
+                log_snapshots=log_snapshots,
+            )
+            # Simulate the live log rotating after the report is built but
+            # before the old implementation would have re-read agent.log for
+            # standalone upload.
+            (logs_dir / "agent.log").write_text("")
+            (logs_dir / "agent.log.1").write_text(
+                "2026-04-10 12:00:00 INFO agent: old rotated line\n"
+            )
+            return report
+
+        with patch("hermes_cli.dump.run_dump"), \
+             patch("hermes_cli.debug.collect_debug_report", side_effect=_wrapped_collect_debug_report), \
+             patch("hermes_cli.debug.upload_to_pastebin", side_effect=_mock_upload):
+            run_debug_share(args)
+
+        report_paste = uploaded_content[0]
+        agent_paste = uploaded_content[1]
+        assert "2026-04-22 12:00:00 INFO agent: newest line" in report_paste
+        assert "2026-04-22 12:00:00 INFO agent: newest line" in agent_paste
+        assert "old rotated line" not in agent_paste
+
    def test_share_skips_missing_logs(self, tmp_path, monkeypatch, capsys):
        """Only uploads logs that exist."""
        home = tmp_path / ".hermes"
@@ -121,6 +121,12 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys
            return SimpleNamespace(returncode=0, stdout="", stderr="")
        if cmd[:3] == ["systemctl", "--user", "is-active"]:
            return SimpleNamespace(returncode=0, stdout="active\n", stderr="")
+        if cmd[:3] == ["systemctl", "--user", "show"]:
+            return SimpleNamespace(
+                returncode=0,
+                stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
+                stderr="",
+            )
        raise AssertionError(f"Unexpected command: {cmd}")

    monkeypatch.setattr(gateway.subprocess, "run", fake_run)
@@ -352,3 +358,24 @@ class TestWaitForGatewayExit:

        assert killed == 2
        assert calls == [(11, True), (22, True)]
+
+
+class TestStopProfileGateway:
+    def test_stop_profile_gateway_keeps_pid_file_when_process_still_running(self, monkeypatch):
+        calls = {"kill": 0, "remove": 0}
+
+        monkeypatch.setattr("gateway.status.get_running_pid", lambda: 12345)
+        monkeypatch.setattr(
+            gateway.os,
+            "kill",
+            lambda pid, sig: calls.__setitem__("kill", calls["kill"] + 1),
+        )
+        monkeypatch.setattr("time.sleep", lambda _: None)
+        monkeypatch.setattr(
+            "gateway.status.remove_pid_file",
+            lambda: calls.__setitem__("remove", calls["remove"] + 1),
+        )
+
+        assert gateway.stop_profile_gateway() is True
+        assert calls["kill"] == 21
+        assert calls["remove"] == 0
@@ -77,8 +77,10 @@ class TestSystemdServiceRefresh:
        gateway_cli.systemd_restart()

        assert unit_path.read_text(encoding="utf-8") == "new unit\n"
-        assert calls[:2] == [
+        assert calls[:4] == [
            ["systemctl", "--user", "daemon-reload"],
+            ["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus"],
+            ["systemctl", "--user", "reset-failed", gateway_cli.get_service_name()],
            ["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
        ]

@@ -474,13 +476,21 @@ class TestGatewaySystemServiceRouting:
                raise ProcessLookupError()
        monkeypatch.setattr(os, "kill", fake_kill)

-        # Simulate systemctl is-active returning "active" with a new PID
+        # Simulate systemctl reset-failed/start followed by an active unit
        new_pid = [None]
        def fake_subprocess_run(cmd, **kwargs):
-            if "is-active" in cmd:
-                result = SimpleNamespace(stdout="active\n", returncode=0)
-                new_pid[0] = 999  # new PID
-                return result
+            if "reset-failed" in cmd:
+                calls.append(("reset-failed", cmd))
+                return SimpleNamespace(stdout="", returncode=0)
+            if "start" in cmd:
+                calls.append(("start", cmd))
+                return SimpleNamespace(stdout="", returncode=0)
+            if "show" in cmd:
+                new_pid[0] = 999
+                return SimpleNamespace(
+                    stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
+                    returncode=0,
+                )
            raise AssertionError(f"Unexpected systemctl call: {cmd}")

        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
@@ -494,9 +504,131 @@ class TestGatewaySystemServiceRouting:
        gateway_cli.systemd_restart()

        assert ("self", 654) in calls
+        assert any(call[0] == "reset-failed" for call in calls)
+        assert any(call[0] == "start" for call in calls)
        out = capsys.readouterr().out.lower()
        assert "restarted" in out

+    def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
+        monkeypatch.setattr(
+            "gateway.status.read_runtime_status",
+            lambda: {"restart_requested": True, "gateway_state": "stopped"},
+        )
+        monkeypatch.setattr(gateway_cli, "_request_gateway_self_restart", lambda pid: False)
+
+        calls = []
+        started = {"value": False}
+
+        def fake_subprocess_run(cmd, **kwargs):
+            if "show" in cmd:
+                if not started["value"]:
+                    return SimpleNamespace(
+                        stdout=(
+                            "ActiveState=failed\n"
+                            "SubState=failed\n"
+                            "Result=exit-code\n"
+                            f"ExecMainStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}\n"
+                        ),
+                        returncode=0,
+                    )
+                return SimpleNamespace(
+                    stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
+                    returncode=0,
+                )
+            if "reset-failed" in cmd:
+                calls.append(("reset-failed", cmd))
+                return SimpleNamespace(stdout="", returncode=0)
+            if "start" in cmd:
+                started["value"] = True
+                calls.append(("start", cmd))
+                return SimpleNamespace(stdout="", returncode=0)
+            raise AssertionError(f"Unexpected command: {cmd}")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
+        monkeypatch.setattr(
+            "gateway.status.get_running_pid",
+            lambda: 999 if started["value"] else None,
+        )
+
+        gateway_cli.systemd_restart()
+
+        assert any(call[0] == "reset-failed" for call in calls)
+        assert any(call[0] == "start" for call in calls)
+        out = capsys.readouterr().out.lower()
+        assert "restarted" in out
+
+    def test_systemd_status_surfaces_planned_restart_failure(self, monkeypatch, capsys):
+        unit = SimpleNamespace(exists=lambda: True)
+        monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit)
+        monkeypatch.setattr(gateway_cli, "has_conflicting_systemd_units", lambda: False)
+        monkeypatch.setattr(gateway_cli, "has_legacy_hermes_units", lambda: False)
+        monkeypatch.setattr(gateway_cli, "systemd_unit_is_current", lambda system=False: True)
+        monkeypatch.setattr(gateway_cli, "_runtime_health_lines", lambda: ["⚠ Last shutdown reason: Gateway restart requested"])
+        monkeypatch.setattr(gateway_cli, "get_systemd_linger_status", lambda: (True, ""))
+        monkeypatch.setattr(gateway_cli, "_read_systemd_unit_properties", lambda system=False: {
+            "ActiveState": "failed",
+            "SubState": "failed",
+            "Result": "exit-code",
+            "ExecMainStatus": str(GATEWAY_SERVICE_RESTART_EXIT_CODE),
+        })
+
+        calls = []
+
+        def fake_run_systemctl(args, **kwargs):
+            calls.append(args)
+            if args[:2] == ["status", gateway_cli.get_service_name()]:
+                return SimpleNamespace(returncode=0, stdout="", stderr="")
+            if args[:2] == ["is-active", gateway_cli.get_service_name()]:
+                return SimpleNamespace(returncode=3, stdout="failed\n", stderr="")
+            raise AssertionError(f"Unexpected args: {args}")
+
+        monkeypatch.setattr(gateway_cli, "_run_systemctl", fake_run_systemctl)
+
+        gateway_cli.systemd_status()
+
+        out = capsys.readouterr().out
+        assert "Planned restart is stuck in systemd failed state" in out
+
+    def test_gateway_status_dispatches_full_flag(self, monkeypatch):
+        user_unit = SimpleNamespace(exists=lambda: True)
+        system_unit = SimpleNamespace(exists=lambda: False)
+
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: system_unit if system else user_unit,
+        )
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_gateway_runtime_snapshot",
+            lambda system=False: gateway_cli.GatewayRuntimeSnapshot(
+                manager="systemd (user)",
+                service_installed=True,
+                service_running=False,
+                gateway_pids=(),
+                service_scope="user",
+            ),
+        )
+
+        calls = []
+        monkeypatch.setattr(
+            gateway_cli,
+            "systemd_status",
+            lambda deep=False, system=False, full=False: calls.append((deep, system, full)),
+        )
+
+        gateway_cli.gateway_command(
+            SimpleNamespace(gateway_command="status", deep=False, system=False, full=True)
+        )
+
+        assert calls == [(False, False, True)]
+
    def test_gateway_install_passes_system_flags(self, monkeypatch):
        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
@@ -547,11 +679,15 @@ class TestGatewaySystemServiceRouting:
        )

        calls = []
-        monkeypatch.setattr(gateway_cli, "systemd_status", lambda deep=False, system=False: calls.append((deep, system)))
+        monkeypatch.setattr(
+            gateway_cli,
+            "systemd_status",
+            lambda deep=False, system=False, full=False: calls.append((deep, system, full)),
+        )

        gateway_cli.gateway_command(SimpleNamespace(gateway_command="status", deep=False, system=False))

-        assert calls == [(False, False)]
+        assert calls == [(False, False, False)]

    def test_gateway_status_reports_manual_process_when_service_is_stopped(self, monkeypatch, capsys):
        user_unit = SimpleNamespace(exists=lambda: True)
@@ -565,7 +701,11 @@ class TestGatewaySystemServiceRouting:
            "get_systemd_unit_path",
            lambda system=False: system_unit if system else user_unit,
        )
-        monkeypatch.setattr(gateway_cli, "systemd_status", lambda deep=False, system=False: print("service stopped"))
+        monkeypatch.setattr(
+            gateway_cli,
+            "systemd_status",
+            lambda deep=False, system=False, full=False: print("service stopped"),
+        )
        monkeypatch.setattr(
            gateway_cli,
            "get_gateway_runtime_snapshot",
@@ -1570,6 +1710,23 @@ class TestMigrateLegacyCommand:

        assert called == {"interactive": False, "dry_run": False}

+
+class TestGatewayStatusParser:
+    def test_gateway_status_subparser_accepts_full_flag(self):
+        import subprocess
+        import sys
+
+        result = subprocess.run(
+            [sys.executable, "-m", "hermes_cli.main", "gateway", "status", "-l", "--help"],
+            cwd=str(gateway_cli.PROJECT_ROOT),
+            capture_output=True,
+            text=True,
+            timeout=15,
+        )
+
+        assert result.returncode == 0
+        assert "unrecognized arguments" not in result.stderr
+
    def test_gateway_command_migrate_legacy_dry_run_passes_through(
        self, monkeypatch
    ):
@@ -787,6 +787,33 @@ class TestPluginCommands:
        assert entry["handler"] is handler
        assert entry["description"] == "My custom command"
        assert entry["plugin"] == "test-plugin"
+        # args_hint defaults to empty string when not passed.
+        assert entry["args_hint"] == ""
+
+    def test_register_command_with_args_hint(self):
+        """args_hint is stored and surfaced for gateway-native UI registration."""
+        mgr = PluginManager()
+        manifest = PluginManifest(name="test-plugin", source="user")
+        ctx = PluginContext(manifest, mgr)
+
+        ctx.register_command(
+            "metricas",
+            lambda a: a,
+            description="Metrics dashboard",
+            args_hint="dias:7 formato:json",
+        )
+
+        entry = mgr._plugin_commands["metricas"]
+        assert entry["args_hint"] == "dias:7 formato:json"
+
+    def test_register_command_args_hint_whitespace_trimmed(self):
+        """args_hint leading/trailing whitespace is stripped."""
+        mgr = PluginManager()
+        manifest = PluginManifest(name="test-plugin", source="user")
+        ctx = PluginContext(manifest, mgr)
+
+        ctx.register_command("foo", lambda a: a, args_hint="  <file>  ")
+        assert mgr._plugin_commands["foo"]["args_hint"] == "<file>"

    def test_register_command_normalizes_name(self):
        """Names are lowercased, stripped, and leading slashes removed."""
@@ -268,7 +268,6 @@ class TestCliBrandingHelpers:

    def test_prompt_toolkit_style_overrides_cover_tui_classes(self):
        from hermes_cli.skin_engine import set_active_skin, get_prompt_toolkit_style_overrides
-
        set_active_skin("ares")
        overrides = get_prompt_toolkit_style_overrides()
        required = {
@@ -277,6 +276,13 @@ class TestCliBrandingHelpers:
            "prompt",
            "prompt-working",
            "hint",
+            "status-bar",
+            "status-bar-strong",
+            "status-bar-dim",
+            "status-bar-good",
+            "status-bar-warn",
+            "status-bar-bad",
+            "status-bar-critical",
            "input-rule",
            "image-badge",
            "completion-menu",
@@ -325,6 +331,15 @@ class TestCliBrandingHelpers:
        overrides = get_prompt_toolkit_style_overrides()
        assert overrides["prompt"] == skin.get_color("prompt")
        assert overrides["input-rule"] == skin.get_color("input_rule")
+        assert overrides["status-bar"] == (
+            f"bg:{skin.get_color('status_bar_bg')} {skin.get_color('status_bar_text')}"
+        )
+        assert overrides["status-bar-strong"] == (
+            f"bg:{skin.get_color('status_bar_bg')} {skin.get_color('status_bar_strong')} bold"
+        )
+        assert overrides["status-bar-critical"] == (
+            f"bg:{skin.get_color('status_bar_bg')} {skin.get_color('status_bar_critical')} bold"
+        )
        assert overrides["clarify-title"] == f"{skin.get_color('banner_title')} bold"
        assert overrides["sudo-prompt"] == f"{skin.get_color('ui_error')} bold"
        assert overrides["approval-title"] == f"{skin.get_color('ui_warn')} bold"
@@ -155,3 +155,29 @@ class TestFallbackChainAdvancement:
            ]
            assert agent._try_activate_fallback() is True
            assert agent.model == "gpt-4o"
+
+    def test_resolves_key_env_for_fallback_provider(self):
+        fbs = [
+            {
+                "provider": "custom",
+                "model": "fallback-model",
+                "base_url": "https://fallback.example/v1",
+                "key_env": "MY_FALLBACK_KEY",
+            }
+        ]
+        agent = _make_agent(fallback_model=fbs)
+        with (
+            patch.dict("os.environ", {"MY_FALLBACK_KEY": "env-secret"}, clear=False),
+            patch(
+                "agent.auxiliary_client.resolve_provider_client",
+                return_value=(
+                    _mock_client(
+                        base_url="https://fallback.example/v1",
+                        api_key="env-secret",
+                    ),
+                    "fallback-model",
+                ),
+            ) as mock_rpc,
+        ):
+            assert agent._try_activate_fallback() is True
+            assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret"
@@ -1133,3 +1133,225 @@ class TestPartialToolCallWarning:
            f"Unexpected warning on text-only partial stream: {content!r}"
        )

+
+class TestSilentRetryMidToolCall:
+    """Regression: when the stream dies mid tool-call JSON after text was
+    already delivered, we previously stubbed the turn with a "retry manually"
+    warning.  Now: if the error is a transient connection error AND a tool
+    call was in flight, silently retry the stream (the user sees a brief
+    reconnect marker + duplicated preamble, which is strictly better than
+    a lost action).  If no tool call was in flight, or the error isn't
+    transient, the existing stub-with-warning behaviour is preserved.
+    """
+
+    @patch("run_agent.AIAgent._replace_primary_openai_client")
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_silent_retry_recovers_tool_call(
+        self, mock_close, mock_create, mock_replace,
+    ):
+        """First attempt: text + partial tool-call + connection drop.
+        Second attempt: text + complete tool-call.  Response should contain
+        the recovered tool call; no warning stub should be returned."""
+        from run_agent import AIAgent
+        import httpx as _httpx
+
+        attempts = {"n": 0}
+
+        def _first_stream():
+            yield _make_stream_chunk(content="Let me write the audit: ")
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
+            ])
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, arguments='{"path": "/tmp/x", '),
+            ])
+            raise _httpx.RemoteProtocolError("peer closed connection")
+
+        def _second_stream():
+            yield _make_stream_chunk(content="Let me write the audit: ")
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
+            ])
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(
+                    index=0, arguments='{"path": "/tmp/x", "content": "hi"}',
+                ),
+            ])
+            yield _make_stream_chunk(finish_reason="tool_calls")
+
+        def _pick_stream(*a, **kw):
+            attempts["n"] += 1
+            return _first_stream() if attempts["n"] == 1 else _second_stream()
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = _pick_stream
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        fired_deltas: list = []
+        agent._fire_stream_delta = lambda text: fired_deltas.append(text)
+
+        import os as _os
+        _prev = _os.environ.get("HERMES_STREAM_RETRIES")
+        _os.environ["HERMES_STREAM_RETRIES"] = "2"
+        try:
+            response = agent._interruptible_streaming_api_call({})
+        finally:
+            if _prev is None:
+                _os.environ.pop("HERMES_STREAM_RETRIES", None)
+            else:
+                _os.environ["HERMES_STREAM_RETRIES"] = _prev
+
+        assert attempts["n"] == 2, (
+            f"Expected silent retry (2 attempts), got {attempts['n']}"
+        )
+        # Response should carry the recovered tool call, not a warning stub.
+        msg = response.choices[0].message
+        tool_calls = getattr(msg, "tool_calls", None)
+        assert tool_calls, (
+            f"Silent retry should recover the tool call, got tool_calls={tool_calls!r} "
+            f"content={getattr(msg, 'content', None)!r}"
+        )
+        _tc0 = tool_calls[0]
+        _name = (
+            _tc0["function"]["name"] if isinstance(_tc0, dict)
+            else _tc0.function.name
+        )
+        assert _name == "write_file"
+        # User saw a reconnect marker between attempts.
+        assert any("reconnecting" in d.lower() for d in fired_deltas), (
+            f"Expected a reconnect marker delta, fired_deltas={fired_deltas}"
+        )
+        # Stub-path warning must NOT appear (this was the whole point).
+        joined = "".join(fired_deltas)
+        assert "Stream stalled" not in joined, (
+            f"Stub-path warning leaked into silent-retry path: {joined!r}"
+        )
+
+    @patch("run_agent.AIAgent._replace_primary_openai_client")
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_silent_retry_exhausted_falls_back_to_stub(
+        self, mock_close, mock_create, mock_replace,
+    ):
+        """When all retry attempts fail with connection errors, fall back
+        to the original stub-with-warning behaviour so the user isn't left
+        with zero signal."""
+        from run_agent import AIAgent
+        import httpx as _httpx
+
+        def _always_fails():
+            yield _make_stream_chunk(content="Let me write the audit: ")
+            yield _make_stream_chunk(tool_calls=[
+                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
+            ])
+            raise _httpx.RemoteProtocolError("peer closed connection")
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _always_fails()
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+
+        fired_deltas: list = []
+        agent._fire_stream_delta = lambda text: fired_deltas.append(text)
+
+        import os as _os
+        _prev = _os.environ.get("HERMES_STREAM_RETRIES")
+        _os.environ["HERMES_STREAM_RETRIES"] = "1"
+        try:
+            response = agent._interruptible_streaming_api_call({})
+        finally:
+            if _prev is None:
+                _os.environ.pop("HERMES_STREAM_RETRIES", None)
+            else:
+                _os.environ["HERMES_STREAM_RETRIES"] = _prev
+
+        # After retries exhaust, the stub-with-warning path must engage.
+        content = response.choices[0].message.content or ""
+        assert "Stream stalled mid tool-call" in content, (
+            f"Exhausted-retry fallback dropped the user-visible warning: {content!r}"
+        )
+        assert response.choices[0].message.tool_calls is None
+
+    @patch("run_agent.AIAgent._replace_primary_openai_client")
+    @patch("run_agent.AIAgent._create_request_openai_client")
+    @patch("run_agent.AIAgent._close_request_openai_client")
+    def test_no_silent_retry_for_text_only_stall(
+        self, mock_close, mock_create, mock_replace,
+    ):
+        """Text-only stall (no tool call in flight) must NOT trigger silent
+        retry — that's the case where the user saw the model's text reply
+        and retrying would duplicate it with no benefit."""
+        from run_agent import AIAgent
+        import httpx as _httpx
+
+        attempts = {"n": 0}
+
+        def _text_stall(*a, **kw):
+            attempts["n"] += 1
+
+            def _gen():
+                yield _make_stream_chunk(content="Here's my answer so far")
+                raise _httpx.RemoteProtocolError("peer closed connection")
+            return _gen()
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create.side_effect = _text_stall
+        mock_create.return_value = mock_client
+
+        agent = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        agent.api_mode = "chat_completions"
+        agent._interrupt_requested = False
+        agent._current_streamed_assistant_text = "Here's my answer so far"
+
+        import os as _os
+        _prev = _os.environ.get("HERMES_STREAM_RETRIES")
+        _os.environ["HERMES_STREAM_RETRIES"] = "2"
+        try:
+            response = agent._interruptible_streaming_api_call({})
+        finally:
+            if _prev is None:
+                _os.environ.pop("HERMES_STREAM_RETRIES", None)
+            else:
+                _os.environ["HERMES_STREAM_RETRIES"] = _prev
+
+        # Only one attempt: text-only stall short-circuits retry.
+        assert attempts["n"] == 1, (
+            f"Text-only stall should not silent-retry, got {attempts['n']} attempts"
+        )
+        content = response.choices[0].message.content or ""
+        assert content == "Here's my answer so far", (
+            f"Text-only stall regressed: {content!r}"
+        )
+        assert "Stream stalled" not in content, (
+            f"Text-only stall should not emit tool-call warning: {content!r}"
+        )
+
@@ -66,6 +66,9 @@ class TestCliSkinPromptIntegration:
        assert style_dict["prompt"] == skin.get_color("prompt")
        assert style_dict["input-rule"] == skin.get_color("input_rule")
        assert style_dict["prompt-working"] == f"{skin.get_color('banner_dim')} italic"
+        assert style_dict["status-bar"] == (
+            f"bg:{skin.get_color('status_bar_bg')} {skin.get_color('status_bar_text')}"
+        )
        assert style_dict["approval-title"] == f"{skin.get_color('ui_warn')} bold"

    def test_apply_tui_skin_style_updates_running_app(self):
@@ -197,6 +197,68 @@ class TestRunAsyncWithRunningLoop:
        )
        assert result == 42

+    @pytest.mark.asyncio
+    async def test_timeout_uses_nonblocking_executor_shutdown(self, monkeypatch):
+        """A timeout in the running-loop branch must not wait for the worker.
+
+        ThreadPoolExecutor's context manager performs shutdown(wait=True).
+        If _run_async relies on that path after future.result(timeout=...)
+        times out, the timeout does not bound wall-clock time because the
+        caller still waits for the stuck coroutine's thread to finish.
+        """
+        import concurrent.futures
+        from model_tools import _run_async
+
+        events = {  # observations recorded by the fakes defined below
+            "cancelled": False,
+            "result_timeout": None,
+            "shutdown_calls": [],
+        }
+
+        class TimeoutFuture:  # stand-in future whose result() always times out
+            def result(self, timeout=None):
+                events["result_timeout"] = timeout  # remember requested timeout
+                raise concurrent.futures.TimeoutError()
+
+            def cancel(self):
+                events["cancelled"] = True
+                return True
+
+        class FakeExecutor:  # patched in for ThreadPoolExecutor; runs nothing
+            def __init__(self, *args, **kwargs):
+                pass
+
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                self.shutdown(wait=True)  # mirrors the real context manager
+                return False
+
+            def submit(self, fn, *args, **kwargs):
+                if args and hasattr(args[0], "close"):
+                    args[0].close()  # presumably the coroutine; fn never runs
+                return TimeoutFuture()
+
+            def shutdown(self, wait=True, cancel_futures=False):
+                events["shutdown_calls"].append((wait, cancel_futures))
+
+        async def _never_finishes():
+            await asyncio.sleep(999)
+
+        monkeypatch.setattr(
+            concurrent.futures,
+            "ThreadPoolExecutor",
+            FakeExecutor,
+        )
+
+        with pytest.raises(concurrent.futures.TimeoutError):
+            _run_async(_never_finishes())
+
+        assert events["result_timeout"] == 300  # timeout passed to result()
+        assert events["cancelled"] is True
+        assert events["shutdown_calls"] == [(False, True)]  # (wait, cancel_futures)
+

 # ---------------------------------------------------------------------------
 # Integration: full vision_analyze dispatch chain
@@ -106,11 +106,23 @@ def test_config_set_yolo_toggles_session_scope():

    server._sessions["sid"] = _session()
    try:
-        resp_on = server.handle_request({"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "yolo"}})
+        resp_on = server.handle_request(
+            {
+                "id": "1",
+                "method": "config.set",
+                "params": {"session_id": "sid", "key": "yolo"},
+            }
+        )
        assert resp_on["result"]["value"] == "1"
        assert is_session_yolo_enabled("session-key") is True

-        resp_off = server.handle_request({"id": "2", "method": "config.set", "params": {"session_id": "sid", "key": "yolo"}})
+        resp_off = server.handle_request(
+            {
+                "id": "2",
+                "method": "config.set",
+                "params": {"session_id": "sid", "key": "yolo"},
+            }
+        )
        assert resp_off["result"]["value"] == "0"
        assert is_session_yolo_enabled("session-key") is False
    finally:
@@ -118,6 +130,36 @@ def test_config_set_yolo_toggles_session_scope():
        server._sessions.clear()


+def test_config_get_statusbar_survives_non_dict_display(monkeypatch):
+    monkeypatch.setattr(server, "_load_cfg", lambda: {"display": "broken"})
+
+    resp = server.handle_request(  # "display" above is a str, not a dict
+        {"id": "1", "method": "config.get", "params": {"key": "statusbar"}}
+    )
+
+    assert resp["result"]["value"] == "top"  # presumably the default; verify
+
+
+def test_config_set_statusbar_survives_non_dict_display(tmp_path, monkeypatch):
+    import yaml
+
+    cfg_path = tmp_path / "config.yaml"
+    cfg_path.write_text(yaml.safe_dump({"display": "broken"}))  # str, not dict
+    monkeypatch.setattr(server, "_hermes_home", tmp_path)  # presumably config dir
+
+    resp = server.handle_request(
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"key": "statusbar", "value": "bottom"},
+        }
+    )
+
+    assert resp["result"]["value"] == "bottom"
+    saved = yaml.safe_load(cfg_path.read_text())  # re-read the file on disk
+    assert saved["display"]["tui_statusbar"] == "bottom"  # "display" is a dict now
+
+
 def test_enable_gateway_prompts_sets_gateway_env(monkeypatch):
    monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@@ -144,13 +186,21 @@ def test_config_set_reasoning_updates_live_session_and_agent(tmp_path, monkeypat
    server._sessions["sid"] = _session(agent=agent)

    resp_effort = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "reasoning", "value": "low"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "low"},
+        }
    )
    assert resp_effort["result"]["value"] == "low"
    assert agent.reasoning_config == {"enabled": True, "effort": "low"}

    resp_show = server.handle_request(
-        {"id": "2", "method": "config.set", "params": {"session_id": "sid", "key": "reasoning", "value": "show"}}
+        {
+            "id": "2",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "reasoning", "value": "show"},
+        }
    )
    assert resp_show["result"]["value"] == "show"
    assert server._sessions["sid"]["show_reasoning"] is True
@@ -162,7 +212,11 @@ def test_config_set_verbose_updates_session_mode_and_agent(tmp_path, monkeypatch
    server._sessions["sid"] = _session(agent=agent)

    resp = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "verbose", "value": "cycle"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "verbose", "value": "cycle"},
+        }
    )

    assert resp["result"]["value"] == "verbose"
@@ -180,7 +234,11 @@ def test_config_set_model_uses_live_switch_path(monkeypatch):

    monkeypatch.setattr(server, "_apply_model_switch", _fake_apply)
    resp = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "model", "value": "new/model"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "model", "value": "new/model"},
+        }
    )

    assert resp["result"]["value"] == "new/model"
@@ -221,7 +279,15 @@ def test_config_set_model_global_persists(monkeypatch):
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: saved.update(cfg))

    resp = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "model", "value": "anthropic/claude-sonnet-4.6 --global"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {
+                "session_id": "sid",
+                "key": "model",
+                "value": "anthropic/claude-sonnet-4.6 --global",
+            },
+        }
    )

    assert resp["result"]["value"] == "anthropic/claude-sonnet-4.6"
@@ -241,6 +307,7 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch):
    trying openrouter because the env-var-backed resolvers still saw the old
    provider.
    """
+
    class _Agent:
        provider = "openrouter"
        model = "old/model"
@@ -262,21 +329,39 @@ def test_config_set_model_syncs_inference_provider_env(monkeypatch):

    server._sessions["sid"] = _session(agent=_Agent())
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
-    monkeypatch.setattr("hermes_cli.model_switch.switch_model", lambda **_kwargs: result)
+    monkeypatch.setattr(
+        "hermes_cli.model_switch.switch_model", lambda **_kwargs: result
+    )
    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)

    server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "model", "value": "claude-sonnet-4.6 --provider anthropic"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {
+                "session_id": "sid",
+                "key": "model",
+                "value": "claude-sonnet-4.6 --provider anthropic",
+            },
+        }
    )

    assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic"


 def test_config_set_personality_rejects_unknown_name(monkeypatch):
-    monkeypatch.setattr(server, "_available_personalities", lambda cfg=None: {"helpful": "You are helpful."})
+    monkeypatch.setattr(
+        server,
+        "_available_personalities",
+        lambda cfg=None: {"helpful": "You are helpful."},
+    )
    resp = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"key": "personality", "value": "bogus"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"key": "personality", "value": "bogus"},
+        }
    )

    assert "error" in resp
@@ -284,20 +369,36 @@ def test_config_set_personality_rejects_unknown_name(monkeypatch):


 def test_config_set_personality_resets_history_and_returns_info(monkeypatch):
-    session = _session(agent=types.SimpleNamespace(), history=[{"role": "user", "text": "hi"}], history_version=4)
+    session = _session(
+        agent=types.SimpleNamespace(),
+        history=[{"role": "user", "text": "hi"}],
+        history_version=4,
+    )
    new_agent = types.SimpleNamespace(model="x")
    emits = []

    server._sessions["sid"] = session
-    monkeypatch.setattr(server, "_available_personalities", lambda cfg=None: {"helpful": "You are helpful."})
-    monkeypatch.setattr(server, "_make_agent", lambda sid, key, session_id=None: new_agent)
-    monkeypatch.setattr(server, "_session_info", lambda agent: {"model": getattr(agent, "model", "?")})
+    monkeypatch.setattr(
+        server,
+        "_available_personalities",
+        lambda cfg=None: {"helpful": "You are helpful."},
+    )
+    monkeypatch.setattr(
+        server, "_make_agent", lambda sid, key, session_id=None: new_agent
+    )
+    monkeypatch.setattr(
+        server, "_session_info", lambda agent: {"model": getattr(agent, "model", "?")}
+    )
    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
    monkeypatch.setattr(server, "_emit", lambda *args: emits.append(args))
    monkeypatch.setattr(server, "_write_config_key", lambda path, value: None)

    resp = server.handle_request(
-        {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "personality", "value": "helpful"}}
+        {
+            "id": "1",
+            "method": "config.set",
+            "params": {"session_id": "sid", "key": "personality", "value": "helpful"},
+        }
    )

    assert resp["result"]["history_reset"] is True
@@ -311,11 +412,17 @@ def test_session_compress_uses_compress_helper(monkeypatch):
    agent = types.SimpleNamespace()
    server._sessions["sid"] = _session(agent=agent)

-    monkeypatch.setattr(server, "_compress_session_history", lambda session, focus_topic=None: (2, {"total": 42}))
+    monkeypatch.setattr(
+        server,
+        "_compress_session_history",
+        lambda session, focus_topic=None: (2, {"total": 42}),
+    )
    monkeypatch.setattr(server, "_session_info", lambda _agent: {"model": "x"})

    with patch("tui_gateway.server._emit") as emit:
-        resp = server.handle_request({"id": "1", "method": "session.compress", "params": {"session_id": "sid"}})
+        resp = server.handle_request(
+            {"id": "1", "method": "session.compress", "params": {"session_id": "sid"}}
+        )

    assert resp["result"]["removed"] == 2
    assert resp["result"]["usage"]["total"] == 42
@@ -328,9 +435,14 @@ def test_prompt_submit_sets_approval_session_key(monkeypatch):
    captured = {}

    class _Agent:
-        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+        def run_conversation(
+            self, prompt, conversation_history=None, stream_callback=None
+        ):
            captured["session_key"] = get_current_session_key(default="")
-            return {"final_response": "ok", "messages": [{"role": "assistant", "content": "ok"}]}
+            return {
+                "final_response": "ok",
+                "messages": [{"role": "assistant", "content": "ok"}],
+            }

    class _ImmediateThread:
        def __init__(self, target=None, daemon=None):
@@ -345,7 +457,13 @@ def test_prompt_submit_sets_approval_session_key(monkeypatch):
    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)

-    resp = server.handle_request({"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "ping"}})
+    resp = server.handle_request(
+        {
+            "id": "1",
+            "method": "prompt.submit",
+            "params": {"session_id": "sid", "text": "ping"},
+        }
+    )

    assert resp["result"]["status"] == "streaming"
    assert captured["session_key"] == "session-key"
@@ -359,9 +477,14 @@ def test_prompt_submit_expands_context_refs(monkeypatch):
        base_url = ""
        api_key = ""

-        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+        def run_conversation(
+            self, prompt, conversation_history=None, stream_callback=None
+        ):
            captured["prompt"] = prompt
-            return {"final_response": "ok", "messages": [{"role": "assistant", "content": "ok"}]}
+            return {
+                "final_response": "ok",
+                "messages": [{"role": "assistant", "content": "ok"}],
+            }

    class _ImmediateThread:
        def __init__(self, target=None, daemon=None):
@@ -371,8 +494,14 @@ def test_prompt_submit_expands_context_refs(monkeypatch):
            self._target()

    fake_ctx = types.ModuleType("agent.context_references")
-    fake_ctx.preprocess_context_references = lambda message, **kwargs: types.SimpleNamespace(
-        blocked=False, message="expanded prompt", warnings=[], references=[], injected_tokens=0
+    fake_ctx.preprocess_context_references = (
+        lambda message, **kwargs: types.SimpleNamespace(
+            blocked=False,
+            message="expanded prompt",
+            warnings=[],
+            references=[],
+            injected_tokens=0,
+        )
    )
    fake_meta = types.ModuleType("agent.model_metadata")
    fake_meta.get_model_context_length = lambda *args, **kwargs: 100000
@@ -385,7 +514,13 @@ def test_prompt_submit_expands_context_refs(monkeypatch):
    monkeypatch.setitem(sys.modules, "agent.context_references", fake_ctx)
    monkeypatch.setitem(sys.modules, "agent.model_metadata", fake_meta)

-    server.handle_request({"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "@diff"}})
+    server.handle_request(
+        {
+            "id": "1",
+            "method": "prompt.submit",
+            "params": {"session_id": "sid", "text": "@diff"},
+        }
+    )

    assert captured["prompt"] == "expanded prompt"

@@ -404,7 +539,13 @@ def test_image_attach_appends_local_image(monkeypatch):
    server._sessions["sid"] = _session()
    monkeypatch.setitem(sys.modules, "cli", fake_cli)

-    resp = server.handle_request({"id": "1", "method": "image.attach", "params": {"session_id": "sid", "path": "/tmp/cat.png"}})
+    resp = server.handle_request(
+        {
+            "id": "1",
+            "method": "image.attach",
+            "params": {"session_id": "sid", "path": "/tmp/cat.png"},
+        }
+    )

    assert resp["result"]["attached"] is True
    assert resp["result"]["name"] == "cat.png"
@@ -420,14 +561,21 @@ def test_image_attach_accepts_unquoted_screenshot_path_with_spaces(monkeypatch):
        "is_image": True,
        "remainder": "",
    }
-    fake_cli._split_path_input = lambda raw: ("/tmp/Screenshot", "2026-04-21 at 1.04.43 PM.png")
+    fake_cli._split_path_input = lambda raw: (
+        "/tmp/Screenshot",
+        "2026-04-21 at 1.04.43 PM.png",
+    )
    fake_cli._resolve_attachment_path = lambda raw: None

    server._sessions["sid"] = _session()
    monkeypatch.setitem(sys.modules, "cli", fake_cli)

    resp = server.handle_request(
-        {"id": "1", "method": "image.attach", "params": {"session_id": "sid", "path": str(screenshot)}}
+        {
+            "id": "1",
+            "method": "image.attach",
+            "params": {"session_id": "sid", "path": str(screenshot)},
+        }
    )

    assert resp["result"]["attached"] is True
@@ -437,20 +585,34 @@ def test_image_attach_accepts_unquoted_screenshot_path_with_spaces(monkeypatch):


 def test_commands_catalog_surfaces_quick_commands(monkeypatch):
-    monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": {
-        "build": {"type": "exec", "command": "npm run build"},
-        "git": {"type": "alias", "target": "/shell git"},
-        "notes": {"type": "exec", "command": "cat NOTES.md", "description": "Open design notes"},
-    }})
+    monkeypatch.setattr(
+        server,
+        "_load_cfg",
+        lambda: {
+            "quick_commands": {
+                "build": {"type": "exec", "command": "npm run build"},
+                "git": {"type": "alias", "target": "/shell git"},
+                "notes": {
+                    "type": "exec",
+                    "command": "cat NOTES.md",
+                    "description": "Open design notes",
+                },
+            }
+        },
+    )

-    resp = server.handle_request({"id": "1", "method": "commands.catalog", "params": {}})
+    resp = server.handle_request(
+        {"id": "1", "method": "commands.catalog", "params": {}}
+    )

    pairs = dict(resp["result"]["pairs"])
    assert "npm run build" in pairs["/build"]
    assert pairs["/git"].startswith("alias →")
    assert pairs["/notes"] == "Open design notes"

-    user_cat = next(c for c in resp["result"]["categories"] if c["name"] == "User commands")
+    user_cat = next(
+        c for c in resp["result"]["categories"] if c["name"] == "User commands"
+    )
    user_pairs = dict(user_cat["pairs"])
    assert set(user_pairs) == {"/build", "/git", "/notes"}

@@ -459,14 +621,22 @@ def test_commands_catalog_surfaces_quick_commands(monkeypatch):


 def test_command_dispatch_exec_nonzero_surfaces_error(monkeypatch):
-    monkeypatch.setattr(server, "_load_cfg", lambda: {"quick_commands": {"boom": {"type": "exec", "command": "boom"}}})
+    monkeypatch.setattr(
+        server,
+        "_load_cfg",
+        lambda: {"quick_commands": {"boom": {"type": "exec", "command": "boom"}}},
+    )
    monkeypatch.setattr(
        server.subprocess,
        "run",
-        lambda *args, **kwargs: types.SimpleNamespace(returncode=1, stdout="", stderr="failed"),
+        lambda *args, **kwargs: types.SimpleNamespace(
+            returncode=1, stdout="", stderr="failed"
+        ),
    )

-    resp = server.handle_request({"id": "1", "method": "command.dispatch", "params": {"name": "boom"}})
+    resp = server.handle_request(
+        {"id": "1", "method": "command.dispatch", "params": {"name": "boom"}}
+    )

    assert "error" in resp
    assert "failed" in resp["error"]["message"]
@@ -474,15 +644,22 @@ def test_command_dispatch_exec_nonzero_surfaces_error(monkeypatch):

 def test_plugins_list_surfaces_loader_error(monkeypatch):
    with patch("hermes_cli.plugins.get_plugin_manager", side_effect=Exception("boom")):
-        resp = server.handle_request({"id": "1", "method": "plugins.list", "params": {}})
+        resp = server.handle_request(
+            {"id": "1", "method": "plugins.list", "params": {}}
+        )

    assert "error" in resp
    assert "boom" in resp["error"]["message"]


 def test_complete_slash_surfaces_completer_error(monkeypatch):
-    with patch("hermes_cli.commands.SlashCommandCompleter", side_effect=Exception("no completer")):
-        resp = server.handle_request({"id": "1", "method": "complete.slash", "params": {"text": "/mo"}})
+    with patch(
+        "hermes_cli.commands.SlashCommandCompleter",
+        side_effect=Exception("no completer"),
+    ):
+        resp = server.handle_request(
+            {"id": "1", "method": "complete.slash", "params": {"text": "/mo"}}
+        )

    assert "error" in resp
    assert "no completer" in resp["error"]["message"]
@@ -500,7 +677,11 @@ def test_input_detect_drop_attaches_image(monkeypatch):
    monkeypatch.setitem(sys.modules, "cli", fake_cli)

    resp = server.handle_request(
-        {"id": "1", "method": "input.detect_drop", "params": {"session_id": "sid", "text": "/tmp/cat.png"}}
+        {
+            "id": "1",
+            "method": "input.detect_drop",
+            "params": {"session_id": "sid", "text": "/tmp/cat.png"},
+        }
    )

    assert resp["result"]["matched"] is True
@@ -521,7 +702,9 @@ def test_rollback_restore_resolves_number_and_file_path():
            calls["args"] = (cwd, target, file_path)
            return {"success": True, "message": "done"}

-    server._sessions["sid"] = _session(agent=types.SimpleNamespace(_checkpoint_mgr=_Mgr()), history=[])
+    server._sessions["sid"] = _session(
+        agent=types.SimpleNamespace(_checkpoint_mgr=_Mgr()), history=[]
+    )
    resp = server.handle_request(
        {
            "id": "1",
@@ -572,7 +755,9 @@ def test_session_steer_calls_agent_steer_when_agent_supports_it():


 def test_session_steer_rejects_empty_text():
-    server._sessions["sid"] = _session(agent=types.SimpleNamespace(steer=lambda t: True))
+    server._sessions["sid"] = _session(
+        agent=types.SimpleNamespace(steer=lambda t: True)
+    )
    try:
        resp = server.handle_request(
            {
@@ -632,10 +817,13 @@ def test_session_undo_rejects_while_running():
    """Fix for TUI silent-drop #1: /undo must not mutate history
    while the agent is mid-turn — would either clobber the undo or
    cause prompt.submit to silently drop the agent's response."""
-    server._sessions["sid"] = _session(running=True, history=[
-        {"role": "user", "content": "hi"},
-        {"role": "assistant", "content": "hello"},
-    ])
+    server._sessions["sid"] = _session(
+        running=True,
+        history=[
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hello"},
+        ],
+    )
    try:
        resp = server.handle_request(
            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
@@ -651,10 +839,13 @@ def test_session_undo_rejects_while_running():

 def test_session_undo_allowed_when_idle():
    """Regression guard: when not running, /undo still works."""
-    server._sessions["sid"] = _session(running=False, history=[
-        {"role": "user", "content": "hi"},
-        {"role": "assistant", "content": "hello"},
-    ])
+    server._sessions["sid"] = _session(
+        running=False,
+        history=[
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": "hello"},
+        ],
+    )
    try:
        resp = server.handle_request(
            {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}}
@@ -683,7 +874,11 @@ def test_rollback_restore_rejects_full_history_while_running(monkeypatch):
    server._sessions["sid"] = _session(running=True)
    try:
        resp = server.handle_request(
-            {"id": "1", "method": "rollback.restore", "params": {"session_id": "sid", "hash": "abc"}}
+            {
+                "id": "1",
+                "method": "rollback.restore",
+                "params": {"session_id": "sid", "hash": "abc"},
+            }
        )
        assert resp.get("error"), "full-history rollback should reject while running"
        assert resp["error"]["code"] == 4009
@@ -701,12 +896,17 @@ def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
    session_ref = {"s": None}

    class _RacyAgent:
-        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
+        def run_conversation(
+            self, prompt, conversation_history=None, stream_callback=None
+        ):
            # Simulate: something external bumped history_version
            # while we were running.
            with session_ref["s"]["history_lock"]:
                session_ref["s"]["history_version"] += 1
-            return {"final_response": "agent reply", "messages": [{"role": "assistant", "content": "agent reply"}]}
+            return {
+                "final_response": "agent reply",
+                "messages": [{"role": "assistant", "content": "agent reply"}],
+            }

    class _ImmediateThread:
        def __init__(self, target=None, daemon=None):
@@ -725,7 +925,11 @@ def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))

        resp = server.handle_request(
-            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+            {
+                "id": "1",
+                "method": "prompt.submit",
+                "params": {"session_id": "sid", "text": "hi"},
+            }
        )
        assert resp.get("result"), f"got error: {resp.get('error')}"

@@ -742,16 +946,25 @@ def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch):
            "history_version mismatch — otherwise the UI silently "
            "shows output that was never persisted"
        )
-        assert "not saved" in payload["warning"].lower() or "changed" in payload["warning"].lower()
+        assert (
+            "not saved" in payload["warning"].lower()
+            or "changed" in payload["warning"].lower()
+        )
    finally:
        server._sessions.pop("sid", None)


 def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
    """Regression guard: the backstop does not affect the happy path."""
+
    class _Agent:
-        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
-            return {"final_response": "reply", "messages": [{"role": "assistant", "content": "reply"}]}
+        def run_conversation(
+            self, prompt, conversation_history=None, stream_callback=None
+        ):
+            return {
+                "final_response": "reply",
+                "messages": [{"role": "assistant", "content": "reply"}],
+            }

    class _ImmediateThread:
        def __init__(self, target=None, daemon=None):
@@ -769,12 +982,18 @@ def test_prompt_submit_history_version_match_persists_normally(monkeypatch):
        monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a))

        resp = server.handle_request(
-            {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}}
+            {
+                "id": "1",
+                "method": "prompt.submit",
+                "params": {"session_id": "sid", "text": "hi"},
+            }
        )
        assert resp.get("result")

        # History was written
-        assert server._sessions["sid"]["history"] == [{"role": "assistant", "content": "reply"}]
+        assert server._sessions["sid"]["history"] == [
+            {"role": "assistant", "content": "reply"}
+        ]
        assert server._sessions["sid"]["history_version"] == 1

        # No warning should be attached
@@ -818,7 +1037,11 @@ def test_interrupt_only_clears_own_session_pending():

        # Interrupt session A.
        resp = server.handle_request(
-            {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid_a"}}
+            {
+                "id": "1",
+                "method": "session.interrupt",
+                "params": {"session_id": "sid_a"},
+            }
        )
        assert resp.get("result"), f"got error: {resp.get('error')}"

@@ -891,8 +1114,11 @@ def test_respond_unpacks_sid_tuple_correctly():
    server._pending["rid-x"] = ("sid_x", ev)
    try:
        resp = server.handle_request(
-            {"id": "1", "method": "clarify.respond",
-             "params": {"request_id": "rid-x", "answer": "the answer"}}
+            {
+                "id": "1",
+                "method": "clarify.respond",
+                "params": {"request_id": "rid-x", "answer": "the answer"},
+            }
        )
        assert resp.get("result")
        assert ev.is_set()
@@ -902,7 +1128,6 @@ def test_respond_unpacks_sid_tuple_correctly():
        server._answers.pop("rid-x", None)


-
 # ---------------------------------------------------------------------------
 # /model switch and other agent-mutating commands must reject while the
 # session is running.  agent.switch_model() mutates self.model, self.provider,
@@ -925,10 +1150,17 @@ def test_config_set_model_rejects_while_running(monkeypatch):

    server._sessions["sid"] = _session(running=True)
    try:
-        resp = server.handle_request({
-            "id": "1", "method": "config.set",
-            "params": {"session_id": "sid", "key": "model", "value": "anthropic/claude-sonnet-4.6"},
-        })
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "config.set",
+                "params": {
+                    "session_id": "sid",
+                    "key": "model",
+                    "value": "anthropic/claude-sonnet-4.6",
+                },
+            }
+        )
        assert resp.get("error")
        assert resp["error"]["code"] == 4009
        assert "session busy" in resp["error"]["message"]
@@ -952,10 +1184,13 @@ def test_config_set_model_allowed_when_idle(monkeypatch):

    server._sessions["sid"] = _session(running=False)
    try:
-        resp = server.handle_request({
-            "id": "1", "method": "config.set",
-            "params": {"session_id": "sid", "key": "model", "value": "newmodel"},
-        })
+        resp = server.handle_request(
+            {
+                "id": "1",
+                "method": "config.set",
+                "params": {"session_id": "sid", "key": "model", "value": "newmodel"},
+            }
+        )
        assert resp.get("result")
        assert resp["result"]["value"] == "newmodel"
        assert seen["called"]
@@ -993,9 +1228,9 @@ def test_mirror_slash_side_effects_rejects_mutating_commands_while_running(monke
        ("/compress", "compress"),
    ]:
        warning = server._mirror_slash_side_effects("sid", session, cmd)
-        assert "session busy" in warning, (
-            f"{cmd} should have returned busy warning, got: {warning!r}"
-        )
+        assert (
+            "session busy" in warning
+        ), f"{cmd} should have returned busy warning, got: {warning!r}"
        assert f"/{expected_name}" in warning

    # None of the mutating side-effect helpers should have fired.
@@ -1068,7 +1303,11 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
    # Stub everything _build touches
    monkeypatch.setattr(server, "_make_agent", _slow_make_agent)
    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
-    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(
+        server,
+        "_get_db",
+        lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None),
+    )
    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
@@ -1076,25 +1315,36 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):

    # Shim register/unregister to observe leaks
    import tools.approval as _approval
-    monkeypatch.setattr(_approval, "register_gateway_notify",
-                        lambda key, cb: None)
-    monkeypatch.setattr(_approval, "unregister_gateway_notify",
-                        lambda key: unregistered_keys.append(key))
+
+    monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None)
+    monkeypatch.setattr(
+        _approval,
+        "unregister_gateway_notify",
+        lambda key: unregistered_keys.append(key),
+    )
    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)

    # Start: session.create spawns _build thread, returns synchronously
-    resp = server.handle_request({
-        "id": "1", "method": "session.create", "params": {"cols": 80},
-    })
+    resp = server.handle_request(
+        {
+            "id": "1",
+            "method": "session.create",
+            "params": {"cols": 80},
+        }
+    )
    assert resp.get("result"), f"got error: {resp.get('error')}"
    sid = resp["result"]["session_id"]

    # Build thread is blocked in _slow_make_agent.  Close the session
    # NOW — this pops _sessions[sid] before _build can install the
    # worker/notify.
-    close_resp = server.handle_request({
-        "id": "2", "method": "session.close", "params": {"session_id": sid},
-    })
+    close_resp = server.handle_request(
+        {
+            "id": "2",
+            "method": "session.close",
+            "params": {"session_id": sid},
+        }
+    )
    assert close_resp.get("result", {}).get("closed") is True

    # At this point session.close saw slash_worker=None (not yet
@@ -1108,11 +1358,12 @@ def test_session_create_close_race_does_not_orphan_worker(monkeypatch):
        if closed_workers:
            break
        import time
+
        time.sleep(0.02)

-    assert len(closed_workers) == 1, (
-        f"orphan worker was not cleaned up — closed_workers={closed_workers}"
-    )
+    assert (
+        len(closed_workers) == 1
+    ), f"orphan worker was not cleaned up — closed_workers={closed_workers}"
    # Notify may be unregistered by both session.close (unconditional)
    # and the orphan-cleanup path; the key guarantee is that the build
    # thread does at least one unregister call (any prior close
@@ -1146,21 +1397,33 @@ def test_session_create_no_race_keeps_worker_alive(monkeypatch):

    monkeypatch.setattr(server, "_make_agent", lambda sid, key: _FakeAgent())
    monkeypatch.setattr(server, "_SlashWorker", _FakeWorker)
-    monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None))
+    monkeypatch.setattr(
+        server,
+        "_get_db",
+        lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None),
+    )
    monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"})
    monkeypatch.setattr(server, "_probe_credentials", lambda _a: None)
    monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None)
    monkeypatch.setattr(server, "_emit", lambda *a, **kw: None)

    import tools.approval as _approval
+
    monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None)
-    monkeypatch.setattr(_approval, "unregister_gateway_notify",
-                        lambda key: unregistered_keys.append(key))
+    monkeypatch.setattr(
+        _approval,
+        "unregister_gateway_notify",
+        lambda key: unregistered_keys.append(key),
+    )
    monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None)

-    resp = server.handle_request({
-        "id": "1", "method": "session.create", "params": {"cols": 80},
-    })
+    resp = server.handle_request(
+        {
+            "id": "1",
+            "method": "session.create",
+            "params": {"cols": 80},
+        }
+    )
    sid = resp["result"]["session_id"]

    # Wait for the build to finish (ready event inside session dict).
@@ -1169,12 +1432,12 @@ def test_session_create_no_race_keeps_worker_alive(monkeypatch):

    # Build finished without a close race — nothing should have been
    # cleaned up by the orphan check.
-    assert closed_workers == [], (
-        f"build thread closed its own worker despite no race: {closed_workers}"
-    )
-    assert unregistered_keys == [], (
-        f"build thread unregistered its own notify despite no race: {unregistered_keys}"
-    )
+    assert (
+        closed_workers == []
+    ), f"build thread closed its own worker despite no race: {closed_workers}"
+    assert (
+        unregistered_keys == []
+    ), f"build thread unregistered its own notify despite no race: {unregistered_keys}"

    # Session should have the live worker installed.
    assert session.get("slash_worker") is not None
@@ -1,5 +1,6 @@
 """Tests for _parse_env_var and _get_env_config env-var validation."""

+import importlib
 import json
 from unittest.mock import patch

@@ -84,3 +85,23 @@ class TestParseEnvVar:
        with patch.dict("os.environ", {"TERMINAL_DOCKER_VOLUMES": "not json"}):
            with pytest.raises(ValueError, match="valid JSON"):
                _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON")
+
+
+class TestImportTimeEnvParsing:
+    """Module-level env parsing should never make terminal_tool unimportable."""
+
+    def test_invalid_foreground_timeout_falls_back_to_default(self):
+        try:
+            with patch.dict("os.environ", {"TERMINAL_MAX_FOREGROUND_TIMEOUT": "5m"}, clear=False):
+                mod = importlib.reload(_tt_mod)
+                assert mod.FOREGROUND_MAX_TIMEOUT == 600
+        finally:
+            importlib.reload(_tt_mod)
+
+    def test_invalid_disk_warning_threshold_falls_back_to_default(self):
+        try:
+            with patch.dict("os.environ", {"TERMINAL_DISK_WARNING_GB": "huge"}, clear=False):
+                mod = importlib.reload(_tt_mod)
+                assert mod.DISK_USAGE_WARNING_THRESHOLD_GB == 500.0
+        finally:
+            importlib.reload(_tt_mod)
@@ -3,7 +3,12 @@
 import socket
 from unittest.mock import patch

-from tools.url_safety import is_safe_url, _is_blocked_ip
+from tools.url_safety import (
+    is_safe_url,
+    _is_blocked_ip,
+    _global_allow_private_urls,
+    _reset_allow_private_cache,
+)

 import ipaddress
 import pytest
@@ -202,3 +207,189 @@ class TestIsBlockedIp:
    def test_allowed_ips(self, ip_str):
        ip = ipaddress.ip_address(ip_str)
        assert _is_blocked_ip(ip) is False, f"{ip_str} should be allowed"
+
+
+class TestGlobalAllowPrivateUrls:
+    """Tests for the security.allow_private_urls config toggle."""
+
+    @pytest.fixture(autouse=True)
+    def _reset_cache(self):
+        """Reset the module-level toggle cache before and after each test."""
+        _reset_allow_private_cache()
+        yield
+        _reset_allow_private_cache()
+
+    def test_default_is_false(self, monkeypatch):
+        """Toggle defaults to False when no env var or config is set."""
+        monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
+        with patch("hermes_cli.config.read_raw_config", side_effect=Exception("no config")):
+            assert _global_allow_private_urls() is False
+
+    def test_env_var_true(self, monkeypatch):
+        """HERMES_ALLOW_PRIVATE_URLS=true enables the toggle."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        assert _global_allow_private_urls() is True
+
+    def test_env_var_1(self, monkeypatch):
+        """HERMES_ALLOW_PRIVATE_URLS=1 enables the toggle."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "1")
+        assert _global_allow_private_urls() is True
+
+    def test_env_var_yes(self, monkeypatch):
+        """HERMES_ALLOW_PRIVATE_URLS=yes enables the toggle."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "yes")
+        assert _global_allow_private_urls() is True
+
+    def test_env_var_false(self, monkeypatch):
+        """HERMES_ALLOW_PRIVATE_URLS=false keeps it disabled."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "false")
+        assert _global_allow_private_urls() is False
+
+    def test_config_security_section(self, monkeypatch):
+        """security.allow_private_urls in config enables the toggle."""
+        monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
+        cfg = {"security": {"allow_private_urls": True}}
+        with patch("hermes_cli.config.read_raw_config", return_value=cfg):
+            assert _global_allow_private_urls() is True
+
+    def test_config_browser_fallback(self, monkeypatch):
+        """browser.allow_private_urls works as legacy fallback."""
+        monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
+        cfg = {"browser": {"allow_private_urls": True}}
+        with patch("hermes_cli.config.read_raw_config", return_value=cfg):
+            assert _global_allow_private_urls() is True
+
+    def test_config_security_takes_precedence_over_browser(self, monkeypatch):
+        """security section is checked before browser section."""
+        monkeypatch.delenv("HERMES_ALLOW_PRIVATE_URLS", raising=False)
+        cfg = {"security": {"allow_private_urls": True}, "browser": {"allow_private_urls": False}}
+        with patch("hermes_cli.config.read_raw_config", return_value=cfg):
+            assert _global_allow_private_urls() is True
+
+    def test_env_var_overrides_config(self, monkeypatch):
+        """Env var takes priority over config."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "false")
+        cfg = {"security": {"allow_private_urls": True}}
+        with patch("hermes_cli.config.read_raw_config", return_value=cfg):
+            assert _global_allow_private_urls() is False
+
+    def test_result_is_cached(self, monkeypatch):
+        """Second call uses cached result, doesn't re-read config."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        assert _global_allow_private_urls() is True
+        # Change env after first call — should still be True (cached)
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "false")
+        assert _global_allow_private_urls() is True
+
+
+class TestAllowPrivateUrlsIntegration:
+    """Integration tests: is_safe_url respects the global toggle."""
+
+    @pytest.fixture(autouse=True)
+    def _reset_cache(self):
+        _reset_allow_private_cache()
+        yield
+        _reset_allow_private_cache()
+
+    def test_private_ip_allowed_when_toggle_on(self, monkeypatch):
+        """Private IPs pass is_safe_url when toggle is enabled."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("192.168.1.1", 0)),
+        ]):
+            assert is_safe_url("http://router.local") is True
+
+    def test_benchmark_ip_allowed_when_toggle_on(self, monkeypatch):
+        """198.18.x.x (benchmark/OpenWrt proxy range) passes when toggle is on."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("198.18.23.183", 0)),
+        ]):
+            assert is_safe_url("https://nousresearch.com") is True
+
+    def test_cgnat_allowed_when_toggle_on(self, monkeypatch):
+        """CGNAT range (100.64.0.0/10) passes when toggle is on."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("100.100.100.100", 0)),
+        ]):
+            assert is_safe_url("http://tailscale-peer.example/") is True
+
+    def test_localhost_allowed_when_toggle_on(self, monkeypatch):
+        """Even localhost passes when toggle is on."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("127.0.0.1", 0)),
+        ]):
+            assert is_safe_url("http://localhost:8080/api") is True
+
+    # --- Cloud metadata always blocked regardless of toggle ---
+
+    def test_metadata_hostname_blocked_even_with_toggle(self, monkeypatch):
+        """metadata.google.internal is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        assert is_safe_url("http://metadata.google.internal/computeMetadata/v1/") is False
+
+    def test_metadata_goog_blocked_even_with_toggle(self, monkeypatch):
+        """metadata.goog is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        assert is_safe_url("http://metadata.goog/computeMetadata/v1/") is False
+
+    def test_metadata_ip_blocked_even_with_toggle(self, monkeypatch):
+        """169.254.169.254 (AWS/GCP metadata IP) is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("169.254.169.254", 0)),
+        ]):
+            assert is_safe_url("http://169.254.169.254/latest/meta-data/") is False
+
+    def test_metadata_ipv6_blocked_even_with_toggle(self, monkeypatch):
+        """fd00:ec2::254 (AWS IPv6 metadata) is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("fd00:ec2::254", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[fd00:ec2::254]/latest/") is False
+
+    def test_ecs_metadata_blocked_even_with_toggle(self, monkeypatch):
+        """169.254.170.2 (AWS ECS task metadata) is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("169.254.170.2", 0)),
+        ]):
+            assert is_safe_url("http://169.254.170.2/v2/credentials") is False
+
+    def test_alibaba_metadata_blocked_even_with_toggle(self, monkeypatch):
+        """100.100.100.200 (Alibaba Cloud metadata) is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("100.100.100.200", 0)),
+        ]):
+            assert is_safe_url("http://100.100.100.200/latest/meta-data/") is False
+
+    def test_azure_wire_server_blocked_even_with_toggle(self, monkeypatch):
+        """169.254.169.253 (Azure IMDS wire server) is ALWAYS blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("169.254.169.253", 0)),
+        ]):
+            assert is_safe_url("http://169.254.169.253/") is False
+
+    def test_entire_link_local_blocked_even_with_toggle(self, monkeypatch):
+        """Any 169.254.x.x address is ALWAYS blocked (entire link-local range)."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("169.254.42.99", 0)),
+        ]):
+            assert is_safe_url("http://169.254.42.99/anything") is False
+
+    def test_dns_failure_still_blocked_with_toggle(self, monkeypatch):
+        """DNS failures are still blocked even with toggle on."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        with patch("socket.getaddrinfo", side_effect=socket.gaierror("fail")):
+            assert is_safe_url("https://nonexistent.example.com") is False
+
+    def test_empty_url_still_blocked_with_toggle(self, monkeypatch):
+        """Empty URLs are still blocked."""
+        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
+        assert is_safe_url("") is False
@@ -1182,6 +1182,15 @@ def _run_browser_command(
        # used during CLI discovery.
        browser_env["PATH"] = _merge_browser_path(browser_env.get("PATH", ""))
        browser_env["AGENT_BROWSER_SOCKET_DIR"] = task_socket_dir
+
+        # Tell the agent-browser daemon to self-terminate after being idle
+        # for our configured inactivity timeout.  This is the daemon-side
+        # counterpart to our Python-side _cleanup_inactive_browser_sessions
+        # — the daemon kills itself and its Chrome children when no CLI
+        # commands arrive within the window.  Added in agent-browser 0.24.
+        if "AGENT_BROWSER_IDLE_TIMEOUT_MS" not in browser_env:
+            idle_ms = str(BROWSER_SESSION_INACTIVITY_TIMEOUT * 1000)
+            browser_env["AGENT_BROWSER_IDLE_TIMEOUT_MS"] = idle_ms
        
        # Use temp files for stdout/stderr instead of pipes.
        # agent-browser starts a background daemon that inherits file
@@ -72,11 +72,48 @@ from tools.tool_backend_helpers import (
 )


+def _safe_parse_import_env(
+    name: str,
+    default: Any,
+    converter,
+    type_label: str,
+):
+    """Parse module-level numeric env vars without breaking import.
+
+    Terminal tool is imported by CLI, ACP, tests, and tool discovery. A single
+    malformed env var must not make the whole module unloadable at import time.
+    """
+    raw = os.getenv(name)
+    if raw is None or raw == "":
+        return default
+    try:
+        return converter(raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "Invalid value for %s: %r (expected %s). Falling back to %r.",
+            name,
+            raw,
+            type_label,
+            default,
+        )
+        return default
+
+
 # Hard cap on foreground timeout; override via TERMINAL_MAX_FOREGROUND_TIMEOUT env var.
-FOREGROUND_MAX_TIMEOUT = int(os.getenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", "600"))
+FOREGROUND_MAX_TIMEOUT = _safe_parse_import_env(
+    "TERMINAL_MAX_FOREGROUND_TIMEOUT",
+    600,
+    int,
+    "integer",
+)

 # Disk usage warning threshold (in GB)
-DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))
+DISK_USAGE_WARNING_THRESHOLD_GB = _safe_parse_import_env(
+    "TERMINAL_DISK_WARNING_GB",
+    500.0,
+    float,
+    "number",
+)


 def _check_disk_usage_warning():
@@ -5,6 +5,13 @@ skill could trick the agent into fetching internal resources like cloud
 metadata endpoints (169.254.169.254), localhost services, or private
 network hosts.

+Private-IP blocking can be disabled via ``security.allow_private_urls: true``
+in config.yaml for environments where DNS resolves external domains to
+private/benchmark-range IPs (OpenWrt routers, corporate proxies, VPNs
+that use 198.18.0.0/15 or 100.64.0.0/10).  Even with the toggle on, cloud
+metadata hostnames and IPs (e.g. metadata.google.internal, 169.254.169.254)
+are **always** blocked — those are never legitimate agent targets.
+
 Limitations (documented, not fixable at pre-flight level):
  - DNS rebinding (TOCTOU): an attacker-controlled DNS server with TTL=0
    can return a public IP for the check, then a private IP for the actual
@@ -18,17 +25,35 @@ Limitations (documented, not fixable at pre-flight level):

 import ipaddress
 import logging
+import os
 import socket
 from urllib.parse import urlparse

 logger = logging.getLogger(__name__)

 # Hostnames that should always be blocked regardless of IP resolution
+# or any config toggle.  These are cloud metadata endpoints that an
+# attacker could use to steal instance credentials.
 _BLOCKED_HOSTNAMES = frozenset({
    "metadata.google.internal",
    "metadata.goog",
 })

+# IPs and networks that should always be blocked regardless of the
+# allow_private_urls toggle.  These are cloud metadata / credential
+# endpoints — the #1 SSRF target — plus the entire link-local range,
+# where most (though not all) of them live.
+_ALWAYS_BLOCKED_IPS = frozenset({
+    ipaddress.ip_address("169.254.169.254"),  # AWS/GCP/Azure/DO/Oracle metadata
+    ipaddress.ip_address("169.254.170.2"),     # AWS ECS task metadata (task IAM creds)
+    ipaddress.ip_address("169.254.169.253"),   # Azure IMDS wire server
+    ipaddress.ip_address("fd00:ec2::254"),     # AWS metadata (IPv6)
+    ipaddress.ip_address("100.100.100.200"),   # Alibaba Cloud metadata
+})
+_ALWAYS_BLOCKED_NETWORKS = (
+    ipaddress.ip_network("169.254.0.0/16"),    # Entire link-local range (no legit agent target)
+)
+
 # Exact HTTPS hostnames allowed to resolve to private/benchmark-space IPs.
 # This is intentionally narrow: QQ media downloads can legitimately resolve
 # to 198.18.0.0/15 behind local proxy/benchmark infrastructure.
@@ -42,6 +67,67 @@ _TRUSTED_PRIVATE_IP_HOSTS = frozenset({
 # VPNs, and some cloud internal networks.
 _CGNAT_NETWORK = ipaddress.ip_network("100.64.0.0/10")

+# ---------------------------------------------------------------------------
+# Global toggle: allow private/internal IP resolution
+# ---------------------------------------------------------------------------
+# Cached after first read so we don't hit the filesystem on every URL check.
+_allow_private_resolved = False
+_cached_allow_private: bool = False
+
+
+def _global_allow_private_urls() -> bool:
+    """Return True when the user has opted out of private-IP blocking.
+
+    Checks (in priority order):
+    1. ``HERMES_ALLOW_PRIVATE_URLS`` env var  (``true``/``1``/``yes`` enables; ``false``/``0``/``no`` forces off and skips config)
+    2. ``security.allow_private_urls`` in config.yaml
+    3. ``browser.allow_private_urls`` in config.yaml  (legacy / backward compat)
+
+    Result is cached for the process lifetime.
+    """
+    global _allow_private_resolved, _cached_allow_private
+    if _allow_private_resolved:
+        return _cached_allow_private
+
+    _allow_private_resolved = True
+    _cached_allow_private = False  # safe default
+
+    # 1. Env var override (highest priority)
+    env_val = os.getenv("HERMES_ALLOW_PRIVATE_URLS", "").strip().lower()
+    if env_val in ("true", "1", "yes"):
+        _cached_allow_private = True
+        return _cached_allow_private
+    if env_val in ("false", "0", "no"):
+        # Explicit false — don't fall through to config
+        return _cached_allow_private
+
+    # 2. Config file
+    try:
+        from hermes_cli.config import read_raw_config
+        cfg = read_raw_config()
+        # security.allow_private_urls (preferred)
+        sec = cfg.get("security", {})
+        if isinstance(sec, dict) and sec.get("allow_private_urls"):
+            _cached_allow_private = True
+            return _cached_allow_private
+        # browser.allow_private_urls (legacy fallback)
+        browser = cfg.get("browser", {})
+        if isinstance(browser, dict) and browser.get("allow_private_urls"):
+            _cached_allow_private = True
+            return _cached_allow_private
+    except Exception:
+        # Config unavailable (e.g. tests, early import) — keep default
+        pass
+
+    return _cached_allow_private
+
+
+def _reset_allow_private_cache() -> None:
+    """Reset the cached toggle — only for tests."""
+    global _allow_private_resolved, _cached_allow_private
+    _allow_private_resolved = False
+    _cached_allow_private = False
+

 def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
    """Return True if the IP should be blocked for SSRF protection."""
@@ -65,6 +151,11 @@ def is_safe_url(url: str) -> bool:

    Resolves the hostname to an IP and checks against private ranges.
    Fails closed: DNS errors and unexpected exceptions block the request.
+
+    When ``security.allow_private_urls`` is enabled (or the env var
+    ``HERMES_ALLOW_PRIVATE_URLS=true``), private-IP blocking is skipped.
+    Cloud metadata endpoints (169.254.169.254, metadata.google.internal)
+    remain blocked regardless — they are never legitimate agent targets.
    """
    try:
        parsed = urlparse(url)
@@ -73,11 +164,14 @@ def is_safe_url(url: str) -> bool:
        if not hostname:
            return False

-        # Block known internal hostnames
+        # Block known internal hostnames — ALWAYS, even with toggle on
        if hostname in _BLOCKED_HOSTNAMES:
            logger.warning("Blocked request to internal hostname: %s", hostname)
            return False

+        # Check the global toggle AFTER blocking metadata hostnames
+        allow_all_private = _global_allow_private_urls()
+
        allow_private_ip = _allows_private_ip_resolution(hostname, scheme)

        # Try to resolve and check IP
@@ -96,14 +190,27 @@ def is_safe_url(url: str) -> bool:
            except ValueError:
                continue

-            if not allow_private_ip and _is_blocked_ip(ip):
+            # Always block cloud metadata IPs and link-local, even with toggle on
+            if ip in _ALWAYS_BLOCKED_IPS or any(ip in net for net in _ALWAYS_BLOCKED_NETWORKS):
+                logger.warning(
+                    "Blocked request to cloud metadata address: %s -> %s",
+                    hostname, ip_str,
+                )
+                return False
+
+            if not allow_all_private and not allow_private_ip and _is_blocked_ip(ip):
                logger.warning(
                    "Blocked request to private/internal address: %s -> %s",
                    hostname, ip_str,
                )
                return False

-        if allow_private_ip:
+        if allow_all_private:
+            logger.debug(
+                "Allowing private/internal resolution (security.allow_private_urls=true): %s",
+                hostname,
+            )
+        elif allow_private_ip:
            logger.debug(
                "Allowing trusted hostname despite private/internal resolution: %s",
                hostname,
@@ -2,6 +2,7 @@ import React, { type PropsWithChildren, useContext, useInsertionEffect } from 'r
 import { c as _c } from 'react/compiler-runtime'

 import instances from '../instances.js'
+import { CURSOR_HOME, ERASE_SCREEN, ERASE_SCROLLBACK } from '../termio/csi.js'
 import { DISABLE_MOUSE_TRACKING, ENABLE_MOUSE_TRACKING, ENTER_ALT_SCREEN, EXIT_ALT_SCREEN } from '../termio/dec.js'
 import { TerminalWriteContext } from '../useTerminalNotification.js'

@@ -51,7 +52,9 @@ export function AlternateScreen(t0: Props) {
        return
      }

-      writeRaw(ENTER_ALT_SCREEN + '\x1B[2J\x1B[H' + (mouseTracking ? ENABLE_MOUSE_TRACKING : ''))
+      writeRaw(
+        ENTER_ALT_SCREEN + ERASE_SCROLLBACK + ERASE_SCREEN + CURSOR_HOME + (mouseTracking ? ENABLE_MOUSE_TRACKING : '')
+      )
      ink?.setAltScreenActive(true, mouseTracking)

      return () => {
@@ -69,6 +69,12 @@ const memoizedStylesForWrap: Record<NonNullable<Styles['textWrap']>, Styles> = {
    flexDirection: 'row',
    textWrap: 'wrap'
  },
+  'wrap-char': {
+    flexGrow: 0,
+    flexShrink: 1,
+    flexDirection: 'row',
+    textWrap: 'wrap-char'
+  },
  'wrap-trim': {
    flexGrow: 0,
    flexShrink: 1,
@@ -343,7 +343,7 @@ function wrapWithSoftWrap(
  maxWidth: number,
  textWrap: Parameters<typeof wrapText>[2]
 ): { wrapped: string; softWrap: boolean[] | undefined } {
-  if (textWrap !== 'wrap' && textWrap !== 'wrap-trim') {
+  if (textWrap !== 'wrap' && textWrap !== 'wrap-char' && textWrap !== 'wrap-trim') {
    return {
      wrapped: wrapText(plainText, maxWidth, textWrap),
      softWrap: undefined
@@ -55,6 +55,7 @@ export type TextStyles = {
 export type Styles = {
  readonly textWrap?:
    | 'wrap'
+    | 'wrap-char'
    | 'wrap-trim'
    | 'end'
    | 'middle'
@@ -50,6 +50,10 @@ export default function wrapText(text: string, maxWidth: number, wrapType: Style
    })
  }

+  if (wrapType === 'wrap-char') {
+    return wrapAnsi(text, maxWidth, { trim: false, hard: true, wordWrap: false })
+  }
+
  if (wrapType === 'wrap-trim') {
    return wrapAnsi(text, maxWidth, {
      trim: true,
@@ -30,3 +30,22 @@ describe('platform action modifier', () => {
    expect(isActionMod({ ctrl: false, meta: false, super: true })).toBe(false)
  })
 })
+
+describe('isMacActionFallback', () => {
+  it('routes raw Ctrl+K and Ctrl+W to readline kill-to-end / delete-word on macOS', async () => {
+    const { isMacActionFallback } = await importPlatform('darwin')
+
+    expect(isMacActionFallback({ ctrl: true, meta: false, super: false }, 'k', 'k')).toBe(true)
+    expect(isMacActionFallback({ ctrl: true, meta: false, super: false }, 'w', 'w')).toBe(true)
+    // Must not fire when Cmd (meta/super) is held — those are distinct chords.
+    expect(isMacActionFallback({ ctrl: true, meta: true, super: false }, 'k', 'k')).toBe(false)
+    expect(isMacActionFallback({ ctrl: true, meta: false, super: true }, 'w', 'w')).toBe(false)
+  })
+
+  it('is a no-op on non-macOS (Linux routes Ctrl+K/W through isActionMod directly)', async () => {
+    const { isMacActionFallback } = await importPlatform('linux')
+
+    expect(isMacActionFallback({ ctrl: true, meta: false, super: false }, 'k', 'k')).toBe(false)
+    expect(isMacActionFallback({ ctrl: true, meta: false, super: false }, 'w', 'w')).toBe(false)
+  })
+})
@@ -0,0 +1,407 @@
+import { describe, expect, it } from 'vitest'
+
+import {
+  buildSubagentTree,
+  descendantIds,
+  flattenTree,
+  fmtCost,
+  fmtDuration,
+  fmtTokens,
+  formatSummary,
+  hotnessBucket,
+  peakHotness,
+  sparkline,
+  topLevelSubagents,
+  treeTotals,
+  widthByDepth
+} from '../lib/subagentTree.js'
+import type { SubagentProgress } from '../types.js'
+
+const makeItem = (overrides: Partial<SubagentProgress> & Pick<SubagentProgress, 'id' | 'index'>): SubagentProgress => ({
+  depth: 0,
+  goal: overrides.id,
+  notes: [],
+  parentId: null,
+  status: 'running',
+  taskCount: 1,
+  thinking: [],
+  toolCount: 0,
+  tools: [],
+  ...overrides
+})
+
+describe('aggregate: tokens, cost, files, hotness', () => {
+  it('sums tokens and cost across subtree', () => {
+    const items = [
+      makeItem({ costUsd: 0.01, id: 'p', index: 0, inputTokens: 1000, outputTokens: 500 }),
+      makeItem({
+        costUsd: 0.005,
+        depth: 1,
+        id: 'c1',
+        index: 0,
+        inputTokens: 500,
+        outputTokens: 100,
+        parentId: 'p'
+      }),
+      makeItem({
+        costUsd: 0.008,
+        depth: 1,
+        id: 'c2',
+        index: 1,
+        inputTokens: 300,
+        outputTokens: 200,
+        parentId: 'p'
+      })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate).toMatchObject({
+      costUsd: 0.023,
+      inputTokens: 1800,
+      outputTokens: 800
+    })
+  })
+
+  it('counts files read + written across subtree', () => {
+    const items = [
+      makeItem({ filesRead: ['a.ts', 'b.ts'], id: 'p', index: 0 }),
+      makeItem({ depth: 1, filesWritten: ['c.ts'], id: 'c', index: 0, parentId: 'p' })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate.filesTouched).toBe(3)
+  })
+
+  it('hotness = totalTools / totalDuration', () => {
+    const items = [
+      makeItem({
+        durationSeconds: 10,
+        id: 'p',
+        index: 0,
+        status: 'completed',
+        toolCount: 20
+      })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate.hotness).toBeCloseTo(2)
+  })
+
+  it('hotness is zero when duration is zero', () => {
+    const items = [makeItem({ id: 'p', index: 0, toolCount: 10 })]
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate.hotness).toBe(0)
+  })
+})
+
+describe('hotnessBucket + peakHotness', () => {
+  it('peakHotness walks subtree', () => {
+    const items = [
+      makeItem({ durationSeconds: 100, id: 'p', index: 0, status: 'completed', toolCount: 1 }),
+      makeItem({
+        depth: 1,
+        durationSeconds: 1,
+        id: 'c',
+        index: 0,
+        parentId: 'p',
+        status: 'completed',
+        toolCount: 5
+      })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(peakHotness(tree)).toBeGreaterThan(2)
+  })
+
+  it('hotnessBucket clamps and normalizes', () => {
+    expect(hotnessBucket(0, 10, 4)).toBe(0)
+    expect(hotnessBucket(10, 10, 4)).toBe(3)
+    expect(hotnessBucket(5, 10, 4)).toBe(2)
+    expect(hotnessBucket(100, 10, 4)).toBe(3) // clamped
+    expect(hotnessBucket(5, 0, 4)).toBe(0) // guard against divide-by-zero
+  })
+})
+
+describe('fmtCost + fmtTokens', () => {
+  it('fmtCost handles ranges', () => {
+    expect(fmtCost(0)).toBe('')
+    expect(fmtCost(0.001)).toBe('<$0.01')
+    expect(fmtCost(0.42)).toBe('$0.42')
+    expect(fmtCost(1.23)).toBe('$1.23')
+    expect(fmtCost(12.5)).toBe('$12.5')
+  })
+
+  it('fmtTokens handles ranges', () => {
+    expect(fmtTokens(0)).toBe('0')
+    expect(fmtTokens(542)).toBe('542')
+    expect(fmtTokens(1234)).toBe('1.2k')
+    expect(fmtTokens(45678)).toBe('46k')
+  })
+})
+
+describe('formatSummary with tokens + cost', () => {
+  it('includes token + cost when present', () => {
+    expect(
+      formatSummary({
+        activeCount: 0,
+        costUsd: 0.42,
+        descendantCount: 3,
+        filesTouched: 0,
+        hotness: 0,
+        inputTokens: 8000,
+        maxDepthFromHere: 2,
+        outputTokens: 2000,
+        totalDuration: 30,
+        totalTools: 14
+      })
+    ).toBe('d2 · 3 agents · 14 tools · 30s · 10k tok · $0.42')
+  })
+})
+
+describe('buildSubagentTree', () => {
+  it('returns empty list for empty input', () => {
+    expect(buildSubagentTree([])).toEqual([])
+  })
+
+  it('treats flat list as top-level when no parentId is given', () => {
+    const items = [makeItem({ id: 'a', index: 0 }), makeItem({ id: 'b', index: 1 }), makeItem({ id: 'c', index: 2 })]
+
+    const tree = buildSubagentTree(items)
+    expect(tree).toHaveLength(3)
+    expect(tree.map(n => n.item.id)).toEqual(['a', 'b', 'c'])
+    expect(tree.every(n => n.children.length === 0)).toBe(true)
+  })
+
+  it('nests children under their parent by subagent_id', () => {
+    const items = [
+      makeItem({ id: 'parent', index: 0 }),
+      makeItem({ depth: 1, id: 'child-1', index: 0, parentId: 'parent' }),
+      makeItem({ depth: 1, id: 'child-2', index: 1, parentId: 'parent' })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree).toHaveLength(1)
+    expect(tree[0]!.children).toHaveLength(2)
+    expect(tree[0]!.children.map(n => n.item.id)).toEqual(['child-1', 'child-2'])
+  })
+
+  it('builds multi-level nesting', () => {
+    const items = [
+      makeItem({ id: 'p', index: 0 }),
+      makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p' }),
+      makeItem({ depth: 2, id: 'gc', index: 0, parentId: 'c' })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.children[0]!.children[0]!.item.id).toBe('gc')
+    expect(tree[0]!.aggregate.maxDepthFromHere).toBe(2)
+    expect(tree[0]!.aggregate.descendantCount).toBe(2)
+  })
+
+  it('promotes orphaned children (missing parent) to top level', () => {
+    const items = [makeItem({ id: 'a', index: 0 }), makeItem({ depth: 1, id: 'orphan', index: 1, parentId: 'ghost' })]
+
+    const tree = buildSubagentTree(items)
+    expect(tree).toHaveLength(2)
+    expect(tree.map(n => n.item.id)).toEqual(['a', 'orphan'])
+  })
+
+  it('stable sort: children ordered by (depth, index) not insert order', () => {
+    const items = [
+      makeItem({ id: 'p', index: 0 }),
+      makeItem({ depth: 1, id: 'c3', index: 2, parentId: 'p' }),
+      makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p' }),
+      makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p' })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.children.map(n => n.item.id)).toEqual(['c1', 'c2', 'c3'])
+  })
+})
+
+describe('aggregate', () => {
+  it('sums tool counts and durations across subtree', () => {
+    const items = [
+      makeItem({ durationSeconds: 10, id: 'p', index: 0, status: 'completed', toolCount: 5 }),
+      makeItem({ depth: 1, durationSeconds: 4, id: 'c1', index: 0, parentId: 'p', status: 'completed', toolCount: 3 }),
+      makeItem({ depth: 1, durationSeconds: 2, id: 'c2', index: 1, parentId: 'p', status: 'completed', toolCount: 1 })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate).toMatchObject({
+      activeCount: 0,
+      descendantCount: 2,
+      totalDuration: 16,
+      totalTools: 9
+    })
+  })
+
+  it('counts queued + running as active', () => {
+    const items = [
+      makeItem({ id: 'p', index: 0, status: 'running' }),
+      makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p', status: 'queued' }),
+      makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p', status: 'completed' })
+    ]
+
+    const tree = buildSubagentTree(items)
+    expect(tree[0]!.aggregate.activeCount).toBe(2)
+  })
+})
+
+describe('widthByDepth', () => {
+  it('returns empty array for empty tree', () => {
+    expect(widthByDepth([])).toEqual([])
+  })
+
+  it('tallies nodes at each depth', () => {
+    const items = [
+      makeItem({ id: 'p1', index: 0 }),
+      makeItem({ id: 'p2', index: 1 }),
+      makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p1' }),
+      makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p1' }),
+      makeItem({ depth: 1, id: 'c3', index: 0, parentId: 'p2' }),
+      makeItem({ depth: 2, id: 'gc1', index: 0, parentId: 'c1' })
+    ]
+
+    expect(widthByDepth(buildSubagentTree(items))).toEqual([2, 3, 1])
+  })
+})
+
+describe('treeTotals', () => {
+  it('folds a full tree into a single rollup', () => {
+    const items = [
+      makeItem({ id: 'p1', index: 0, toolCount: 5 }),
+      makeItem({ id: 'p2', index: 1, toolCount: 2 }),
+      makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p1', toolCount: 3 })
+    ]
+
+    const totals = treeTotals(buildSubagentTree(items))
+    expect(totals.descendantCount).toBe(3)
+    expect(totals.totalTools).toBe(10)
+    expect(totals.maxDepthFromHere).toBe(2)
+  })
+
+  it('returns zeros for empty tree', () => {
+    expect(treeTotals([])).toEqual({
+      activeCount: 0,
+      costUsd: 0,
+      descendantCount: 0,
+      filesTouched: 0,
+      hotness: 0,
+      inputTokens: 0,
+      maxDepthFromHere: 0,
+      outputTokens: 0,
+      totalDuration: 0,
+      totalTools: 0
+    })
+  })
+})
+
+describe('flattenTree + descendantIds', () => {
+  const items = [
+    makeItem({ id: 'p', index: 0 }),
+    makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p' }),
+    makeItem({ depth: 2, id: 'gc', index: 0, parentId: 'c1' }),
+    makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p' })
+  ]
+
+  it('flattens in visit order (depth-first, pre-order)', () => {
+    const tree = buildSubagentTree(items)
+    expect(flattenTree(tree).map(n => n.item.id)).toEqual(['p', 'c1', 'gc', 'c2'])
+  })
+
+  it('collects descendant ids excluding the node itself', () => {
+    const tree = buildSubagentTree(items)
+    expect(descendantIds(tree[0]!)).toEqual(['c1', 'gc', 'c2'])
+  })
+})
+
+describe('sparkline', () => {
+  it('returns empty string for empty input', () => {
+    expect(sparkline([])).toBe('')
+  })
+
+  it('renders zeroes as spaces (not bottom glyph)', () => {
+    expect(sparkline([0, 0])).toBe('  ')
+  })
+
+  it('scales to the max value', () => {
+    const out = sparkline([1, 8])
+    expect(out).toHaveLength(2)
+    expect(out[1]).toBe('█')
+  })
+
+  it('sparse widths render as expected', () => {
+    const out = sparkline([2, 3, 7, 4])
+    expect(out).toHaveLength(4)
+    expect([...out].every(ch => /[\s▁-█]/.test(ch))).toBe(true)
+  })
+})
+
+describe('formatSummary', () => {
+  const emptyTotals = {
+    activeCount: 0,
+    costUsd: 0,
+    descendantCount: 0,
+    filesTouched: 0,
+    hotness: 0,
+    inputTokens: 0,
+    maxDepthFromHere: 0,
+    outputTokens: 0,
+    totalDuration: 0,
+    totalTools: 0
+  }
+
+  it('collapses zero-valued components', () => {
+    expect(formatSummary({ ...emptyTotals, descendantCount: 1 })).toBe('d0 · 1 agent')
+  })
+
+  it('emits rich summary with all pieces', () => {
+    expect(
+      formatSummary({
+        ...emptyTotals,
+        activeCount: 2,
+        descendantCount: 7,
+        maxDepthFromHere: 3,
+        totalDuration: 134,
+        totalTools: 124
+      })
+    ).toBe('d3 · 7 agents · 124 tools · 2m 14s · ⚡2')
+  })
+})
+
+describe('fmtDuration', () => {
+  it('formats under a minute as plain seconds', () => {
+    expect(fmtDuration(0)).toBe('0s')
+    expect(fmtDuration(42)).toBe('42s')
+    expect(fmtDuration(59.4)).toBe('59s')
+  })
+
+  it('formats whole minutes without trailing seconds', () => {
+    expect(fmtDuration(60)).toBe('1m')
+    expect(fmtDuration(180)).toBe('3m')
+  })
+
+  it('mixes minutes and seconds', () => {
+    expect(fmtDuration(134)).toBe('2m 14s')
+    expect(fmtDuration(605)).toBe('10m 5s')
+  })
+})
+
+describe('topLevelSubagents', () => {
+  it('returns items with no parent', () => {
+    const items = [makeItem({ id: 'a', index: 0 }), makeItem({ id: 'b', index: 1 })]
+    expect(topLevelSubagents(items).map(s => s.id)).toEqual(['a', 'b'])
+  })
+
+  it('excludes children whose parent is present', () => {
+    const items = [makeItem({ id: 'p', index: 0 }), makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p' })]
+
+    expect(topLevelSubagents(items).map(s => s.id)).toEqual(['p'])
+  })
+
+  it('promotes orphans whose parent is missing', () => {
+    const items = [makeItem({ id: 'a', index: 0 }), makeItem({ depth: 1, id: 'orphan', index: 1, parentId: 'ghost' })]
+    expect(topLevelSubagents(items).map(s => s.id)).toEqual(['a', 'orphan'])
+  })
+})
@@ -0,0 +1,60 @@
+import { describe, expect, it } from 'vitest'
+
+import { cursorLayout, offsetFromPosition } from '../components/textInput.js'
+
+describe('cursorLayout — char-wrap parity with wrap-ansi', () => {
+  it('places cursor mid-line at its column', () => {
+    expect(cursorLayout('hello world', 6, 40)).toEqual({ column: 6, line: 0 })
+  })
+
+  it('places cursor at end of a non-full line', () => {
+    expect(cursorLayout('hi', 2, 10)).toEqual({ column: 2, line: 0 })
+  })
+
+  it('wraps to next line when cursor lands exactly at the right edge', () => {
+    // 8 chars on an 8-col line: text fills the row exactly; the cursor's
+    // inverted-space cell overflows to col 0 of the next row.
+    expect(cursorLayout('abcdefgh', 8, 8)).toEqual({ column: 0, line: 1 })
+  })
+
+  it('tracks a word across a char-wrap boundary without jumping', () => {
+    // With wordWrap:false, "hello world" at cols=8 is "hello wo\nrld" —
+    // typing incremental letters doesn't reshuffle the word across lines.
+    expect(cursorLayout('hello wo', 8, 8)).toEqual({ column: 0, line: 1 })
+    expect(cursorLayout('hello wor', 9, 8)).toEqual({ column: 1, line: 1 })
+    expect(cursorLayout('hello worl', 10, 8)).toEqual({ column: 2, line: 1 })
+  })
+
+  it('honours explicit newlines', () => {
+    expect(cursorLayout('one\ntwo', 5, 40)).toEqual({ column: 1, line: 1 })
+    expect(cursorLayout('one\ntwo', 4, 40)).toEqual({ column: 0, line: 1 })
+  })
+
+  it('does not wrap when cursor is before the right edge', () => {
+    expect(cursorLayout('abcdefg', 7, 8)).toEqual({ column: 7, line: 0 })
+  })
+})
+
+describe('offsetFromPosition — char-wrap inverse of cursorLayout', () => {
+  it('returns 0 for empty input', () => {
+    expect(offsetFromPosition('', 0, 0, 10)).toBe(0)
+  })
+
+  it('maps clicks within a single line', () => {
+    expect(offsetFromPosition('hello', 0, 3, 40)).toBe(3)
+  })
+
+  it('maps clicks past end to value length', () => {
+    expect(offsetFromPosition('hi', 0, 10, 40)).toBe(2)
+  })
+
+  it('maps clicks on a wrapped second row at cols boundary', () => {
+    // "abcdefghij" at cols=8 wraps to "abcdefgh\nij" — click at row 1 col 0
+    // should land on 'i' (offset 8).
+    expect(offsetFromPosition('abcdefghij', 1, 0, 8)).toBe(8)
+  })
+
+  it('maps clicks past a \\n into the target line', () => {
+    expect(offsetFromPosition('one\ntwo', 1, 2, 40)).toBe(6)
+  })
+})
@@ -1,7 +1,7 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest'

 import { $uiState, resetUiState } from '../app/uiStore.js'
-import { applyDisplay } from '../app/useConfigSync.js'
+import { applyDisplay, normalizeStatusBar } from '../app/useConfigSync.js'

 describe('applyDisplay', () => {
  beforeEach(() => {
@@ -36,10 +36,20 @@ describe('applyDisplay', () => {
    expect(s.inlineDiffs).toBe(false)
    expect(s.showCost).toBe(true)
    expect(s.showReasoning).toBe(true)
-    expect(s.statusBar).toBe(false)
+    expect(s.statusBar).toBe('off')
    expect(s.streaming).toBe(false)
  })

+  it('coerces legacy true + "on" alias to top', () => {
+    const setBell = vi.fn()
+
+    applyDisplay({ config: { display: { tui_statusbar: true as unknown as 'on' } } }, setBell)
+    expect($uiState.get().statusBar).toBe('top')
+
+    applyDisplay({ config: { display: { tui_statusbar: 'on' } } }, setBell)
+    expect($uiState.get().statusBar).toBe('top')
+  })
+
  it('applies v1 parity defaults when display fields are missing', () => {
    const setBell = vi.fn()

@@ -50,7 +60,7 @@ describe('applyDisplay', () => {
    expect(s.inlineDiffs).toBe(true)
    expect(s.showCost).toBe(false)
    expect(s.showReasoning).toBe(false)
-    expect(s.statusBar).toBe(true)
+    expect(s.statusBar).toBe('top')
    expect(s.streaming).toBe(true)
  })

@@ -64,4 +74,42 @@ describe('applyDisplay', () => {
    expect(s.inlineDiffs).toBe(true)
    expect(s.streaming).toBe(true)
  })
+
+  it('accepts the new string statusBar modes', () => {
+    const setBell = vi.fn()
+
+    applyDisplay({ config: { display: { tui_statusbar: 'bottom' } } }, setBell)
+    expect($uiState.get().statusBar).toBe('bottom')
+
+    applyDisplay({ config: { display: { tui_statusbar: 'top' } } }, setBell)
+    expect($uiState.get().statusBar).toBe('top')
+  })
+})
+
+describe('normalizeStatusBar', () => {
+  it('maps legacy bool + on alias to top/off', () => {
+    expect(normalizeStatusBar(true)).toBe('top')
+    expect(normalizeStatusBar(false)).toBe('off')
+    expect(normalizeStatusBar('on')).toBe('top')
+  })
+
+  it('passes through the canonical enum', () => {
+    expect(normalizeStatusBar('off')).toBe('off')
+    expect(normalizeStatusBar('top')).toBe('top')
+    expect(normalizeStatusBar('bottom')).toBe('bottom')
+  })
+
+  it('defaults missing/unknown values to top', () => {
+    expect(normalizeStatusBar(undefined)).toBe('top')
+    expect(normalizeStatusBar(null)).toBe('top')
+    expect(normalizeStatusBar('sideways')).toBe('top')
+    expect(normalizeStatusBar(42)).toBe('top')
+  })
+
+  it('trims whitespace and folds case', () => {
+    expect(normalizeStatusBar(' Bottom ')).toBe('bottom')
+    expect(normalizeStatusBar('TOP')).toBe('top')
+    expect(normalizeStatusBar('  on  ')).toBe('top')
+    expect(normalizeStatusBar('OFF')).toBe('off')
+  })
 })
@@ -1,11 +1,13 @@
 import { STREAM_BATCH_MS } from '../config/timing.js'
 import { buildSetupRequiredSections, SETUP_REQUIRED_TITLE } from '../content/setup.js'
-import type { CommandsCatalogResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js'
+import type { CommandsCatalogResponse, DelegationStatusResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js'
 import { rpcErrorMessage } from '../lib/rpc.js'
+import { topLevelSubagents } from '../lib/subagentTree.js'
 import { formatToolCall, stripAnsi } from '../lib/text.js'
 import { fromSkin } from '../theme.js'
 import type { Msg, SubagentProgress } from '../types.js'

+import { applyDelegationStatus, getDelegationState } from './delegationStore.js'
 import type { GatewayEventHandlerContext } from './interfaces.js'
 import { patchOverlayState } from './overlayStore.js'
 import { turnController } from './turnController.js'
@@ -53,6 +55,55 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
  let pendingThinkingStatus = ''
  let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null

+  // Inject the disk-save callback into turnController so recordMessageComplete
+  // can fire-and-forget a persist without having to plumb a gateway ref around.
+  turnController.persistSpawnTree = async (subagents, sessionId) => {
+    try {
+      const startedAt = subagents.reduce<number>((min, s) => {
+        if (!s.startedAt) {
+          return min
+        }
+
+        return min === 0 ? s.startedAt : Math.min(min, s.startedAt)
+      }, 0)
+
+      const top = topLevelSubagents(subagents)
+        .map(s => s.goal)
+        .filter(Boolean)
+        .slice(0, 2)
+
+      const label = top.length ? top.join(' · ') : `${subagents.length} subagents`
+
+      await rpc('spawn_tree.save', {
+        finished_at: Date.now() / 1000,
+        label: label.slice(0, 120),
+        session_id: sessionId ?? 'default',
+        started_at: startedAt ? startedAt / 1000 : null,
+        subagents
+      })
+    } catch {
+      // Persistence is best-effort; in-memory history is the authoritative
+      // same-session source.  A write failure doesn't block the turn.
+    }
+  }
+
+  // Refresh delegation caps at most every 5s so the status bar HUD can
+  // render a warning close to the configured cap without spamming the RPC.
+  let lastDelegationFetchAt = 0
+
+  const refreshDelegationStatus = (force = false) => {
+    const now = Date.now()
+
+    if (!force && now - lastDelegationFetchAt < 5000) {
+      return
+    }
+
+    lastDelegationFetchAt = now
+    rpc<DelegationStatusResponse>('delegation.status', {})
+      .then(r => applyDelegationStatus(r))
+      .catch(() => {})
+  }
+
  const setStatus = (status: string) => {
    pendingThinkingStatus = ''

@@ -85,7 +136,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
    }, ms)
  }

-  const keepCompletedElseRunning = (s: SubagentProgress['status']) => (s === 'completed' ? s : 'running')
+  // Terminal statuses are never overwritten by late-arriving live events —
+  // otherwise a stale `subagent.start` / `spawn_requested` can clobber a
+  // `failed` or `interrupted` terminal state (Copilot review #14045).
+  const isTerminalStatus = (s: SubagentProgress['status']) => s === 'completed' || s === 'failed' || s === 'interrupted'
+
+  const keepTerminalElseRunning = (s: SubagentProgress['status']) => (isTerminalStatus(s) ? s : 'running')

  const handleReady = (skin?: GatewaySkin) => {
    if (skin) {
@@ -260,32 +316,28 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
        turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')

        return
+      case 'tool.complete': {
+        const inlineDiffText =
+          ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''

-      case 'tool.complete':
-        {
-          const inlineDiffText =
-            ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
-
-          turnController.recordToolComplete(
-            ev.payload.tool_id,
-            ev.payload.name,
-            ev.payload.error,
-            inlineDiffText ? '' : ev.payload.summary
-          )
-
-          if (!inlineDiffText) {
-            return
-          }
-
-          // Keep inline diffs attached to the assistant completion body so
-          // they render in the same message flow, not as a standalone system
-          // artifact that can look out-of-place around tool rows.
-          turnController.queueInlineDiff(inlineDiffText)
+        turnController.recordToolComplete(
+          ev.payload.tool_id,
+          ev.payload.name,
+          ev.payload.error,
+          inlineDiffText ? '' : ev.payload.summary
+        )

+        if (!inlineDiffText) {
          return
        }

+        // Keep inline diffs attached to the assistant completion body so
+        // they render in the same message flow, not as a standalone system
+        // artifact that can look out-of-place around tool rows.
+        turnController.queueInlineDiff(inlineDiffText)
+
        return
+      }

      case 'clarify.request':
        patchOverlayState({
@@ -329,8 +381,23 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:

        return

+      case 'subagent.spawn_requested':
+        // Child built but not yet running (waiting on ThreadPoolExecutor slot).
+        // Preserve any terminal state (completed / failed / interrupted) if a later event raced in before this one.
+        turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'queued' }))
+
+        // Prime the status-bar HUD so it can warn as depth/concurrency approaches the configured
+        // ceiling: forced fetch while caps are still unknown, otherwise at most one every 5s.
+        if (getDelegationState().maxSpawnDepth === null) {
+          refreshDelegationStatus(true)
+        } else {
+          refreshDelegationStatus()
+        }
+
+        return
+
      case 'subagent.start':
-        turnController.upsertSubagent(ev.payload, () => ({ status: 'running' }))
+        turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'running' }))

        return
      case 'subagent.thinking': {
@@ -340,10 +407,16 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          return
        }

-        turnController.upsertSubagent(ev.payload, c => ({
-          status: keepCompletedElseRunning(c.status),
-          thinking: pushThinking(c.thinking, text)
-        }))
+        // Update-only: never resurrect subagents whose spawn_requested/start
+        // we missed or that already flushed via message.complete.
+        turnController.upsertSubagent(
+          ev.payload,
+          c => ({
+            status: keepTerminalElseRunning(c.status),
+            thinking: pushThinking(c.thinking, text)
+          }),
+          { createIfMissing: false }
+        )

        return
      }
@@ -354,10 +427,14 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          ev.payload.tool_preview ?? ev.payload.text ?? ''
        )

-        turnController.upsertSubagent(ev.payload, c => ({
-          status: keepCompletedElseRunning(c.status),
-          tools: pushTool(c.tools, line)
-        }))
+        turnController.upsertSubagent(
+          ev.payload,
+          c => ({
+            status: keepTerminalElseRunning(c.status),
+            tools: pushTool(c.tools, line)
+          }),
+          { createIfMissing: false }
+        )

        return
      }
@@ -369,20 +446,28 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
          return
        }

-        turnController.upsertSubagent(ev.payload, c => ({
-          notes: pushNote(c.notes, text),
-          status: keepCompletedElseRunning(c.status)
-        }))
+        turnController.upsertSubagent(
+          ev.payload,
+          c => ({
+            notes: pushNote(c.notes, text),
+            status: keepTerminalElseRunning(c.status)
+          }),
+          { createIfMissing: false }
+        )

        return
      }

      case 'subagent.complete':
-        turnController.upsertSubagent(ev.payload, c => ({
-          durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds,
-          status: ev.payload.status ?? 'completed',
-          summary: ev.payload.summary || ev.payload.text || c.summary
-        }))
+        turnController.upsertSubagent(
+          ev.payload,
+          c => ({
+            durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds,
+            status: ev.payload.status ?? 'completed',
+            summary: ev.payload.summary || ev.payload.text || c.summary
+          }),
+          { createIfMissing: false }
+        )

        return

@@ -0,0 +1,77 @@
+import { atom } from 'nanostores'
+
+import type { DelegationStatusResponse } from '../gatewayTypes.js'
+
+export interface DelegationState {
+  // Last known caps from `delegation.status` RPC.  null until fetched.
+  maxConcurrentChildren: null | number
+  maxSpawnDepth: null | number
+  // True when spawning is globally paused (see tools/delegate_tool.py).
+  paused: boolean
+  // Wall-clock time (Date.now(), ms since epoch) of the last applied status response.
+  updatedAt: null | number
+}
+
+const buildState = (): DelegationState => ({
+  maxConcurrentChildren: null,
+  maxSpawnDepth: null,
+  paused: false,
+  updatedAt: null
+})
+
+export const $delegationState = atom<DelegationState>(buildState())
+
+export const getDelegationState = () => $delegationState.get()
+
+export const patchDelegationState = (next: Partial<DelegationState>) =>
+  $delegationState.set({ ...$delegationState.get(), ...next })
+
+export const resetDelegationState = () => $delegationState.set(buildState())
+
+// ── Overlay accordion open-state ──────────────────────────────────────
+//
+// Lifted out of OverlaySection's local useState so collapse choices
+// survive:
+//   - navigating to a different subagent (Detail remounts)
+//   - switching list ↔ detail mode (Detail unmounts in list mode)
+//   - walking history (←/→)
+// Keyed by section title; missing entries fall back to the section's
+// `defaultOpen` prop.
+
+export const $overlaySectionsOpen = atom<Record<string, boolean>>({})
+
+/** Flip a section's open flag (own keys only, so 'constructor' etc. start from `defaultOpen`). */
+export const toggleOverlaySection = (title: string, defaultOpen: boolean) => {
+  const state = $overlaySectionsOpen.get()
+  const current = Object.prototype.hasOwnProperty.call(state, title) ? state[title]! : defaultOpen
+  $overlaySectionsOpen.set({ ...state, [title]: !current })
+}
+
+/** Stored open flag for `title`, or `defaultOpen` if never toggled (own keys only). */
+export const getOverlaySectionOpen = (title: string, defaultOpen: boolean): boolean => {
+  const state = $overlaySectionsOpen.get()
+  return Object.prototype.hasOwnProperty.call(state, title) ? state[title]! : defaultOpen
+}
+
+/** Merge a raw RPC response into the store.  Tolerant of partial/omitted fields. */
+export const applyDelegationStatus = (r: DelegationStatusResponse | null | undefined) => {
+  if (!r) {
+    return
+  }
+
+  const patch: Partial<DelegationState> = { updatedAt: Date.now() }
+
+  if (typeof r.max_spawn_depth === 'number') {
+    patch.maxSpawnDepth = r.max_spawn_depth
+  }
+
+  if (typeof r.max_concurrent_children === 'number') {
+    patch.maxConcurrentChildren = r.max_concurrent_children
+  }
+
+  if (typeof r.paused === 'boolean') {
+    patch.paused = r.paused
+  }
+
+  patchDelegationState(patch)
+}
@@ -27,6 +27,8 @@ export interface StateSetter<T> {
  (value: SetStateAction<T>): void
 }

+export type StatusBarMode = 'bottom' | 'off' | 'top'
+
 export interface SelectionApi {
  clearSelection: () => void
  copySelection: () => string
@@ -53,6 +55,8 @@ export interface GatewayProviderProps {
 }

 export interface OverlayState {
+  agents: boolean
+  agentsInitialHistoryIndex: number
  approval: ApprovalReq | null
  clarify: ClarifyReq | null
  confirm: ConfirmReq | null
@@ -87,7 +91,7 @@ export interface UiState {
  showReasoning: boolean
  sid: null | string
  status: string
-  statusBar: boolean
+  statusBar: StatusBarMode
  streaming: boolean
  theme: Theme
  usage: Usage
@@ -3,6 +3,8 @@ import { atom, computed } from 'nanostores'
 import type { OverlayState } from './interfaces.js'

 const buildOverlayState = (): OverlayState => ({
+  agents: false,
+  agentsInitialHistoryIndex: 0,
  approval: null,
  clarify: null,
  confirm: null,
@@ -18,8 +20,8 @@ export const $overlayState = atom<OverlayState>(buildOverlayState())

 export const $isBlocked = computed(
  $overlayState,
-  ({ approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
-    Boolean(approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
+  ({ agents, approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
+    Boolean(agents || approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
 )

 export const getOverlayState = () => $overlayState.get()
@@ -27,4 +29,23 @@ export const getOverlayState = () => $overlayState.get()
 export const patchOverlayState = (next: Partial<OverlayState> | ((state: OverlayState) => OverlayState)) =>
  $overlayState.set(typeof next === 'function' ? next($overlayState.get()) : { ...$overlayState.get(), ...next })

+/** Full reset — used by session/turn teardown and tests. */
 export const resetOverlayState = () => $overlayState.set(buildOverlayState())
+
+/**
+ * Soft reset: drop FLOW-scoped overlays (approval / clarify / confirm / sudo
+ * / secret / pager) but PRESERVE user-toggled ones — agents dashboard, model
+ * picker, skills hub, session picker.  Those are opened deliberately and
+ * shouldn't vanish when a turn ends.  Called from turnController.idle() on
+ * every turn completion / interrupt; the old "reset everything" behaviour
+ * silently closed /agents the moment delegation finished.
+ */
+export const resetFlowOverlays = () =>
+  $overlayState.set({
+    ...buildOverlayState(),
+    agents: $overlayState.get().agents,
+    agentsInitialHistoryIndex: $overlayState.get().agentsInitialHistoryIndex,
+    modelPicker: $overlayState.get().modelPicker,
+    picker: $overlayState.get().picker,
+    skillsHub: $overlayState.get().skillsHub
+  })
@@ -11,6 +11,7 @@ import type {
 import { writeOsc52Clipboard } from '../../../lib/osc52.js'
 import { configureDetectedTerminalKeybindings, configureTerminalKeybindings } from '../../../lib/terminalSetup.js'
 import type { DetailsMode, Msg, PanelSection } from '../../../types.js'
+import type { StatusBarMode } from '../../interfaces.js'
 import { patchOverlayState } from '../../overlayStore.js'
 import { patchUiState } from '../../uiStore.js'
 import type { SlashCommand } from '../types.js'
@@ -305,19 +306,29 @@ export const coreCommands: SlashCommand[] = [

  {
    aliases: ['sb'],
-    help: 'toggle status bar',
+    help: 'status bar position (on|off|top|bottom)',
    name: 'statusbar',
    run: (arg, ctx) => {
-      const next = flagFromArg(arg, ctx.ui.statusBar)
+      const mode = arg.trim().toLowerCase()
+      const toggle: StatusBarMode = ctx.ui.statusBar === 'off' ? 'top' : 'off'

-      if (next === null) {
-        return ctx.transcript.sys('usage: /statusbar [on|off|toggle]')
+      const next: null | StatusBarMode =
+        !mode || mode === 'toggle'
+          ? toggle
+          : mode === 'on' || mode === 'top'
+            ? 'top'
+            : mode === 'off' || mode === 'bottom'
+              ? mode
+              : null
+
+      if (!next) {
+        return ctx.transcript.sys('usage: /statusbar [on|off|top|bottom|toggle]')
      }

      patchUiState({ statusBar: next })
-      ctx.gateway.rpc<ConfigSetResponse>('config.set', { key: 'statusbar', value: next ? 'on' : 'off' }).catch(() => {})
+      ctx.gateway.rpc<ConfigSetResponse>('config.set', { key: 'statusbar', value: next }).catch(() => {})

-      queueMicrotask(() => ctx.transcript.sys(`status bar ${next ? 'on' : 'off'}`))
+      queueMicrotask(() => ctx.transcript.sys(`status bar ${next}`))
    }
  },

@@ -1,6 +1,14 @@
-import type { SlashExecResponse, ToolsConfigureResponse } from '../../../gatewayTypes.js'
+import type {
+  DelegationPauseResponse,
+  SlashExecResponse,
+  SpawnTreeListResponse,
+  SpawnTreeLoadResponse,
+  ToolsConfigureResponse
+} from '../../../gatewayTypes.js'
 import type { PanelSection } from '../../../types.js'
+import { applyDelegationStatus, getDelegationState } from '../../delegationStore.js'
 import { patchOverlayState } from '../../overlayStore.js'
+import { getSpawnHistory, pushDiskSnapshot, setDiffPair, type SpawnSnapshot } from '../../spawnHistoryStore.js'
 import type { SlashCommand } from '../types.js'

 interface SkillInfo {
@@ -42,6 +50,163 @@ interface SkillsBrowseResponse {
 }

 export const opsCommands: SlashCommand[] = [
+  {
+    aliases: ['tasks'],
+    help: 'open the spawn-tree dashboard (live audit + kill/pause controls)',
+    name: 'agents',
+    run: (arg, ctx) => {
+      const verb = arg.trim().toLowerCase()
+
+      // Mirror the gateway `/agents [pause|resume|status]` CLI: an explicit
+      // verb acts immediately and never opens the overlay, which keeps the
+      // command usable from scripts and multi-step flows.
+      if (['pause', 'resume', 'unpause'].includes(verb)) {
+        ctx.gateway.gw
+          .request<DelegationPauseResponse>('delegation.pause', { paused: verb === 'pause' })
+          .then(res => {
+            const paused = res?.paused
+            applyDelegationStatus({ paused })
+            ctx.transcript.sys(`delegation · ${paused ? 'paused' : 'resumed'}`)
+          })
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      if (verb === 'status') {
+        const { maxConcurrentChildren, maxSpawnDepth, paused } = getDelegationState()
+        const caps = `d${maxSpawnDepth ?? '?'}/${maxConcurrentChildren ?? '?'}`
+
+        ctx.transcript.sys(`delegation · ${paused ? 'paused' : 'active'} · caps ${caps}`)
+
+        return
+      }
+
+      patchOverlayState({ agents: true, agentsInitialHistoryIndex: 0 })
+    }
+  },
+
+  {
+    help: 'replay a completed spawn tree · `/replay [N|last|list|load <path>]`',
+    name: 'replay',
+    run: (arg, ctx) => {
+      // Modes: `list`/`ls` prints the on-disk archive for this session,
+      // `load <path>` pulls one archived tree into history and opens it, and
+      // anything else (N, `last`, or nothing) opens an in-memory snapshot.
+      const history = getSpawnHistory()
+      const raw = arg.trim()
+      const lower = raw.toLowerCase()
+
+      // ── Disk-backed listing ─────────────────────────────────────
+      if (lower === 'list' || lower === 'ls') {
+        ctx.gateway
+          .rpc<SpawnTreeListResponse>('spawn_tree.list', { limit: 30, session_id: ctx.sid ?? 'default' })
+          .then(
+            ctx.guarded<SpawnTreeListResponse>(r => {
+              const entries = r.entries ?? []
+
+              if (!entries.length) {
+                return ctx.transcript.sys('no archived spawn trees on disk for this session')
+              }
+
+              const rows: [string, string][] = entries.map(e => {
+                const ts = e.finished_at ? new Date(e.finished_at * 1000).toLocaleString() : '?'
+                const label = e.label || `${e.count} subagents`
+
+                return [`${ts} · ${e.count}×`, `${label}\n  ${e.path}`]
+              })
+
+              ctx.transcript.panel('Archived spawn trees', [{ rows }])
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      // ── Disk-backed load by path (bare `load` → usage hint) ──────
+      if (lower === 'load' || lower.startsWith('load ')) {
+        const path = raw.slice(5).trim()
+
+        if (!path) {
+          return ctx.transcript.sys('usage: /replay load <path>')
+        }
+
+        ctx.gateway
+          .rpc<SpawnTreeLoadResponse>('spawn_tree.load', { path })
+          .then(
+            ctx.guarded<SpawnTreeLoadResponse>(r => {
+              if (!r.subagents?.length) {
+                return ctx.transcript.sys('snapshot empty or unreadable')
+              }
+
+              // Push onto the in-memory history so the overlay picks it up
+              // by index 1 just like any other snapshot.
+              pushDiskSnapshot(r, path)
+              patchOverlayState({ agents: true, agentsInitialHistoryIndex: 1 })
+            })
+          )
+          .catch(ctx.guardedErr)
+
+        return
+      }
+
+      // ── In-memory nav (same-session) ─────────────────────────────
+      if (!history.length) {
+        return ctx.transcript.sys('no completed spawn trees this session · try /replay list')
+      }
+
+      let index = 1
+
+      if (raw && lower !== 'last') {
+        const parsed = parseInt(raw, 10)
+
+        if (Number.isNaN(parsed) || parsed < 1 || parsed > history.length) {
+          return ctx.transcript.sys(`replay: index out of range 1..${history.length} · use /replay list for disk`)
+        }
+
+        index = parsed
+      }
+
+      patchOverlayState({ agents: true, agentsInitialHistoryIndex: index })
+    }
+  },
+
+  {
+    help: 'diff two completed spawn trees · `/replay-diff <baseline> <candidate>` (1-based in-memory history indexes, 1 = newest)',
+    name: 'replay-diff',
+    run: (arg, ctx) => {
+      // Both operands are 1-based indexes into the in-memory spawn history
+      // (disk archives must be pulled in with `/replay load <path>` first).
+      const parts = arg.trim().split(/\s+/).filter(Boolean)
+
+      if (parts.length !== 2) {
+        return ctx.transcript.sys('usage: /replay-diff <a> <b>  (e.g. /replay-diff 1 2 for last two)')
+      }
+
+      const [a, b] = parts
+      const history = getSpawnHistory()
+
+      const resolve = (token: string): null | SpawnSnapshot => {
+        // parseInt yields NaN for non-numeric tokens, which fails the range test.
+        const n = parseInt(token, 10)
+
+        return n >= 1 && n <= history.length ? (history[n - 1] ?? null) : null
+      }
+
+      const baseline = resolve(a!)
+      const candidate = resolve(b!)
+
+      if (!baseline || !candidate) {
+        return ctx.transcript.sys(`replay-diff: could not resolve indices · history has ${history.length} entries`)
+      }
+
+      // Publish the pair to spawnHistoryStore, then open the agents overlay.
+      setDiffPair({ baseline, candidate })
+      patchOverlayState({ agents: true, agentsInitialHistoryIndex: 0 })
+    }
+  },
+
  {
    help: 'browse, inspect, install skills',
    name: 'skills',
@@ -0,0 +1,139 @@
+import { atom } from 'nanostores'
+
+import type { SpawnTreeLoadResponse } from '../gatewayTypes.js'
+import type { SubagentProgress } from '../types.js'
+
+export interface SpawnSnapshot {
+  finishedAt: number
+  fromDisk?: boolean
+  id: string
+  label: string
+  path?: string
+  sessionId: null | string
+  startedAt: number
+  subagents: SubagentProgress[]
+}
+
+export interface SpawnDiffPair {
+  baseline: SpawnSnapshot
+  candidate: SpawnSnapshot
+}
+
+const HISTORY_LIMIT = 10
+
+export const $spawnHistory = atom<SpawnSnapshot[]>([])
+export const $spawnDiff = atom<null | SpawnDiffPair>(null)
+
+export const getSpawnHistory = () => $spawnHistory.get()
+export const getSpawnDiff = () => $spawnDiff.get()
+
+export const clearSpawnHistory = () => $spawnHistory.set([])
+export const clearDiffPair = () => $spawnDiff.set(null)
+export const setDiffPair = (pair: SpawnDiffPair) => $spawnDiff.set(pair)
+
+/**
+ * Commit a finished turn's spawn tree to the in-memory history (newest
+ * first).  Keeps at most HISTORY_LIMIT snapshots — turns with no subagents
+ * are dropped.
+ *
+ * The in-memory list serves same-session debugging ("I just ran a fan-out,
+ * it misbehaved, let me look at what happened") without an RPC round-trip.
+ * Each subagent entry is shallow-copied so the snapshot owns its own objects.
+ */
+export const pushSnapshot = (
+  subagents: readonly SubagentProgress[],
+  meta: { sessionId?: null | string; startedAt?: null | number }
+) => {
+  if (!subagents.length) {
+    return
+  }
+
+  const now = Date.now()
+  const started = meta.startedAt ?? Math.min(...subagents.map(s => s.startedAt ?? now))
+
+  const snap: SpawnSnapshot = {
+    finishedAt: now,
+    id: `snap-${now.toString(36)}`,
+    label: summarizeLabel(subagents),
+    sessionId: meta.sessionId ?? null,
+    startedAt: Number.isFinite(started) ? started : now,
+    subagents: subagents.map(item => ({ ...item }))
+  }
+
+  const next = [snap, ...$spawnHistory.get()].slice(0, HISTORY_LIMIT)
+  $spawnHistory.set(next)
+}
+
+function summarizeLabel(subagents: readonly SubagentProgress[]): string {
+  // Roots: no parent, or a parent id that is absent from this snapshot.
+  const ids = new Set(subagents.map(s => s.id))
+  const roots = subagents.filter(s => s.parentId == null || !ids.has(s.parentId))
+  const goals = roots.slice(0, 2).map(s => s.goal || 'subagent')
+  const fallback = `${subagents.length} agent${subagents.length === 1 ? '' : 's'}`
+
+  return goals.join(' · ') || fallback
+}
+
+/**
+ * Push a disk-loaded snapshot onto the front of the history stack so the
+ * overlay can pick it up at index 1 via /replay load.  Entries are normalised
+ * into the live SubagentProgress shape, and any earlier copy of the same
+ * path is dropped first so re-loading never leaves duplicate ids behind.
+ */
+export const pushDiskSnapshot = (r: SpawnTreeLoadResponse, path: string) => {
+  const raw = Array.isArray(r.subagents) ? r.subagents : []
+  const normalised = raw.map(normaliseSubagent)
+
+  if (!normalised.length) {
+    return
+  }
+
+  const snap: SpawnSnapshot = {
+    finishedAt: (r.finished_at ?? Date.now() / 1000) * 1000, // payload times are treated as epoch seconds
+    fromDisk: true,
+    id: `disk-${path}`,
+    label: r.label || `${normalised.length} subagents`,
+    path,
+    sessionId: r.session_id ?? null,
+    startedAt: (r.started_at ?? r.finished_at ?? Date.now() / 1000) * 1000,
+    subagents: normalised
+  }
+
+  $spawnHistory.set([snap, ...$spawnHistory.get().filter(s => s.id !== snap.id)].slice(0, HISTORY_LIMIT))
+}
+
+/** Coerce one archived entry of unknown shape into SubagentProgress, defaulting absent or mistyped fields. */
+function normaliseSubagent(raw: unknown): SubagentProgress {
+  const o = (typeof raw === 'object' && raw !== null ? raw : {}) as Record<string, unknown>
+  const s = (v: unknown) => (typeof v === 'string' ? v : undefined)
+  const n = (v: unknown) => (typeof v === 'number' ? v : undefined)
+  const arr = <T>(v: unknown): T[] | undefined => (Array.isArray(v) ? (v as T[]) : undefined)
+
+  return {
+    apiCalls: n(o.apiCalls),
+    costUsd: n(o.costUsd),
+    depth: typeof o.depth === 'number' ? o.depth : 0,
+    durationSeconds: n(o.durationSeconds),
+    filesRead: arr<string>(o.filesRead),
+    filesWritten: arr<string>(o.filesWritten),
+    goal: s(o.goal) ?? 'subagent',
+    id: s(o.id) ?? `sa-${Math.random().toString(36).slice(2, 8)}`,
+    index: typeof o.index === 'number' ? o.index : 0,
+    inputTokens: n(o.inputTokens),
+    iteration: n(o.iteration),
+    model: s(o.model),
+    notes: (arr<string>(o.notes) ?? []).filter(x => typeof x === 'string'),
+    outputTail: arr(o.outputTail) as SubagentProgress['outputTail'],
+    outputTokens: n(o.outputTokens),
+    parentId: s(o.parentId) ?? null,
+    reasoningTokens: n(o.reasoningTokens),
+    startedAt: n(o.startedAt),
+    status: (s(o.status) as SubagentProgress['status']) ?? 'completed',
+    summary: s(o.summary),
+    taskCount: typeof o.taskCount === 'number' ? o.taskCount : 1,
+    thinking: (arr<string>(o.thinking) ?? []).filter(x => typeof x === 'string'),
+    toolCount: typeof o.toolCount === 'number' ? o.toolCount : 0,
+    tools: (arr<string>(o.tools) ?? []).filter(x => typeof x === 'string'),
+    toolsets: arr<string>(o.toolsets)
+  }
+}
@@ -10,8 +10,9 @@ import {
 } from '../lib/text.js'
 import type { ActiveTool, ActivityItem, Msg, SubagentProgress } from '../types.js'

-import { resetOverlayState } from './overlayStore.js'
-import { patchTurnState, resetTurnState } from './turnStore.js'
+import { resetFlowOverlays } from './overlayStore.js'
+import { pushSnapshot } from './spawnHistoryStore.js'
+import { getTurnState, patchTurnState, resetTurnState } from './turnStore.js'
 import { getUiState, patchUiState } from './uiStore.js'

 const INTERRUPT_COOLDOWN_MS = 1500
@@ -41,6 +42,7 @@ class TurnController {
  lastStatusNote = ''
  pendingInlineDiffs: string[] = []
  persistedToolLabels = new Set<string>()
+  persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
  protocolWarned = false
  reasoningText = ''
  segmentMessages: Msg[] = []
@@ -90,7 +92,7 @@ class TurnController {
      turnTrail: []
    })
    patchUiState({ busy: false })
-    resetOverlayState()
+    resetFlowOverlays()
  }

  interruptTurn({ appendMessage, gw, sid, sys }: InterruptDeps) {
@@ -189,9 +191,7 @@ class TurnController {
    // leading "┊ review diff" header written by `_emit_inline_diff` for the
    // terminal printer). That header only makes sense as stdout dressing,
    // not inside a markdown ```diff block.
-    const text = diffText
-      .replace(/^\s*┊[^\n]*\n?/, '')
-      .trim()
+    const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()

    if (!text || this.pendingInlineDiffs.includes(text)) {
      return
@@ -249,12 +249,15 @@ class TurnController {
    // markdown fence of its own — otherwise we render two stacked diff
    // blocks for the same edit.
    const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
+
    const remainingInlineDiffs = assistantAlreadyHasDiff
      ? []
      : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
+
    const inlineDiffBlock = remainingInlineDiffs.length
      ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
      : ''
+
    const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
    const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
    const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
@@ -276,6 +279,20 @@ class TurnController {

    const wasInterrupted = this.interrupted

+    // Archive the turn's spawn tree to history BEFORE idle() drops subagents
+    // from turnState.  Lets /replay and the overlay's history nav pull up
+    // finished fan-outs without a round-trip to disk.
+    const finishedSubagents = getTurnState().subagents
+    const sessionId = getUiState().sid
+
+    if (finishedSubagents.length > 0) {
+      pushSnapshot(finishedSubagents, { sessionId, startedAt: null })
+      // Fire-and-forget disk persistence so /replay survives process restarts.
+      // The same snapshot lives in memory via spawnHistoryStore for immediate
+      // recall — disk is the long-term archive.
+      void this.persistSpawnTree?.(finishedSubagents, sessionId)
+    }
+
    this.idle()
    this.clearReasoning()
    this.turnTools = []
@@ -443,33 +460,82 @@ class TurnController {
    patchTurnState({ activity: [], outcome: '', subagents: [], toolTokens: 0, tools: [], turnTrail: [] })
  }

-  upsertSubagent(p: SubagentEventPayload, patch: (current: SubagentProgress) => Partial<SubagentProgress>) {
-    const id = `sa:${p.task_index}:${p.goal || 'subagent'}`
+  upsertSubagent(
+    p: SubagentEventPayload,
+    patch: (current: SubagentProgress) => Partial<SubagentProgress>,
+    opts: { createIfMissing?: boolean } = { createIfMissing: true }
+  ) {
+    // Stable id: prefer the server-issued subagent_id (survives nested
+    // grandchildren + cross-tree joins).  Fall back to the composite key
+    // for older gateways that omit the field — those produce a flat list.
+    const id = p.subagent_id || `sa:${p.task_index}:${p.goal || 'subagent'}`

    patchTurnState(state => {
      const existing = state.subagents.find(item => item.id === id)

+      // Late events (subagent.complete/tool/progress arriving after message.complete
+      // has already fired idle()) would otherwise resurrect a finished
+      // subagent into turn.subagents and block the "finished" title on the
+      // /agents overlay.  When `createIfMissing` is false we drop silently.
+      if (!existing && !opts.createIfMissing) {
+        return state
+      }
+
      const base: SubagentProgress = existing ?? {
+        depth: p.depth ?? 0,
        goal: p.goal,
        id,
        index: p.task_index,
+        model: p.model,
        notes: [],
+        parentId: p.parent_id ?? null,
+        startedAt: Date.now(),
        status: 'running',
        taskCount: p.task_count ?? 1,
        thinking: [],
-        tools: []
+        toolCount: p.tool_count ?? 0,
+        tools: [],
+        toolsets: p.toolsets
      }

+      // Map snake_case payload keys onto camelCase state.  Only overwrite
+      // when the event actually carries the field; `??` preserves prior
+      // values across streaming events that emit partial payloads.
+      const outputTail = p.output_tail
+        ? p.output_tail.map(e => ({
+            isError: Boolean(e.is_error),
+            preview: String(e.preview ?? ''),
+            tool: String(e.tool ?? 'tool')
+          }))
+        : base.outputTail
+
      const next: SubagentProgress = {
        ...base,
+        apiCalls: p.api_calls ?? base.apiCalls,
+        costUsd: p.cost_usd ?? base.costUsd,
+        depth: p.depth ?? base.depth,
+        filesRead: p.files_read ?? base.filesRead,
+        filesWritten: p.files_written ?? base.filesWritten,
        goal: p.goal || base.goal,
+        inputTokens: p.input_tokens ?? base.inputTokens,
+        iteration: p.iteration ?? base.iteration,
+        model: p.model ?? base.model,
+        outputTail,
+        outputTokens: p.output_tokens ?? base.outputTokens,
+        parentId: p.parent_id ?? base.parentId,
+        reasoningTokens: p.reasoning_tokens ?? base.reasoningTokens,
        taskCount: p.task_count ?? base.taskCount,
+        toolCount: p.tool_count ?? base.toolCount,
+        toolsets: p.toolsets ?? base.toolsets,
        ...patch(base)
      }

+      // Stable order: new entries are sorted by (depth, index) rather than
+      // insert time.  Without it, grandchildren can shuffle relative to
+      // siblings when events arrive out of order under high concurrency.
      const subagents = existing
        ? state.subagents.map(item => (item.id === id ? next : item))
-        : [...state.subagents, next].sort((a, b) => a.index - b.index)
+        : [...state.subagents, next].sort((a, b) => a.depth - b.depth || a.index - b.index)

      return { ...state, subagents }
    })
@@ -16,7 +16,7 @@ const buildUiState = (): UiState => ({
  showReasoning: false,
  sid: null,
  status: 'summoning hermes…',
-  statusBar: true,
+  statusBar: 'top',
  streaming: true,
  theme: DEFAULT_THEME,
  usage: ZERO
@@ -10,9 +10,20 @@ import type {
 } from '../gatewayTypes.js'
 import { asRpcResult } from '../lib/rpc.js'

+import type { StatusBarMode } from './interfaces.js'
 import { turnController } from './turnController.js'
 import { patchUiState } from './uiStore.js'

+const STATUSBAR_ALIAS = new Map<string, StatusBarMode>([
+  ['bottom', 'bottom'],
+  ['off', 'off'],
+  ['on', 'top'],
+  ['top', 'top']
+])
+/** Canonicalise a config value; Map lookup avoids inherited keys like "constructor". Unknown input → 'top'. */
+export const normalizeStatusBar = (raw: unknown): StatusBarMode =>
+  raw === false ? 'off' : typeof raw === 'string' ? (STATUSBAR_ALIAS.get(raw.trim().toLowerCase()) ?? 'top') : 'top'
+
 const MTIME_POLL_MS = 5000

 const quietRpc = async <T extends Record<string, any> = Record<string, any>>(
@@ -37,7 +48,7 @@ export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolea
    inlineDiffs: d.inline_diffs !== false,
    showCost: !!d.show_cost,
    showReasoning: !!d.show_reasoning,
-    statusBar: d.tui_statusbar !== false,
+    statusBar: normalizeStatusBar(d.tui_statusbar),
    streaming: d.streaming !== false
  })
 }
@@ -3,6 +3,7 @@ import { useStore } from '@nanostores/react'

 import type {
  ApprovalRespondResponse,
+  ConfigSetResponse,
  SecretRespondResponse,
  SudoRespondResponse,
  VoiceRecordResponse
@@ -74,6 +75,10 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
    if (overlay.picker) {
      return patchOverlayState({ picker: false })
    }
+
+    if (overlay.agents) {
+      return patchOverlayState({ agents: false })
+    }
  }

  const cycleQueue = (dir: 1 | -1) => {
@@ -180,6 +185,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
        if (isCtrl(key, ch, 'c')) {
          cancelOverlayFromCtrlC()
        }
+
        return
      }

@@ -290,6 +296,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
    if (key.upArrow && !cState.inputBuf.length) {
      const inputSel = getInputSelection()
      const cursor = inputSel && inputSel.start === inputSel.end ? inputSel.start : null
+
      const noLineAbove =
        !cState.input || (cursor !== null && cState.input.lastIndexOf('\n', Math.max(0, cursor - 1)) < 0)

@@ -371,6 +378,29 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
      return cActions.openEditor()
    }

+    // shift-tab flips yolo without spending a turn (claude-code parity)
+    if (key.shift && key.tab && !cState.completions.length) {
+      if (!live.sid) {
+        return void actions.sys('yolo needs an active session')
+      }
+
+      // gateway.rpc swallows errors with its own sys() message and resolves to null,
+      // so we only speak when it came back with a real shape. null = rpc already spoke.
+      return void gateway.rpc<ConfigSetResponse>('config.set', { key: 'yolo', session_id: live.sid }).then(r => {
+        if (r?.value === '1') {
+          return actions.sys('yolo on')
+        }
+
+        if (r?.value === '0') {
+          return actions.sys('yolo off')
+        }
+
+        if (r) {
+          actions.sys('failed to toggle yolo')
+        }
+      })
+    }
+
    if (key.tab && cState.completions.length) {
      const row = cState.completions[cState.compIdx]

@@ -5,7 +5,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { STARTUP_RESUME_ID } from '../config/env.js'
 import { MAX_HISTORY, WHEEL_SCROLL_STEP } from '../config/limits.js'
 import { attachedImageNotice, imageTokenMeta } from '../domain/messages.js'
-import { fmtCwdBranch } from '../domain/paths.js'
+import { fmtCwdBranch, shortCwd } from '../domain/paths.js'
 import { type GatewayClient } from '../gatewayClient.js'
 import type {
  ClarifyRespondResponse,
@@ -314,12 +314,14 @@ export function useMainApp(gw: GatewayClient) {

  useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid: ui.sid })

-  // ── Terminal tab title ─────────────────────────────────────────────
-  // Show model name + status so users can identify the Hermes tab.
-  const shortModel = ui.info?.model?.replace(/^.*\//, '') ?? ''
-  const titleStatus = ui.busy ? '⏳' : '✓'
-  const terminalTitle = shortModel ? `${titleStatus} ${shortModel} — Hermes` : 'Hermes'
-  useTerminalTitle(terminalTitle)
+  // Tab title: `⚠` waiting on approval/sudo/secret/clarify, `⏳` busy, `✓` idle.
+  const model = ui.info?.model?.replace(/^.*\//, '') ?? ''
+
+  const marker = overlay.approval || overlay.sudo || overlay.secret || overlay.clarify ? '⚠' : ui.busy ? '⏳' : '✓'
+
+  const tabCwd = ui.info?.cwd
+
+  useTerminalTitle(model ? `${marker} ${model}${tabCwd ? ` · ${shortCwd(tabCwd, 24)}` : ''}` : 'Hermes')

  useEffect(() => {
    if (!ui.sid || !stdout) {
@@ -1,26 +0,0 @@
-const GOLD = '\x1b[38;2;255;215;0m'
-const AMBER = '\x1b[38;2;255;191;0m'
-const BRONZE = '\x1b[38;2;205;127;50m'
-const DIM = '\x1b[38;2;184;134;11m'
-const RESET = '\x1b[0m'
-
-const LOGO = [
-  '██╗  ██╗███████╗██████╗ ███╗   ███╗███████╗███████╗       █████╗  ██████╗ ███████╗███╗   ██╗████████╗',
-  '██║  ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝      ██╔══██╗██╔════╝ ██╔════╝████╗  ██║╚══██╔══╝',
-  '███████║█████╗  ██████╔╝██╔████╔██║█████╗  ███████╗█████╗███████║██║  ███╗█████╗  ██╔██╗ ██║   ██║   ',
-  '██╔══██║██╔══╝  ██╔══██╗██║╚██╔╝██║██╔══╝  ╚════██║╚════╝██╔══██║██║   ██║██╔══╝  ██║╚██╗██║   ██║   ',
-  '██║  ██║███████╗██║  ██║██║ ╚═╝ ██║███████╗███████║      ██║  ██║╚██████╔╝███████╗██║ ╚████║   ██║   ',
-  '╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚═╝     ╚═╝╚══════╝╚══════╝      ╚═╝  ╚═╝ ╚═════╝ ╚══════╝╚═╝  ╚═══╝   ╚═╝   '
-]
-
-const GRADIENT = [GOLD, GOLD, AMBER, AMBER, BRONZE, BRONZE] as const
-const LOGO_WIDTH = 98
-
-const TAGLINE = `${DIM}⚕ Nous Research · Messenger of the Digital Gods${RESET}`
-const FALLBACK = `\x1b[1m${GOLD}⚕ NOUS HERMES${RESET}`
-
-export function bootBanner(cols: number = process.stdout.columns || 80): string {
-  const body = cols >= LOGO_WIDTH ? LOGO.map((text, i) => `${GRADIENT[i]}${text}${RESET}`).join('\n') : FALLBACK
-
-  return `\n${body}\n${TAGLINE}\n\n`
-}
@@ -1,10 +1,14 @@
 import { Box, type ScrollBoxHandle, Text } from '@hermes/ink'
-import { type ReactNode, type RefObject, useCallback, useEffect, useState, useSyncExternalStore } from 'react'
+import { useStore } from '@nanostores/react'
+import { type ReactNode, type RefObject, useCallback, useEffect, useMemo, useState, useSyncExternalStore } from 'react'

+import { $delegationState } from '../app/delegationStore.js'
+import { $turnState } from '../app/turnStore.js'
 import { FACES } from '../content/faces.js'
 import { VERBS } from '../content/verbs.js'
 import { fmtDuration } from '../domain/messages.js'
 import { stickyPromptFromViewport } from '../domain/viewport.js'
+import { buildSubagentTree, treeTotals, widthByDepth } from '../lib/subagentTree.js'
 import { fmtK } from '../lib/text.js'
 import type { Theme } from '../theme.js'
 import type { Msg, Usage } from '../types.js'
@@ -60,6 +64,67 @@ function ctxBar(pct: number | undefined, w = 10) {
  return '█'.repeat(filled) + '░'.repeat(w - filled)
 }

+function SpawnHud({ t }: { t: Theme }) {
+  // Tight HUD: renders nothing unless the subagent tree has descendants or
+  // delegation is paused.  Colour escalates to warn/error near the caps.
+  const delegation = useStore($delegationState)
+  const turn = useStore($turnState)
+
+  const tree = useMemo(() => buildSubagentTree(turn.subagents), [turn.subagents])
+  const totals = useMemo(() => treeTotals(tree), [tree])
+
+  if (!totals.descendantCount && !delegation.paused) {
+    return null
+  }
+
+  const maxDepth = delegation.maxSpawnDepth
+  const maxConc = delegation.maxConcurrentChildren
+  const depth = Math.max(0, totals.maxDepthFromHere)
+  const active = totals.activeCount
+
+  // `max_concurrent_children` is a per-parent cap, not a global one.
+  // `activeCount` sums every running agent across the tree and would
+  // over-warn for multi-orchestrator runs.  The widest level of the tree
+  // is a closer proxy to "most concurrent spawns that could be hitting a
+  // single parent's slot budget".
+  const widestLevel = widthByDepth(tree).reduce((a, b) => Math.max(a, b), 0)
+  const depthRatio = maxDepth ? depth / maxDepth : 0
+  const concRatio = maxConc ? widestLevel / maxConc : 0
+  const ratio = Math.max(depthRatio, concRatio)
+
+  const color = delegation.paused || ratio >= 1 ? t.color.error : ratio >= 0.66 ? t.color.warn : t.color.dim
+
+  const pieces: string[] = []
+
+  if (delegation.paused) {
+    pieces.push('⏸ paused')
+  }
+
+  if (totals.descendantCount > 0) {
+    const depthLabel = maxDepth ? `${depth}/${maxDepth}` : `${depth}`
+    pieces.push(`d${depthLabel}`)
+
+    if (active > 0) {
+      // Label pairs the widest-level count (drives concRatio above) with
+      // the total active count for context.  `W/cap` triggers the warn,
+      // `+N` is everything else currently running across the tree.
+      const extra = Math.max(0, active - widestLevel)
+      const widthLabel = maxConc ? `${widestLevel}/${maxConc}` : `${widestLevel}`
+      const suffix = extra > 0 ? `+${extra}` : ''
+      pieces.push(`⚡${widthLabel}${suffix}`)
+    }
+  }
+
+  const atCap = depthRatio >= 1 || concRatio >= 1
+
+  return (
+    <Text color={color}>
+      {atCap ? ' │ ⚠ ' : ' │ '}
+      {pieces.join(' ')}
+    </Text>
+  )
+}
+
 function SessionDuration({ startedAt }: { startedAt: number }) {
  const [now, setNow] = useState(() => Date.now())

@@ -91,7 +156,11 @@ export function GoodVibesHeart({ tick, t }: { tick: number; t: Theme }) {
    return () => clearTimeout(id)
  }, [t.color.amber, tick])

-  return <Text color={color}>{active ? '♥' : ' '}</Text>
+  if (!active) {
+    return null
+  }
+
+  return <Text color={color}>♥</Text>
 }

 export function StatusRule({
@@ -122,7 +191,7 @@ export function StatusRule({
  const leftWidth = Math.max(12, cols - cwdLabel.length - 3)

  return (
-    <Box>
+    <Box height={1}>
      <Box flexShrink={1} width={leftWidth}>
        <Text color={t.color.bronze} wrap="truncate-end">
          {'─ '}
@@ -145,6 +214,7 @@ export function StatusRule({
              <SessionDuration startedAt={sessionStartedAt} />
            </Text>
          ) : null}
+          <SpawnHud t={t} />
          {voiceLabel ? <Text color={t.color.dim}> │ {voiceLabel}</Text> : null}
          {bgCount > 0 ? <Text color={t.color.dim}> │ {bgCount} bg</Text> : null}
          {showCost && typeof usage.cost_usd === 'number' ? (
@@ -2,13 +2,15 @@ import { AlternateScreen, Box, NoSelect, ScrollBox, Text } from '@hermes/ink'
 import { useStore } from '@nanostores/react'
 import { memo } from 'react'

+import { useGateway } from '../app/gatewayContext.js'
 import type { AppLayoutProgressProps, AppLayoutProps } from '../app/interfaces.js'
-import { $isBlocked } from '../app/overlayStore.js'
+import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStore.js'
 import { $uiState } from '../app/uiStore.js'
 import { PLACEHOLDER } from '../content/placeholders.js'
 import type { Theme } from '../theme.js'
 import type { DetailsMode } from '../types.js'

+import { AgentsOverlay } from './agentsOverlay.js'
 import { GoodVibesHeart, StatusRule, StickyPromptTracker, TranscriptScrollbar } from './appChrome.js'
 import { FloatingOverlays, PromptZone } from './appOverlays.js'
 import { Banner, Panel, SessionPanel } from './branding.js'
@@ -181,37 +183,19 @@ const ComposerPane = memo(function ComposerPane({
        <Text> </Text>
      )}

-      <Box flexDirection="column" position="relative">
-        {ui.statusBar && (
-          <StatusRule
-            bgCount={ui.bgTasks.size}
-            busy={ui.busy}
-            cols={composer.cols}
-            cwdLabel={status.cwdLabel}
-            model={ui.info?.model?.split('/').pop() ?? ''}
-            sessionStartedAt={status.sessionStartedAt}
-            showCost={ui.showCost}
-            status={ui.status}
-            statusColor={status.statusColor}
-            t={ui.theme}
-            turnStartedAt={status.turnStartedAt}
-            usage={ui.usage}
-            voiceLabel={status.voiceLabel}
-          />
-        )}
-
-        <FloatingOverlays
-          cols={composer.cols}
-          compIdx={composer.compIdx}
-          completions={composer.completions}
-          onModelSelect={actions.onModelSelect}
-          onPickerSelect={actions.resumeById}
-          pagerPageSize={composer.pagerPageSize}
-        />
-      </Box>
+      <StatusRulePane at="top" composer={composer} status={status} />

      {!isBlocked && (
-        <Box flexDirection="column" marginBottom={1}>
+        <Box flexDirection="column" marginTop={ui.statusBar === 'top' ? 0 : 1} position="relative">
+          <FloatingOverlays
+            cols={composer.cols}
+            compIdx={composer.compIdx}
+            completions={composer.completions}
+            onModelSelect={actions.onModelSelect}
+            onPickerSelect={actions.resumeById}
+            pagerPageSize={composer.pagerPageSize}
+          />
+
          {composer.inputBuf.map((line, i) => (
            <Box key={i}>
              <Box width={3}>
@@ -234,8 +218,9 @@ const ComposerPane = memo(function ComposerPane({
            </Box>

            <Box flexGrow={1} position="relative">
+              {/* subtract NoSelect paddingX={1} (2 cols) + pw so wrap-ansi and cursorLayout agree */}
              <TextInput
-                columns={Math.max(20, composer.cols - pw)}
+                columns={Math.max(20, composer.cols - pw - 2)}
                onChange={composer.updateInput}
                onPaste={composer.handleTextPaste}
                onSubmit={composer.submit}
@@ -256,6 +241,53 @@ const ComposerPane = memo(function ComposerPane({
  )
 })

+// Agents overlay wired to the gateway handle and the UI / overlay stores.
+// AppLayout mounts it in place of the transcript while `overlay.agents` is set.
+const AgentsOverlayPane = memo(function AgentsOverlayPane() {
+  const { gw } = useGateway()
+  const { theme } = useStore($uiState)
+  const { agentsInitialHistoryIndex } = useStore($overlayState)
+
+  // Closing clears the `agents` flag and resets the initial history index to 0.
+  const close = () => patchOverlayState({ agents: false, agentsInitialHistoryIndex: 0 })
+
+  return (
+    <AgentsOverlay gw={gw} initialHistoryIndex={agentsInitialHistoryIndex} onClose={close} t={theme} />
+  )
+})
+
+// Status rule for one placement slot.  Draws only when the UI store's
+// `statusBar` value equals `at`, so at most one of the two slots is visible;
+// the top slot carries a one-row top margin, the bottom slot none.
+const StatusRulePane = memo(function StatusRulePane({
+  at,
+  composer,
+  status
+}: Pick<AppLayoutProps, 'composer' | 'status'> & { at: 'bottom' | 'top' }) {
+  const ui = useStore($uiState)
+  const { cwdLabel, sessionStartedAt, statusColor, turnStartedAt, voiceLabel } = status
+
+  return ui.statusBar !== at ? null : (
+    <Box marginTop={at === 'top' ? 1 : 0}>
+      <StatusRule
+        bgCount={ui.bgTasks.size}
+        busy={ui.busy}
+        cols={composer.cols}
+        cwdLabel={cwdLabel}
+        model={ui.info?.model?.split('/').pop() ?? ''}
+        sessionStartedAt={sessionStartedAt}
+        showCost={ui.showCost}
+        status={ui.status}
+        statusColor={statusColor}
+        t={ui.theme}
+        turnStartedAt={turnStartedAt}
+        usage={ui.usage}
+        voiceLabel={voiceLabel}
+      />
+    </Box>
+  )
+})
+
 export const AppLayout = memo(function AppLayout({
  actions,
  composer,
@@ -264,22 +296,34 @@ export const AppLayout = memo(function AppLayout({
  status,
  transcript
 }: AppLayoutProps) {
+  const overlay = useStore($overlayState)
+
  return (
    <AlternateScreen mouseTracking={mouseTracking}>
      <Box flexDirection="column" flexGrow={1}>
        <Box flexDirection="row" flexGrow={1}>
-          <TranscriptPane actions={actions} composer={composer} progress={progress} transcript={transcript} />
+          {overlay.agents ? (
+            <AgentsOverlayPane />
+          ) : (
+            <TranscriptPane actions={actions} composer={composer} progress={progress} transcript={transcript} />
+          )}
        </Box>

-        <PromptZone
-          cols={composer.cols}
-          onApprovalChoice={actions.answerApproval}
-          onClarifyAnswer={actions.answerClarify}
-          onSecretSubmit={actions.answerSecret}
-          onSudoSubmit={actions.answerSudo}
-        />
+        {!overlay.agents && (
+          <>
+            <PromptZone
+              cols={composer.cols}
+              onApprovalChoice={actions.answerApproval}
+              onClarifyAnswer={actions.answerClarify}
+              onSecretSubmit={actions.answerSecret}
+              onSudoSubmit={actions.answerSudo}
+            />

-        <ComposerPane actions={actions} composer={composer} status={status} />
+            <ComposerPane actions={actions} composer={composer} status={status} />
+
+            <StatusRulePane at="bottom" composer={composer} status={status} />
+          </>
+        )}
      </Box>
    </AlternateScreen>
  )
@@ -167,9 +167,11 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number {
  return snapPos(s, Math.min(nextBreak + 1 + col, lineEnd))
 }

-function cursorLayout(value: string, cursor: number, cols: number) {
+// mirrors wrap-ansi(..., { wordWrap: false, hard: true }) so the declared
+// cursor lines up with what <Text wrap="wrap-char"> actually renders
+export function cursorLayout(value: string, cursor: number, cols: number) {
  const pos = Math.max(0, Math.min(cursor, value.length))
-  const w = Math.max(1, cols - 1)
+  const w = Math.max(1, cols)

  let col = 0,
    line = 0
@@ -200,17 +202,23 @@ function cursorLayout(value: string, cursor: number, cols: number) {
    col += sw
  }

+  // trailing cursor-cell overflows to the next row at the wrap column
+  if (col >= w) {
+    line++
+    col = 0
+  }
+
  return { column: col, line }
 }

-function offsetFromPosition(value: string, row: number, col: number, cols: number) {
+export function offsetFromPosition(value: string, row: number, col: number, cols: number) {
  if (!value.length) {
    return 0
  }

  const targetRow = Math.max(0, Math.floor(row))
  const targetCol = Math.max(0, Math.floor(col))
-  const w = Math.max(1, cols - 1)
+  const w = Math.max(1, cols)

  let line = 0
  let column = 0
@@ -615,14 +623,7 @@ export function TextInput({
        return
      }

-      if (
-        (k.ctrl && inp === 'c') ||
-        k.tab ||
-        (k.shift && k.tab) ||
-        k.pageUp ||
-        k.pageDown ||
-        k.escape
-      ) {
+      if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
        return
      }

@@ -641,6 +642,8 @@ export function TextInput({
      const actionHome = k.home || (!isMac && mod && inp === 'a') || isMacActionFallback(k, inp, 'a')
      const actionEnd = k.end || (mod && inp === 'e') || isMacActionFallback(k, inp, 'e')
      const actionDeleteToStart = (mod && inp === 'u') || isMacActionFallback(k, inp, 'u')
+      const actionKillToEnd = (mod && inp === 'k') || isMacActionFallback(k, inp, 'k')
+      const actionDeleteWord = (mod && inp === 'w') || isMacActionFallback(k, inp, 'w')
      const range = selRange()
      const delFwd = k.delete || fwdDel.current

@@ -704,7 +707,7 @@ export function TextInput({
        } else {
          v = v.slice(0, c) + v.slice(nextPos(v, c))
        }
-      } else if (mod && inp === 'w') {
+      } else if (actionDeleteWord) {
        if (range) {
          v = v.slice(0, range.start) + v.slice(range.end)
          c = range.start
@@ -724,7 +727,7 @@ export function TextInput({
          v = v.slice(c)
          c = 0
        }
-      } else if (mod && inp === 'k') {
+      } else if (actionKillToEnd) {
        if (range) {
          v = v.slice(0, range.start) + v.slice(range.end)
          c = range.start
@@ -807,7 +810,7 @@ export function TextInput({
      }}
      ref={boxRef}
    >
-      <Text wrap="wrap">{rendered}</Text>
+      <Text wrap="wrap-char">{rendered}</Text>
    </Box>
  )
 }
@@ -1,8 +1,19 @@
 import { Box, NoSelect, Text } from '@hermes/ink'
-import { memo, useEffect, useMemo, useState, type ReactNode } from 'react'
+import { memo, type ReactNode, useEffect, useMemo, useState } from 'react'
 import spinners, { type BrailleSpinnerName } from 'unicode-animations'

 import { THINKING_COT_MAX } from '../config/limits.js'
+import {
+  buildSubagentTree,
+  fmtCost,
+  fmtTokens,
+  formatSummary as formatSpawnSummary,
+  hotnessBucket,
+  peakHotness,
+  sparkline,
+  treeTotals,
+  widthByDepth
+} from '../lib/subagentTree.js'
 import {
  compactPreview,
  estimateTokensRough,
@@ -14,7 +25,7 @@ import {
  toolTrailLabel
 } from '../lib/text.js'
 import type { Theme } from '../theme.js'
-import type { ActiveTool, ActivityItem, DetailsMode, SubagentProgress, ThinkingMode } from '../types.js'
+import type { ActiveTool, ActivityItem, DetailsMode, SubagentNode, SubagentProgress, ThinkingMode } from '../types.js'

 const THINK: BrailleSpinnerName[] = ['helix', 'breathe', 'orbit', 'dna', 'waverows', 'snake', 'pulse']
 const TOOL: BrailleSpinnerName[] = ['cascade', 'scan', 'diagswipe', 'fillsweep', 'rain', 'columns', 'sparkle']
@@ -106,6 +117,8 @@ function TreeNode({
  header,
  open,
  rails = [],
+  stemColor,
+  stemDim,
  t
 }: {
  branch: TreeBranch
@@ -113,11 +126,13 @@ function TreeNode({
  header: ReactNode
  open: boolean
  rails?: TreeRails
+  stemColor?: string
+  stemDim?: boolean
  t: Theme
 }) {
  return (
    <Box flexDirection="column">
-      <TreeRow branch={branch} rails={rails} t={t}>
+      <TreeRow branch={branch} rails={rails} stemColor={stemColor} stemDim={stemDim} t={t}>
        {header}
      </TreeRow>
      {open ? children?.(nextTreeRails(rails, branch)) : null}
@@ -239,16 +254,31 @@ function Chevron({
  )
 }

+// Picks a stem colour for a spawn-tree branch from a five-step theme ramp,
+// based on the node's aggregate hotness relative to `peak`.  `undefined`
+// means no heat colour applies (the caller then dims the stem instead).
+function heatColor(node: SubagentNode, peak: number, theme: Theme): string | undefined {
+  const { amber, bronze, error, gold, warn } = theme.color
+  const ramp = [bronze, amber, gold, warn, error]
+  const bucket = hotnessBucket(node.aggregate.hotness, peak, ramp.length)
+
+  // Buckets 0 and 1 stay uncoloured so cool branches fade into the chrome;
+  // only the gold, warn and error steps are ever returned.
+  return bucket < 2 ? undefined : ramp[bucket]
+}
+
 function SubagentAccordion({
  branch,
  expanded,
-  item,
+  node,
+  peak,
  rails = [],
  t
 }: {
  branch: TreeBranch
  expanded: boolean
-  item: SubagentProgress
+  node: SubagentNode
+  peak: number
  rails?: TreeRails
  t: Theme
 }) {
@@ -257,6 +287,7 @@ function SubagentAccordion({
  const [openThinking, setOpenThinking] = useState(expanded)
  const [openTools, setOpenTools] = useState(expanded)
  const [openNotes, setOpenNotes] = useState(expanded)
+  const [openKids, setOpenKids] = useState(expanded)

  useEffect(() => {
    if (!expanded) {
@@ -268,6 +299,7 @@ function SubagentAccordion({
    setOpenThinking(true)
    setOpenTools(true)
    setOpenNotes(true)
+    setOpenKids(true)
  }, [expanded])

  const expandAll = () => {
@@ -276,8 +308,13 @@ function SubagentAccordion({
    setOpenThinking(true)
    setOpenTools(true)
    setOpenNotes(true)
+    setOpenKids(true)
  }

+  const item = node.item
+  const children = node.children
+  const aggregate = node.aggregate
+
  const statusTone: 'dim' | 'error' | 'warn' =
    item.status === 'failed' ? 'error' : item.status === 'interrupted' ? 'warn' : 'dim'

@@ -286,10 +323,60 @@ function SubagentAccordion({
  const title = `${prefix}${open ? goalLabel : compactPreview(goalLabel, 60)}`
  const summary = compactPreview((item.summary || '').replace(/\s+/g, ' ').trim(), 72)

-  const suffix =
-    item.status === 'running'
-      ? 'running'
-      : `${item.status}${item.durationSeconds ? ` · ${fmtElapsed(item.durationSeconds * 1000)}` : ''}`
+  // Suffix packs branch rollup: status · elapsed · per-branch tool/agent/token/cost.
+  // Emphasises the numbers the user can't easily eyeball from a flat list.
+  const statusLabel = item.status === 'queued' ? 'queued' : item.status === 'running' ? 'running' : String(item.status)
+
+  const rollupBits: string[] = [statusLabel]
+
+  if (item.durationSeconds) {
+    rollupBits.push(fmtElapsed(item.durationSeconds * 1000))
+  }
+
+  const localTools = item.toolCount ?? 0
+  const subtreeTools = aggregate.totalTools - localTools
+
+  if (localTools > 0) {
+    rollupBits.push(`${localTools} tool${localTools === 1 ? '' : 's'}`)
+  }
+
+  const localTokens = (item.inputTokens ?? 0) + (item.outputTokens ?? 0)
+
+  if (localTokens > 0) {
+    rollupBits.push(`${fmtTokens(localTokens)} tok`)
+  }
+
+  const localCost = item.costUsd ?? 0
+
+  if (localCost > 0) {
+    rollupBits.push(fmtCost(localCost))
+  }
+
+  const filesLocal = (item.filesWritten?.length ?? 0) + (item.filesRead?.length ?? 0)
+
+  if (filesLocal > 0) {
+    rollupBits.push(`⎘${filesLocal}`)
+  }
+
+  if (children.length > 0) {
+    rollupBits.push(`${aggregate.descendantCount}↓`)
+
+    if (subtreeTools > 0) {
+      rollupBits.push(`+${subtreeTools}t sub`)
+    }
+
+    const subCost = aggregate.costUsd - localCost
+
+    if (subCost >= 0.01) {
+      rollupBits.push(`+${fmtCost(subCost)} sub`)
+    }
+
+    if (aggregate.activeCount > 0 && item.status !== 'running') {
+      rollupBits.push(`⚡${aggregate.activeCount}`)
+    }
+  }
+
+  const suffix = rollupBits.join(' · ')

  const thinkingText = item.thinking.join('\n')
  const hasThinking = Boolean(thinkingText)
@@ -418,6 +505,50 @@ function SubagentAccordion({
    })
  }

+  if (children.length > 0) {
+    // Nested grandchildren — rendered recursively via SubagentAccordion,
+    // sharing the same keybindings / expand semantics as top-level nodes.
+    sections.push({
+      header: (
+        <Chevron
+          count={children.length}
+          onClick={shift => {
+            if (shift) {
+              expandAll()
+            } else {
+              setOpenKids(v => !v)
+            }
+          }}
+          open={showChildren || openKids}
+          suffix={`d${item.depth + 1} · ${aggregate.descendantCount} total`}
+          t={t}
+          title="Spawned"
+        />
+      ),
+      key: 'subagents',
+      open: showChildren || openKids,
+      render: childRails => (
+        <Box flexDirection="column">
+          {children.map((child, i) => (
+            <SubagentAccordion
+              branch={i === children.length - 1 ? 'last' : 'mid'}
+              expanded={expanded || deep}
+              key={child.item.id}
+              node={child}
+              peak={peak}
+              rails={childRails}
+              t={t}
+            />
+          ))}
+        </Box>
+      )
+    })
+  }
+
+  // Heatmap: amber→error gradient on the stem when this branch is "hot"
+  // (high tools/sec) relative to the whole tree's peak.
+  const stem = heatColor(node, peak, t)
+
  return (
    <TreeNode
      branch={branch}
@@ -447,6 +578,8 @@ function SubagentAccordion({
      }
      open={open}
      rails={rails}
+      stemColor={stem}
+      stemDim={stem == null}
      t={t}
    >
      {childRails => (
@@ -598,6 +731,16 @@ export const ToolTrail = memo(function ToolTrail({

  const cot = useMemo(() => thinkingPreview(reasoning, 'full', THINKING_COT_MAX), [reasoning])

+  // Spawn-tree derivations must live above any early return so React's
+  // rules-of-hooks sees a stable call order.  Cheap O(N) builds memoised
+  // by subagent-list identity.
+  const spawnTree = useMemo(() => buildSubagentTree(subagents), [subagents])
+  const spawnPeak = useMemo(() => peakHotness(spawnTree), [spawnTree])
+  const spawnTotals = useMemo(() => treeTotals(spawnTree), [spawnTree])
+  const spawnWidths = useMemo(() => widthByDepth(spawnTree), [spawnTree])
+  const spawnSpark = useMemo(() => sparkline(spawnWidths), [spawnWidths])
+  const spawnSummaryLabel = useMemo(() => formatSpawnSummary(spawnTotals), [spawnTotals])
+
  if (
    !busy &&
    !trail.length &&
@@ -753,12 +896,13 @@ export const ToolTrail = memo(function ToolTrail({

  const renderSubagentList = (rails: boolean[]) => (
    <Box flexDirection="column">
-      {subagents.map((item, index) => (
+      {spawnTree.map((node, index) => (
        <SubagentAccordion
-          branch={index === subagents.length - 1 ? 'last' : 'mid'}
+          branch={index === spawnTree.length - 1 ? 'last' : 'mid'}
          expanded={detailsMode === 'expanded' || deepSubagents}
-          item={item}
-          key={item.id}
+          key={node.item.id}
+          node={node}
+          peak={spawnPeak}
          rails={rails}
          t={t}
        />
@@ -881,10 +1025,14 @@ export const ToolTrail = memo(function ToolTrail({
  }

  if (hasSubagents && !inlineDelegateKey) {
+    // Spark + summary give a one-line read on the branch shape before
+    // opening the subtree.  `/agents` opens the full-screen audit overlay.
+    const suffix = spawnSpark ? `${spawnSummaryLabel}  ${spawnSpark}  (/agents)` : `${spawnSummaryLabel}  (/agents)`
+
    sections.push({
      header: (
        <Chevron
-          count={subagents.length}
+          count={spawnTotals.descendantCount}
          onClick={shift => {
            if (shift) {
              expandAll()
@@ -895,8 +1043,9 @@ export const ToolTrail = memo(function ToolTrail({
            }
          }}
          open={detailsMode === 'expanded' || openSubagents}
+          suffix={suffix}
          t={t}
-          title="Subagents"
+          title="Spawn tree"
        />
      ),
      key: 'subagents',
@@ -24,6 +24,6 @@ export const HOTKEYS: [string, string][] = [
  ['Home/End', 'start / end of line'],
  ['Shift+Enter / Alt+Enter', 'insert newline'],
  ['\\+Enter', 'multi-line continuation (fallback)'],
-  ['!cmd', 'run shell command'],
-  ['{!cmd}', 'interpolate shell output inline']
+  ['!<cmd>', 'run a shell command (e.g. !ls, !git status)'],
+  ['{!<cmd>}', 'interpolate shell output inline (e.g. "branch is {!git branch --show-current}")']
 ]
@@ -1,5 +1,4 @@
 #!/usr/bin/env -S node --max-old-space-size=8192 --expose-gc
-import { bootBanner } from './bootBanner.js'
 import { GatewayClient } from './gatewayClient.js'
 import { setupGracefulExit } from './lib/gracefulExit.js'
 import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js'
@@ -10,8 +9,6 @@ if (!process.stdin.isTTY) {
  process.exit(0)
 }

-process.stdout.write(bootBanner())
-
 const gw = new GatewayClient()

 gw.start()
@@ -60,7 +60,7 @@ export interface ConfigDisplayConfig {
  streaming?: boolean
  thinking_mode?: string
  tui_compact?: boolean
-  tui_statusbar?: boolean
+  tui_statusbar?: 'bottom' | 'off' | 'on' | 'top' | boolean
 }

 export interface ConfigFullResponse {
@@ -280,15 +280,85 @@ export interface ReloadMcpResponse {
 // ── Subagent events ──────────────────────────────────────────────────

 export interface SubagentEventPayload {
+  api_calls?: number
+  cost_usd?: number
+  depth?: number
  duration_seconds?: number
+  files_read?: string[]
+  files_written?: string[]
  goal: string
-  status?: 'completed' | 'failed' | 'interrupted' | 'running'
+  input_tokens?: number
+  iteration?: number
+  model?: string
+  output_tail?: { is_error?: boolean; preview?: string; tool?: string }[]
+  output_tokens?: number
+  parent_id?: null | string
+  reasoning_tokens?: number
+  status?: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
+  subagent_id?: string
  summary?: string
  task_count?: number
  task_index: number
  text?: string
+  tool_count?: number
  tool_name?: string
  tool_preview?: string
+  toolsets?: string[]
+}
+
+// ── Delegation control RPCs ──────────────────────────────────────────
+
+export interface DelegationStatusResponse {
+  active?: {
+    depth?: number
+    goal?: string
+    model?: null | string
+    parent_id?: null | string
+    started_at?: number
+    status?: string
+    subagent_id?: string
+    tool_count?: number
+  }[]
+  max_concurrent_children?: number
+  max_spawn_depth?: number
+  paused?: boolean
+}
+
+export interface DelegationPauseResponse {
+  paused?: boolean
+}
+
+export interface SubagentInterruptResponse {
+  found?: boolean
+  subagent_id?: string
+}
+
+// ── Spawn-tree snapshots ─────────────────────────────────────────────
+
+export interface SpawnTreeListEntry {
+  count: number
+  finished_at?: number
+  label?: string
+  path: string
+  session_id?: string
+  started_at?: number | null
+}
+
+export interface SpawnTreeListResponse {
+  entries?: SpawnTreeListEntry[]
+}
+
+export interface SpawnTreeLoadResponse {
+  finished_at?: number
+  label?: string
+  session_id?: string
+  started_at?: null | number
+  subagents?: unknown[]
+}
+
+export interface SpawnTreeSaveResponse {
+  path?: string
+  session_id?: string
 }

 export type GatewayEvent =
@@ -320,6 +390,7 @@ export type GatewayEvent =
  | { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' }
  | { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' }
  | { payload: { text: string }; session_id?: string; type: 'btw.complete' }
+  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' }
  | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.tool' }
@@ -16,15 +16,18 @@ export const isActionMod = (key: { ctrl: boolean; meta: boolean; super?: boolean
  isMac ? key.meta || key.super === true : key.ctrl

 /**
- * Some macOS terminals rewrite Cmd navigation/deletion into readline control keys.
- * Treat those as action shortcuts too, but only for the specific fallbacks we
- * have observed from terminals: Cmd+Left → Ctrl+A, Cmd+Right → Ctrl+E,
- * Cmd+Backspace → Ctrl+U.
+ * Accept raw Ctrl+<letter> as an action shortcut on macOS, where `isActionMod`
+ * otherwise means Cmd. Two motivations:
+ *   - Some macOS terminals rewrite Cmd navigation/deletion into readline control
+ *     keys (Cmd+Left → Ctrl+A, Cmd+Right → Ctrl+E, Cmd+Backspace → Ctrl+U).
+ *   - Ctrl+K (kill-to-end) and Ctrl+W (delete-word-back) are standard readline
+ *     bindings that users expect to work regardless of platform, even though
+ *     no terminal rewrites Cmd into them.
 */
 export const isMacActionFallback = (
  key: { ctrl: boolean; meta: boolean; super?: boolean },
  ch: string,
-  target: 'a' | 'e' | 'u'
+  target: 'a' | 'e' | 'u' | 'k' | 'w'
 ): boolean => isMac && key.ctrl && !key.meta && key.super !== true && ch.toLowerCase() === target

 /** Match action-modifier + a single character (case-insensitive). */
@@ -0,0 +1,355 @@
+import type { SubagentAggregate, SubagentNode, SubagentProgress } from '../types.js'
+
+const ROOT_KEY = '__root__'
+
+/**
+ * Turn the flat subagent list into a forest of spawn-tree nodes.
+ *
+ * Items are grouped by `parentId`.  An item with no `parentId`, or with one
+ * that matches no `id` in `items`, is placed at the top level.  Each group of
+ * siblings is sorted by `depth`, then `index` (the original note says this is
+ * the key `turnController.upsertSubagent` uses), so sibling order follows
+ * those fields rather than the order events arrived in.
+ *
+ * If no item carries a `parentId` — e.g. a gateway that does not send one —
+ * every item becomes a childless top-level node and the tree renders flat.
+ */
+export function buildSubagentTree(items: readonly SubagentProgress[]): SubagentNode[] {
+  if (items.length === 0) {
+    return []
+  }
+
+  // Ids present in this list — a `parentId` outside this set counts as dangling.
+  const knownIds = new Set(items.map(entry => entry.id))
+  const groups = new Map<string, SubagentProgress[]>()
+
+  for (const entry of items) {
+    const key = entry.parentId && knownIds.has(entry.parentId) ? entry.parentId : ROOT_KEY
+    const siblings = groups.get(key)
+
+    if (siblings) {
+      siblings.push(entry)
+    } else {
+      groups.set(key, [entry])
+    }
+  }
+
+  // Sibling order: shallower `depth` first, then lower `index`.
+  groups.forEach(siblings => siblings.sort((a, b) => a.depth - b.depth || a.index - b.index))
+
+  // Children are built before their parent so `aggregate` sees finished subtrees.
+  const toNode = (entry: SubagentProgress): SubagentNode => {
+    const children = (groups.get(entry.id) ?? []).map(toNode)
+
+    return { aggregate: aggregate(entry, children), children, item: entry }
+  }
+
+  return (groups.get(ROOT_KEY) ?? []).map(toNode)
+}
+
+/**
+ * Roll up the counters for `item` plus everything beneath it.
+ *
+ * Pure: reads `item` and the children's existing `aggregate` values and
+ * returns a new object.  `activeCount` counts nodes for which `isRunning`
+ * holds (running or queued); `descendantCount` excludes `item` itself;
+ * `maxDepthFromHere` is 0 for a leaf.
+ *
+ * `hotness` is subtree tools divided by subtree duration in seconds — a
+ * rough "how busy is this branch" figure.  Note `totalDuration` is the plain
+ * sum of every node's `durationSeconds`, not wall-clock time, so overlapping
+ * runs are counted once per node.
+ */
+export function aggregate(item: SubagentProgress, children: readonly SubagentNode[]): SubagentAggregate {
+  // Seed every counter with this node's own numbers; absent fields count as 0.
+  const sum = {
+    activeCount: isRunning(item) ? 1 : 0,
+    costUsd: item.costUsd ?? 0,
+    descendantCount: 0,
+    filesTouched: (item.filesRead?.length ?? 0) + (item.filesWritten?.length ?? 0),
+    hotness: 0,
+    inputTokens: item.inputTokens ?? 0,
+    maxDepthFromHere: 0,
+    outputTokens: item.outputTokens ?? 0,
+    totalDuration: item.durationSeconds ?? 0,
+    totalTools: item.toolCount ?? 0
+  }
+
+  // Each child already carries its own subtree rollup, so one level suffices.
+  for (const { aggregate: sub } of children) {
+    sum.activeCount += sub.activeCount
+    sum.costUsd += sub.costUsd
+    sum.descendantCount += sub.descendantCount + 1
+    sum.filesTouched += sub.filesTouched
+    sum.inputTokens += sub.inputTokens
+    sum.maxDepthFromHere = Math.max(sum.maxDepthFromHere, sub.maxDepthFromHere + 1)
+    sum.outputTokens += sub.outputTokens
+    sum.totalDuration += sub.totalDuration
+    sum.totalTools += sub.totalTools
+  }
+
+  // A subtree with no recorded duration reports 0 instead of dividing by zero.
+  sum.hotness = sum.totalDuration > 0 ? sum.totalTools / sum.totalDuration : 0
+
+  return sum
+}
+
+/**
+ * Count of subagents at each depth level, indexed by depth (0 = top level).
+ *
+ * The result has one entry per populated depth, so an empty forest gives
+ * `[]` and no entry is ever zero.
+ *
+ * ToolTrail passes the array to `sparkline` for the inline depth profile;
+ * the original note says the status-bar HUD reads it as well.
+ */
+export function widthByDepth(tree: readonly SubagentNode[]): number[] {
+  const widths: number[] = []
+
+  // Walk one generation at a time: `level` is every node at the current
+  // depth, gathered across all parents.
+  let level: readonly SubagentNode[] = tree
+
+  while (level.length > 0) {
+    widths.push(level.length)
+    level = level.flatMap(node => node.children)
+  }
+
+  return widths
+}
+
+/**
+ * Flat totals across the full tree — feeds the summary chip header.
+ *
+ * Sums the aggregates of the top-level nodes only (each already covers its
+ * own subtree).  Every top-level node counts as one agent, so
+ * `descendantCount` here is the total number of nodes in the forest.
+ */
+export function treeTotals(tree: readonly SubagentNode[]): SubagentAggregate {
+  // Starting point: an empty forest totals to all zeros.
+  const zero = {
+    activeCount: 0,
+    costUsd: 0,
+    descendantCount: 0,
+    filesTouched: 0,
+    hotness: 0,
+    inputTokens: 0,
+    maxDepthFromHere: 0,
+    outputTokens: 0,
+    totalDuration: 0,
+    totalTools: 0
+  }
+
+  const sum = tree.reduce(
+    (acc, { aggregate: sub }) => ({
+      activeCount: acc.activeCount + sub.activeCount,
+      costUsd: acc.costUsd + sub.costUsd,
+      descendantCount: acc.descendantCount + (sub.descendantCount + 1),
+      filesTouched: acc.filesTouched + sub.filesTouched,
+      hotness: 0,
+      inputTokens: acc.inputTokens + sub.inputTokens,
+      maxDepthFromHere: Math.max(acc.maxDepthFromHere, sub.maxDepthFromHere + 1),
+      outputTokens: acc.outputTokens + sub.outputTokens,
+      totalDuration: acc.totalDuration + sub.totalDuration,
+      totalTools: acc.totalTools + sub.totalTools
+    }),
+    zero
+  )
+
+  // Hotness is derived once from the final sums, guarding the zero-duration case.
+  return { ...sum, hotness: sum.totalDuration > 0 ? sum.totalTools / sum.totalDuration : 0 }
+}
+
+/**
+ * Flatten the tree into visit order: each node is followed by its whole
+ * subtree before the next sibling appears.  The original note earmarks this
+ * for keyboard navigation and "kill subtree" walks (one RPC per descendant).
+ */
+export function flattenTree(tree: readonly SubagentNode[]): SubagentNode[] {
+  const visited: SubagentNode[] = []
+
+  // Explicit stack; siblings go on in reverse so the first one comes off first.
+  const pending: SubagentNode[] = [...tree].reverse()
+
+  for (let node = pending.pop(); node; node = pending.pop()) {
+    visited.push(node)
+    pending.push(...[...node.children].reverse())
+  }
+
+  return visited
+}
+
+/**
+ * Collect the id of every node below `node`, depth-first, with each child
+ * listed before its own descendants.  `node`'s own id is not included.
+ */
+export function descendantIds(node: SubagentNode): string[] {
+  const ids: string[] = []
+
+  // Stack of nodes still to emit; reversing keeps siblings in their stored order.
+  const pending: SubagentNode[] = [...node.children].reverse()
+
+  for (let next = pending.pop(); next; next = pending.pop()) {
+    ids.push(next.item.id)
+    pending.push(...[...next.children].reverse())
+  }
+
+  return ids
+}
+
+export function isRunning(item: Pick<SubagentProgress, 'status'>): boolean {
+  return ['queued', 'running'].includes(item.status) // i.e. not yet in a finished state
+}
+
+const SPARK_RAMP = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'] as const
+
+/**
+ * Render numbers as a row of unicode block glyphs, one character per entry.
+ *
+ * Heights are relative to the largest entry, which always gets the tallest
+ * glyph.  Entries <= 0 become a space, so empty depths read as gaps instead
+ * of short bars; if no entry is positive the whole row is spaces.
+ *
+ * An empty input yields an empty string.
+ */
+export function sparkline(values: readonly number[]): string {
+  if (values.length === 0) {
+    return ''
+  }
+
+  const peak = Math.max(...values)
+  const top = SPARK_RAMP.length - 1
+
+  if (peak <= 0) {
+    return ' '.repeat(values.length)
+  }
+
+  // Scale onto the ramp, rounding up.  For ordinary positive inputs the ratio
+  // rounds up to at least 1, so the lowest glyph (index 0) goes unused.
+  const barFor = (v: number) => SPARK_RAMP[Math.min(top, Math.max(0, Math.ceil((v / peak) * top)))]
+
+  return values.map(v => (v <= 0 ? ' ' : barFor(v))).join('')
+}
+
+/**
+ * Format totals into a compact one-line summary, e.g.
+ * `d2 · 7 agents · 124 tools · 2m 14s`.
+ *
+ * Order is fixed: depth, agents, tools, duration, tokens, cost, active count.
+ */
+export function formatSummary(totals: SubagentAggregate): string {
+  const parts: string[] = []
+  const plural = (n: number) => (n === 1 ? '' : 's')
+
+  // Append `text()` only when `show` holds; thunks keep the formatters from
+  // running for fields that are skipped.
+  const addIf = (show: boolean, text: () => string) => {
+    if (show) {
+      parts.push(text())
+    }
+  }
+
+  const tokens = totals.inputTokens + totals.outputTokens
+
+  // Depth and agent count are always present; the rest appear only when > 0.
+  addIf(true, () => `d${Math.max(0, totals.maxDepthFromHere)}`)
+  addIf(true, () => `${totals.descendantCount} agent${plural(totals.descendantCount)}`)
+  addIf(totals.totalTools > 0, () => `${totals.totalTools} tool${plural(totals.totalTools)}`)
+  addIf(totals.totalDuration > 0, () => fmtDuration(totals.totalDuration))
+  addIf(tokens > 0, () => `${fmtTokens(tokens)} tok`)
+  addIf(totals.costUsd > 0, () => fmtCost(totals.costUsd))
+  addIf(totals.activeCount > 0, () => `⚡${totals.activeCount}`)
+
+  return parts.join(' · ')
+}
+
+/** Compact dollar amount: `<$0.01`, `$0.02`, `$1.34`, `$12.4`; `''` when zero, negative or non-finite. */
+export function fmtCost(usd: number): string {
+  if (!Number.isFinite(usd) || usd <= 0) {
+    return ''
+  }
+
+  if (usd < 0.01) {
+    return '<$0.01'
+  }
+
+  if (usd < 9.995) {
+    return `$${usd.toFixed(2)}` // 9.995+ would round up to `$10.00`, so it takes the one-decimal form
+  }
+
+  return `$${usd.toFixed(1)}`
+}
+
+/** Compact token count: `12k`, `1.2k`, `542`; `0` for zero, negative or non-finite input. */
+export function fmtTokens(n: number): string {
+  if (!Number.isFinite(n) || n <= 0) {
+    return '0'
+  }
+
+  if (n < 999.5) {
+    return String(Math.round(n)) // 999.5+ would round to a bare `1000`
+  }
+
+  if (n < 9950) {
+    return `${(n / 1000).toFixed(1)}k` // 9950+ would round to `10.0k`
+  }
+
+  return `${Math.round(n / 1000)}k`
+}
+
+/**
+ * `Ns` / `Nm` / `Nm Ss` formatter for seconds, shared with the agents overlay.
+ * Rounds to whole seconds up front so 119.6 reads `2m`, never `1m 60s`.
+ */
+export function fmtDuration(seconds: number): string {
+  const total = Math.max(0, Math.round(seconds))
+  const [m, s] = [Math.floor(total / 60), total % 60]
+
+  if (m === 0) {
+    return `${s}s`
+  }
+
+  return s === 0 ? `${m}m` : `${m}m ${s}s`
+}
+
+/**
+ * Subagents that sit at the top level: those with no `parentId`, or whose
+ * `parentId` matches no `id` in `items` (e.g. the parent was pruned from
+ * the snapshot).  This is the same test `buildSubagentTree` applies.
+ */
+export function topLevelSubagents(items: readonly SubagentProgress[]): SubagentProgress[] {
+  const present = new Set<string>()
+  items.forEach(({ id }) => present.add(id))
+
+  return items.filter(({ parentId }) => !(parentId && present.has(parentId)))
+}
+
+/**
+ * Map `hotness` onto a palette index in 0..buckets-1, scaled against
+ * `peakHotness` so the hottest branch of a tree reaches the top index.
+ * Yields 0 for non-finite or non-positive hotness, a peak <= 0, or buckets <= 1.
+ */
+export function hotnessBucket(hotness: number, peakHotness: number, buckets: number): number {
+  if (!Number.isFinite(hotness) || hotness <= 0 || peakHotness <= 0 || buckets <= 1) {
+    return 0
+  }
+
+  const last = buckets - 1
+
+  return Math.min(last, Math.max(0, Math.round(Math.min(1, hotness / peakHotness) * last)))
+}
+
+/**
+ * Largest `aggregate.hotness` on any node in the forest; 0 when it is empty.
+ */
+export function peakHotness(tree: readonly SubagentNode[]): number {
+  let peak = 0
+  const pending: SubagentNode[] = [...tree]
+
+  for (let node = pending.pop(); node; node = pending.pop()) {
+    peak = Math.max(peak, node.aggregate.hotness)
+    pending.push(...node.children)
+  }
+
+  return peak
+}
@@ -94,7 +94,12 @@ export const DARK_THEME: Theme = {
    amber: '#FFBF00',
    bronze: '#CD7F32',
    cornsilk: '#FFF8DC',
-    dim: '#B8860B',
+    // Bumped from the old `#B8860B` darkgoldenrod (~53% luminance) which
+    // read as barely-visible on dark terminals for long body text.  The
+    // new value sits ~60% luminance — readable without losing the "muted /
+    // secondary" semantic.  Field labels still use `label` (65%) which
+    // stays brighter so hierarchy holds.
+    dim: '#CC9B1F',
    completionBg: '#FFFFFF',
    completionCurrentBg: mix('#FFFFFF', '#FFBF00', 0.25),

@@ -104,8 +109,11 @@ export const DARK_THEME: Theme = {
    warn: '#ffa726',

    prompt: '#FFF8DC',
-    sessionLabel: '#B8860B',
-    sessionBorder: '#B8860B',
+    // sessionLabel/sessionBorder intentionally track the `dim` value — they
+    // are "same role, same colour" by design.  fromSkin's banner_dim fallback
+    // relies on this pairing (#11300).
+    sessionLabel: '#CC9B1F',
+    sessionBorder: '#CC9B1F',

    statusBg: '#1a1a2e',
    statusFg: '#C0C0C0',
@@ -12,16 +12,72 @@ export interface ActivityItem {
 }

 export interface SubagentProgress {
+  apiCalls?: number
+  costUsd?: number
+  depth: number
  durationSeconds?: number
+  filesRead?: string[]
+  filesWritten?: string[]
  goal: string
  id: string
  index: number
+  inputTokens?: number
+  iteration?: number
+  model?: string
  notes: string[]
-  status: 'completed' | 'failed' | 'interrupted' | 'running'
+  outputTail?: SubagentOutputEntry[]
+  outputTokens?: number
+  parentId: null | string // falsy, or an id missing from the snapshot => top-level (topLevelSubagents)
+  reasoningTokens?: number
+  startedAt?: number
+  status: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
  summary?: string
  taskCount: number
  thinking: string[]
+  toolCount: number
  tools: string[]
+  toolsets?: string[]
+}
+
+export interface SubagentOutputEntry { // element type of SubagentProgress.outputTail
+  isError: boolean
+  preview: string
+  tool: string
+}
+
+export interface SubagentNode { // tree node; peakHotness() walks `children` recursively
+  aggregate: SubagentAggregate
+  children: SubagentNode[]
+  item: SubagentProgress
+}
+
+export interface SubagentAggregate {
+  activeCount: number
+  costUsd: number
+  descendantCount: number
+  filesTouched: number
+  hotness: number // peakHotness() takes the tree-wide max; hotnessBucket() maps a value to a palette index
+  inputTokens: number
+  maxDepthFromHere: number
+  outputTokens: number
+  totalDuration: number
+  totalTools: number
+}
+
+export interface DelegationStatus {
+  active: { // snake_case counterparts of several SubagentProgress fields — presumably the raw wire shape; confirm
+    depth?: number
+    goal?: string
+    model?: null | string
+    parent_id?: null | string
+    started_at?: number
+    status?: string
+    subagent_id?: string
+    tool_count?: number
+  }[]
+  max_concurrent_children?: number
+  max_spawn_depth?: number
+  paused: boolean
 }

 export interface ApprovalReq {