feat(gui): make desktop setup flow real and testable

Add a GUI-first setup gate and runtime state API so desktop onboarding is safe, iterative, and works with isolated fresh-mode installs. Scaffold and wire the desktop shell/runtime pieces so this branch runs end-to-end without disturbing existing user installs.
Merge pull request #15766 from NousResearch/bb/tui-ssh-copy
2026-04-25 19:48:02 -05:00 · 2026-04-25 15:33:17 -05:00 · 2026-04-25 15:32:45 -05:00 · 2026-04-25 15:26:51 -05:00 · 2026-04-25 15:21:26 -05:00 · 2026-04-25 15:17:36 -05:00
278 changed files with 31523 additions and 10070 deletions
@@ -52,10 +52,6 @@ ignored/
 .worktrees/
 environments/benchmarks/evals/

-# Compression eval run outputs (harness lives in scripts/compression_eval/)
-scripts/compression_eval/results/*
-!scripts/compression_eval/results/.gitkeep
-
 # Web UI build output
 hermes_cli/web_dist/

@@ -240,6 +240,19 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

+### TUI in the Dashboard (`hermes dashboard` → `/chat`)
+
+The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
+
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
+- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
+- Frames: raw PTY bytes in each direction; resize requests are sent as `\x1b[RESIZE:<cols>;<rows>]`, which the server intercepts and applies with `TIOCSWINSZ`.
+
+**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
+
+**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
+
 ---

 ## Adding New Tools
@@ -986,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
 # ---------------------------------------------------------------------------


+def _is_bedrock_model_id(model: str) -> bool:
+    """Detect AWS Bedrock model IDs that use dots as namespace separators.
+
+    Bedrock model IDs come in two forms:
+    - Bare:    ``anthropic.claude-opus-4-7``
+    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``,
+      ``apac.anthropic.claude-*`` (APAC profiles are spelled ``apac.``).
+
+    In both cases the dots separate namespace components, not version
+    numbers, and must be preserved verbatim for the Bedrock API.
+    """
+    # Regional inference-profile prefixes plus the bare ``anthropic.``
+    # namespace.  ``ap.`` is kept only for backward compatibility.
+    # ``str.startswith`` accepts a tuple, so one call covers every prefix.
+    return model.lower().startswith((
+        "global.", "us.", "us-gov.", "eu.", "apac.", "ap.", "au.", "jp.",
+        "anthropic.",
+    ))
+
+
 def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

@@ -993,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
+    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
+      regional inference profiles (``us.anthropic.claude-*``) whose dots
+      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
+        # Bedrock model IDs use dots as namespace separators
+        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
+        # These must not be converted to hyphens.  See issue #12295.
+        if _is_bedrock_model_id(model):
+            return model
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
        model = model.replace(".", "-")
@@ -1652,9 +1680,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, flush_memories, etc.) may set these for
-    # older models; drop them here as a safety net so upstream 4.6 → 4.7
-    # migrations don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, etc.) may set these for older models;
+    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
+    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -390,7 +390,7 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Tools support for flush_memories and similar callers
+        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
            converted = []
@@ -1349,6 +1349,49 @@ def _is_auth_error(exc: Exception) -> bool:
    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()


+def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
+    """Return True when ``exc`` looks like a provider rejecting ``param``.
+
+    OpenAI-compatible endpoints word this failure inconsistently, e.g.
+    ``Unsupported parameter: X``, ``unsupported_parameter`` plus a ``param``
+    field, ``X is not supported``, ``unknown parameter: X`` or
+    ``unrecognized request argument: X``.  The check is a case-insensitive
+    substring match on ``str(exc)`` requiring BOTH the parameter name and a
+    generic rejection phrase, so call sites can retry without the offending
+    key instead of surfacing a noisy auxiliary failure.
+
+    Generalizes the temperature-only detector from PR #15621 to any request
+    parameter; credit @nicholasrae (PR #15416) for the pattern.
+
+    Args:
+        exc: Exception raised by the provider call.
+        param: Request parameter name (e.g. ``"temperature"``); an empty or
+            ``None`` value always yields False.
+    """
+    needle = (param or "").lower()
+    if not needle:
+        return False
+    message = str(exc).lower()
+    if needle not in message:
+        return False
+    rejection_phrases = (
+        "unsupported parameter", "unsupported_parameter",
+        "not supported", "does not support",
+        "unknown parameter", "unrecognized request argument",
+        "unrecognized parameter", "invalid parameter",
+    )
+    return any(phrase in message for phrase in rejection_phrases)
+
+
+def _is_unsupported_temperature_error(exc: Exception) -> bool:
+    """Report whether ``exc`` is a provider rejection of ``temperature``.
+
+    Thin back-compat alias over :func:`_is_unsupported_parameter_error`,
+    retained under its own name because tests and call sites import it.
+    """
+    return _is_unsupported_parameter_error(exc, param="temperature")
+
+
 def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used."""
    normalized = _normalize_aux_provider(provider)
@@ -1993,6 +2036,39 @@ def resolve_provider_client(
                       "directly supported", provider)
        return None, None

+    elif pconfig.auth_type == "aws_sdk":
+        # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
+        # boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
+        try:
+            from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
+            from agent.anthropic_adapter import build_anthropic_bedrock_client
+        except ImportError:
+            logger.warning("resolve_provider_client: bedrock requested but "
+                           "boto3 or anthropic SDK not installed")
+            return None, None
+
+        if not has_aws_credentials():
+            logger.debug("resolve_provider_client: bedrock requested but "
+                         "no AWS credentials found")
+            return None, None
+
+        region = resolve_bedrock_region()
+        default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
+        final_model = _normalize_resolved_model(model or default_model, provider)
+        try:
+            real_client = build_anthropic_bedrock_client(region)
+        except ImportError as exc:
+            logger.warning("resolve_provider_client: cannot create Bedrock "
+                           "client: %s", exc)
+            return None, None
+        client = AnthropicAuxiliaryClient(
+            real_client, final_model, api_key="aws-sdk",
+            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
+        )
+        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # OAuth providers — route through their specific try functions
        if provider == "nous":
@@ -2727,8 +2803,8 @@ def _build_call_kwargs(
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
-    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
-    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0 on
+    # structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
        from agent.anthropic_adapter import _forbids_sampling_params
@@ -2816,7 +2892,7 @@ def call_llm(

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
-              "session_search", "skills_hub", "mcp", "flush_memories").
+              "session_search", "skills_hub", "mcp", "title_generation").
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
@@ -2919,13 +2995,45 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

-    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
+    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
+    # then payment fallback.
    try:
        return _validate_llm_response(
            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s: provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                # If retry still fails, fall through to the max_tokens /
+                # payment / auth chains below using the temperature-stripped
+                # kwargs.  Re-raise only if the retry hit something those
+                # chains won't handle.
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3188,8 +3296,35 @@ async def async_call_llm(
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s (async): provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -87,6 +87,114 @@ def reset_client_cache():
    _bedrock_control_client_cache.clear()


+def invalidate_runtime_client(region: str) -> bool:
+    """Drop the cached ``bedrock-runtime`` client for one region.
+
+    Region-scoped sibling of :func:`reset_client_cache`.  The converse call
+    wrappers use it to throw away a client whose HTTP connection has gone
+    stale, so the following call builds a fresh client rather than reusing
+    a dead socket.
+
+    Returns True when an entry was removed, False when ``region`` had no
+    cached client.
+    """
+    # A private sentinel distinguishes "absent" from any cached value.
+    absent = object()
+    return _bedrock_runtime_client_cache.pop(region, absent) is not absent
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection
+# ---------------------------------------------------------------------------
+#
+# boto3 caches its HTTPS connection pool inside the client object. When a
+# pooled connection is killed out from under us (NAT timeout, VPN flap,
+# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
+# one of a handful of low-level exceptions — most commonly
+# ``botocore.exceptions.ConnectionClosedError`` or
+# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
+# ``assert`` in a couple of paths (connection pool state checks, chunked
+# response readers) which bubbles up as a bare ``AssertionError`` with an
+# empty ``str(exc)``.
+#
+# In all of these cases the client is the problem, not the request: retrying
+# with the same cached client reproduces the failure until the process
+# restarts. The fix is to evict the region's cached client so the next
+# attempt builds a new one.
+
+_STALE_LIB_MODULE_PREFIXES = (
+    "urllib3.",
+    "botocore.",
+    "boto3.",
+)
+
+
+def _traceback_frames_modules(exc: BaseException):
+    """Yield the module ``__name__`` of each frame in exc's traceback."""
+    cursor = getattr(exc, "__traceback__", None)
+    while cursor is not None:
+        # A frame whose globals hold no truthy ``__name__`` yields ""
+        # instead of the falsy value.
+        yield cursor.tb_frame.f_globals.get("__name__", "") or ""
+        cursor = cursor.tb_next
+
+
+def is_stale_connection_error(exc: BaseException) -> bool:
+    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
+
+    Three families of exceptions are recognised:
+      * botocore's ``ConnectionError`` and ``HTTPClientError`` (and anything
+        derived from them).
+      * urllib3's ``ProtocolError``, ``NewConnectionError`` and
+        ``ConnectionError``.
+      * An ``AssertionError`` whose traceback passes through a module under
+        one of ``_STALE_LIB_MODULE_PREFIXES`` (urllib3 / botocore / boto3).
+
+    Both libraries are imported lazily and on a best-effort basis: when an
+    import fails, that family is simply skipped.  An ``AssertionError``
+    raised purely from application or test code is intentionally not
+    matched, because only library-internal asserts signal stale
+    connection-pool state.
+
+    This function only classifies the exception; evicting the cached
+    client is left to the caller.
+    """
+    # Stage 1 — botocore's own transport-level exception classes.
+    try:
+        from botocore.exceptions import (
+            ConnectionError as BotoConnectionError,
+            HTTPClientError,
+        )
+    except ImportError:  # pragma: no cover — botocore always present with boto3
+        pass
+    else:
+        if isinstance(exc, (BotoConnectionError, HTTPClientError)):
+            return True
+
+    # Stage 2 — lower-level urllib3 failures that reach us unwrapped.
+    try:
+        from urllib3.exceptions import (
+            ConnectionError as Urllib3ConnectionError,
+            NewConnectionError,
+            ProtocolError,
+        )
+    except ImportError:  # pragma: no cover
+        pass
+    else:
+        if isinstance(exc, (ProtocolError, NewConnectionError, Urllib3ConnectionError)):
+            return True
+
+    # Stage 3 — an assert tripped inside urllib3 / botocore / boto3.  Such
+    # asserts tend to carry an empty message, so the traceback's module
+    # names are the only usable signal.
+    if not isinstance(exc, AssertionError):
+        return False
+    return any(
+        module.startswith(_STALE_LIB_MODULE_PREFIXES)
+        for module in _traceback_frames_modules(exc)
+    )
+
+
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -787,7 +895,17 @@ def call_converse(
        guardrail_config=guardrail_config,
    )

-    response = client.converse(**kwargs)
+    try:
+        response = client.converse(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse(region=%s, model=%s): "
+                "%s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_response(response)


@@ -819,7 +937,17 @@ def call_converse_stream(
        guardrail_config=guardrail_config,
    )

-    response = client.converse_stream(**kwargs)
+    try:
+        response = client.converse_stream(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse_stream(region=%s, "
+                "model=%s): %s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_stream_events(response)


@@ -23,26 +23,52 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 logger = logging.getLogger(__name__)


+# Matches Codex/Harmony tool-call serialization that occasionally leaks into
+# assistant-message content when the model fails to emit a structured
+# ``function_call`` item.  Accepts the common forms:
+#
+#   to=functions.exec_command
+#   assistant to=functions.exec_command
+#   <|channel|>commentary to=functions.exec_command
+#
+# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
+# Harmony channel prefix varies by degeneration mode.  Case-insensitive to
+# cover lowercase/uppercase ``assistant`` variants.
+_TOOL_CALL_LEAK_PATTERN = re.compile(
+    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
+    re.IGNORECASE,
+)
+
+
 # ---------------------------------------------------------------------------
 # Multimodal content helpers
 # ---------------------------------------------------------------------------

-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
    """Convert chat-style multimodal content to Responses API input parts.

    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
+
+    The ``role`` parameter controls the text content type:
+    - ``"user"`` (default) → ``"input_text"``
+    - ``"assistant"`` → ``"output_text"``
+
+    The Responses API rejects ``input_text`` inside assistant messages and
+    ``output_text`` inside user messages, so callers MUST pass the correct
+    role for the message being converted.

    Returns an empty list when ``content`` is not a list or contains no
    recognized parts — callers fall back to the string path.
    """
+    text_type = "output_text" if role == "assistant" else "input_text"
    if not isinstance(content, list):
        return []
    converted: List[Dict[str, Any]] = []
    for part in content:
        if isinstance(part, str):
            if part:
-                converted.append({"type": "input_text", "text": part})
+                converted.append({"type": text_type, "text": part})
            continue
        if not isinstance(part, dict):
            continue
@@ -50,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
        if ptype in {"text", "input_text", "output_text"}:
            text = part.get("text")
            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
+                converted.append({"type": text_type, "text": text})
            continue
        if ptype in {"image_url", "input_image"}:
            image_ref = part.get("image_url")
@@ -216,9 +242,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
        if role in {"user", "assistant"}:
            content = msg.get("content", "")
            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
+                content_parts = _chat_content_to_responses_parts(content, role=role)
+                text_type = "output_text" if role == "assistant" else "input_text"
                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                    p.get("text", "") for p in content_parts if p.get("type") == text_type
                )
            else:
                content_parts = []
@@ -412,13 +439,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                content = ""
            if isinstance(content, list):
                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
+                # is already in Responses format (``input_text`` / ``output_text``
+                # / ``input_image``).  Validate each part and pass through.
+                # Use the correct text type for the role — ``output_text`` for
+                # assistant messages, ``input_text`` for user messages.
+                text_type = "output_text" if role == "assistant" else "input_text"
                validated: List[Dict[str, Any]] = []
                for part_idx, part in enumerate(content):
                    if isinstance(part, str):
                        if part:
-                            validated.append({"type": "input_text", "text": part})
+                            validated.append({"type": text_type, "text": part})
                        continue
                    if not isinstance(part, dict):
                        raise ValueError(
@@ -429,7 +459,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                        text = part.get("text", "")
                        if not isinstance(text, str):
                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
+                        validated.append({"type": text_type, "text": text})
                    elif ptype in {"input_image", "image_url"}:
                        image_ref = part.get("image_url", "")
                        detail = part.get("detail")
@@ -787,6 +817,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        if isinstance(out_text, str):
            final_text = out_text.strip()

+    # ── Tool-call leak recovery ──────────────────────────────────
+    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
+    # what should be a structured `function_call` item as plain assistant
+    # text using the Harmony/Codex serialization (``to=functions.foo
+    # {json}`` or ``assistant to=functions.foo {json}``). The model
+    # intended to call a tool, but the intent never made it into
+    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
+    # is empty here. If we pass this through, the parent sees a
+    # confident-looking summary with no audit trail (empty ``tool_trace``)
+    # and no tools actually ran — the Taiwan-embassy-email incident.
+    #
+    # Detection: leaked tokens always contain ``to=functions.<name>`` and
+    # the assistant message has no real tool calls. Treat it as incomplete
+    # so the existing Codex-incomplete continuation path (3 retries,
+    # handled in run_agent.py) gets a chance to re-elicit a proper
+    # ``function_call`` item. The existing loop already handles message
+    # append, dedup, and retry budget.
+    leaked_tool_call_text = False
+    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
+        leaked_tool_call_text = True
+        logger.warning(
+            "Codex response contains leaked tool-call text in assistant content "
+            "(no structured function_call items). Treating as incomplete so the "
+            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
+            final_text[:300],
+        )
+        # Clear the text so downstream code doesn't surface the garbage as
+        # a summary. The encrypted reasoning items (if any) are preserved
+        # so the model keeps its chain-of-thought on the retry.
+        final_text = ""
+
    assistant_message = SimpleNamespace(
        content=final_text,
        tool_calls=tool_calls,
@@ -798,6 +859,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

    if tool_calls:
        finish_reason = "tool_calls"
+    elif leaked_tool_call_text:
+        finish_reason = "incomplete"
    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif reasoning_items_raw and not final_text:
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_summary_error = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -317,6 +318,13 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Recalculate token budgets for the new context length so the
+        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )

    def __init__(
        self,
@@ -389,6 +397,7 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -812,10 +821,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
            self._summary_model_fallen_back = False
+            self._last_summary_error = None
            return self._with_summary_prefix(summary)
        except RuntimeError:
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
@@ -853,6 +864,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
@@ -31,6 +31,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+import inspect
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -312,7 +313,39 @@ class MemoryManager:
                )
        return "\n\n".join(parts)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    @staticmethod
+    def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
+        """Return how to pass metadata to a provider's memory-write hook.
+
+        Inspects the signature of ``provider.on_memory_write`` and returns:
+
+        * ``"keyword"`` -- pass ``metadata=...``.  Chosen when the hook takes
+          ``**kwargs``, declares a ``metadata`` parameter that can be bound by
+          keyword, or cannot be introspected at all.
+        * ``"positional"`` -- pass metadata as a fourth positional argument.
+          Chosen when the hook exposes at least four positional slots.
+        * ``"legacy"`` -- call with ``(action, target, content)`` only.
+        """
+        try:
+            signature = inspect.signature(provider.on_memory_write)
+        except (TypeError, ValueError):
+            return "keyword"
+
+        params = list(signature.parameters.values())
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
+            return "keyword"
+
+        # A positional-only ``metadata`` cannot be bound by keyword, so the
+        # name alone is not enough -- the parameter kind has to allow it.
+        metadata_param = signature.parameters.get("metadata")
+        if metadata_param is not None and metadata_param.kind in (
+            inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            inspect.Parameter.KEYWORD_ONLY,
+        ):
+            return "keyword"
+
+        # Only parameters that can actually receive a positional argument
+        # count here.  Keyword-only parameters are excluded: counting them
+        # would classify ``(action, target, content, *, extra=None)`` as
+        # "positional" and the four-argument call would raise TypeError.
+        positional = [
+            p for p in params
+            if p.kind in (
+                inspect.Parameter.POSITIONAL_ONLY,
+                inspect.Parameter.POSITIONAL_OR_KEYWORD,
+            )
+        ]
+        if len(positional) >= 4:
+            return "positional"
+        return "legacy"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
@@ -321,7 +354,15 @@ class MemoryManager:
            if provider.name == "builtin":
                continue
            try:
-                provider.on_memory_write(action, target, content)
+                metadata_mode = self._provider_memory_write_metadata_mode(provider)
+                if metadata_mode == "keyword":
+                    provider.on_memory_write(
+                        action, target, content, metadata=dict(metadata or {})
+                    )
+                elif metadata_mode == "positional":
+                    provider.on_memory_write(action, target, content, dict(metadata or {}))
+                else:
+                    provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content) — mirror built-in memory writes
+  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
 """

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
          should all have ``env_var`` set and this method stays no-op).
        """

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Called when the built-in memory tool writes an entry.

        action: 'add', 'replace', or 'remove'
        target: 'memory' or 'user'
        content: the entry content
+        metadata: structured provenance for the write, when available. Common
+          keys include ``write_origin``, ``execution_context``, ``session_id``,
+          ``parent_session_id``, ``platform``, and ``tool_name``.

        Use to mirror built-in memory writes to your backend.
        """
@@ -1199,6 +1199,7 @@ def get_model_context_length(
    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
+    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1237,6 +1238,26 @@ def get_model_context_length(
            else:
                return cached

+    # 1b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py that reflects
+    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
+    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
+    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
+    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
+    # fail the /models probe (Bedrock doesn't expose that shape), and fall
+    # back to the 128K default before reaching the original step 4b branch.
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution
+
    # 2. Active endpoint metadata for truly custom/unknown endpoints.
    # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
    # /models endpoint may report a provider-imposed limit (e.g. Copilot
@@ -1282,19 +1303,7 @@ def get_model_context_length(
        if ctx:
            return ctx

-    # 4b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)

    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
@@ -7,11 +7,15 @@ can invoke skills via /skill-name commands.
 import json
 import logging
 import re
-import subprocess
 from pathlib import Path
 from typing import Any, Dict, Optional

 from hermes_constants import display_hermes_home
+from agent.skill_preprocessing import (
+    expand_inline_shell as _expand_inline_shell,
+    load_skills_config as _load_skills_config,
+    substitute_template_vars as _substitute_template_vars,
+)

 logger = logging.getLogger(__name__)

@@ -20,111 +24,6 @@ _skill_commands: Dict[str, Dict[str, Any]] = {}
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-
-
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -143,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        else:
            normalized = raw_identifier.lstrip("/")

-        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
+        loaded_skill = json.loads(
+            skill_view(normalized, task_id=task_id, preprocess=False)
+        )
    except Exception:
        return None

@@ -0,0 +1,131 @@
+"""Shared SKILL.md preprocessing helpers."""
+
+import logging
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only -- no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def load_skills_config() -> dict:
+    """Return the ``skills`` mapping from config.yaml, or ``{}``.
+
+    Best-effort: any failure while importing or reading the config is logged
+    at debug level and yields an empty dict, as does a ``skills`` entry that
+    is missing or not a dict.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        section = (load_config() or {}).get("skills")
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+        return {}
+    return section if isinstance(section, dict) else {}
+
+
+def substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Expand ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in ``content``.
+
+    A token is replaced only when a non-empty value is known for it; any
+    other token is left untouched so the skill author can see it.
+    """
+    if not content:
+        return content
+
+    # Map of token name -> replacement text, holding only resolvable tokens.
+    known: dict[str, str] = {}
+    if skill_dir:
+        dir_text = str(skill_dir)
+        if dir_text:
+            known["HERMES_SKILL_DIR"] = dir_text
+    if session_id:
+        known["HERMES_SESSION_ID"] = str(session_id)
+
+    return _SKILL_TEMPLATE_RE.sub(
+        lambda found: known.get(found.group(1), found.group(0)),
+        content,
+    )
+
+
+def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    The snippet runs through ``bash -c`` with ``cwd`` as working directory
+    (when given).  ``timeout`` is coerced to an int and clamped to at least
+    one second.  If stdout is empty, stderr is returned instead.  Output
+    longer than ``_INLINE_SHELL_MAX_OUTPUT`` characters is truncated.
+
+    Failures return a short ``[inline-shell ...]`` marker instead of raising,
+    so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        # Report the timeout that was actually enforced (after the int/clamp
+        # normalisation above) rather than the raw argument.
+        return f"[inline-shell timeout after {exc.timeout}s: {command}]"
+    except FileNotFoundError:
+        return "[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
+    return output
+
+
+def expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Substitute each !`cmd` snippet in ``content`` with the command's output.
+
+    Every snippet is executed with ``skill_dir`` as its working directory so
+    that relative paths behave the way the skill author expects.  A snippet
+    that is blank after stripping expands to the empty string.
+    """
+    # Cheap pre-check: skip the regex pass when no snippet marker is present.
+    if "!`" not in content:
+        return content
+
+    def _render(snippet: re.Match) -> str:
+        shell_cmd = snippet.group(1).strip()
+        return run_inline_shell(shell_cmd, skill_dir, timeout) if shell_cmd else ""
+
+    return _INLINE_SHELL_RE.sub(_render, content)
+
+
+def preprocess_skill_content(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None = None,
+    skills_cfg: dict | None = None,
+) -> str:
+    """Apply configured SKILL.md template and inline-shell preprocessing.
+
+    ``skills_cfg`` is the ``skills`` config section; when it is not a dict
+    the section is loaded via ``load_skills_config()``.  Template-variable
+    substitution runs unless ``template_vars`` is falsy (default on).
+    Inline-shell expansion runs only when ``inline_shell`` is truthy
+    (default off), using ``inline_shell_timeout`` seconds (default 10).
+
+    Empty ``content`` is returned unchanged.
+    """
+    if not content:
+        return content
+
+    cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
+    if cfg.get("template_vars", True):
+        content = substitute_template_vars(content, skill_dir, session_id)
+    if cfg.get("inline_shell", False):
+        raw_timeout = cfg.get("inline_shell_timeout", 10) or 10
+        try:
+            timeout = int(raw_timeout)
+        except (TypeError, ValueError, OverflowError):
+            # A malformed config value must not break skill loading; the
+            # rest of this pipeline is best-effort, so fall back to default.
+            logger.warning(
+                "Invalid skills.inline_shell_timeout %r; using 10 seconds",
+                raw_timeout,
+            )
+            timeout = 10
+        content = expand_inline_shell(content, skill_dir, timeout)
+    return content
@@ -0,0 +1,58 @@
+# Hermes Apps
+
+Platform apps live here. The first app is a cross-platform GUI shell around the
+existing Hermes dashboard; it should not fork chat, config, logs, or session UI.
+
+## Shape
+
+```text
+apps/
+  gui/      # cross-platform app shell: dev Chrome shell now, Tauri native next
+  shared/   # runtime bundle notes/scripts used by Windows + macOS packaging
+```
+
+## Desktop Dev
+
+The backend-only GUI mode is:
+
+```bash
+hermes dashboard --gui
+```
+
+The fast GUI shell is (replace the example `\\wsl$` path with your own distro and checkout location):
+
+```powershell
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev
+```
+
+The native Tauri shell is:
+
+```powershell
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev:tauri
+```
+
+`--gui` implies the embedded TUI; do not pass `--tui` separately for GUI mode.
+
+## MVP Boundary
+
+Included:
+
+- bundled Python runtime
+- bundled Node/TUI runtime
+- CLI install to PATH
+- profile picker and first-run setup
+- dashboard health/reconnect state
+- tray controls
+- desktop notifications
+- Windows installer
+
+Deferred:
+
+- code signing
+- native self-updater
+- store distribution
+
+For MVP updates, the desktop UI should run the existing `hermes update` flow and
+surface progress/finish notifications.
@@ -0,0 +1,102 @@
+# Hermes GUI
+
+Cross-platform GUI shell for the Hermes dashboard.
+
+## Fast Dev Shell
+
+This gets a GUI window on Windows/WSL today by launching Chrome in app mode:
+
+```bash
+cd apps/gui
+npm run dev
+```
+
+It starts `hermes dashboard --gui --no-open --port 9120`, waits for
+`/api/health`, then opens a standalone app window at `http://127.0.0.1:9120`.
+
+## Native Shell
+
+The native Tauri shell is still scaffolded:
+
+```bash
+cd apps/gui
+npm run dev:tauri
+```
+
+From Windows PowerShell on a `\\wsl$` path, use PowerShell `npm`, not
+`npm.cmd` (the paths below are examples; substitute your own distro and checkout):
+
+```powershell
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
+cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
+npm run dev:tauri
+```
+
+`npm.cmd` goes through `cmd.exe`, and `cmd.exe` cannot use UNC paths as the
+current directory.
+
+If `npm run` still falls through `cmd.exe`, bypass npm entirely:
+
+```powershell
+\\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1
+```
+
+The launcher builds into `%LOCALAPPDATA%\Hermes\cargo-target\gui` instead of
+`\\wsl$` because Windows Cargo incremental locks do not work reliably on UNC
+WSL filesystems.
+
+In dev, either start Hermes yourself:
+
+```bash
+hermes dashboard --gui --no-open --port 9120
+```
+
+or let the native shell start it. The tray menu owns:
+
+- Open Hermes
+- Open in Browser
+- Restart Hermes Runtime
+- Quit Hermes
+
+The native shell reuses a healthy GUI runtime when one is already running.
+Otherwise it picks the first free port from `9120..9139`, passes that port into
+the WSL/backend process, and navigates the Tauri window there. Set
+`HERMES_GUI_PORT` to force a starting port.
+
+## Fresh Install Emulation
+
+Use an isolated Hermes home without touching your real `~/.hermes`:
+
+```powershell
+powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh
+```
+
+Reset that disposable home and run again:
+
+```powershell
+powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh -ResetFresh
+```
+
+Fresh mode stores state in `%LOCALAPPDATA%\Hermes\fresh-install-home` and starts
+from port `9140` so it does not collide with your normal GUI dev session.
+
+Set `HERMES_GUI_MIN_SPLASH_MS` only when debugging the startup screen; default
+startup is instant once the backend is healthy.
+
+## Boundary
+
+GUI owns:
+
+- app shell/window
+- startup state
+- sidecar process lifecycle
+- future tray/notifications/installers
+
+Hermes owns:
+
+- dashboard UI
+- auth/session token
+- profiles/config/env
+- TUI/PTY chat bridge
+- tools/skills/gateway
+- update flow
@@ -0,0 +1,57 @@
+# Launches the Tauri CLI for the Hermes GUI app from Windows PowerShell.
+#
+# Parameters:
+#   -Command     Sub-command handed to the Tauri CLI (default: "dev").
+#   -Fresh       Run against a disposable HERMES_HOME under %LOCALAPPDATA%
+#                and start from port 9140.
+#   -ResetFresh  With -Fresh, delete that disposable home before starting.
+param(
+  [string]$Command = "dev",
+  [switch]$Fresh,
+  [switch]$ResetFresh
+)
+
+# Treat every error as terminating so a failed step aborts the launch.
+$ErrorActionPreference = "Stop"
+Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
+
+# Directory containing this script (the GUI app root).
+$AppRoot = Split-Path -Parent $MyInvocation.MyCommand.Path
+# NOTE(review): $Script is assigned but not referenced anywhere below.
+$Script = Join-Path $AppRoot "scripts\tauri.mjs"
+
+# Fail early with an install hint when the Windows toolchain is missing.
+if (-not (Get-Command node -ErrorAction SilentlyContinue)) {
+  throw "Windows Node.js was not found. Install it with: winget install OpenJS.NodeJS.LTS"
+}
+
+if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {
+  throw "Windows Rust was not found. Install it with: winget install Rustlang.Rustup"
+}
+
+# Either a standalone `tauri` binary or the `cargo tauri` subcommand will do.
+$Tauri = Get-Command tauri -ErrorAction SilentlyContinue
+$CargoTauri = Get-Command cargo-tauri -ErrorAction SilentlyContinue
+
+if (-not $Tauri -and -not $CargoTauri) {
+  throw "Tauri CLI not found. Install it with: npm install -g @tauri-apps/cli (run from a normal Windows path, not \\wsl$)"
+}
+
+# Build into a local Windows directory with incremental compilation off,
+# rather than inside the (possibly \\wsl$) source tree.
+$env:CARGO_INCREMENTAL = "0"
+$env:CARGO_TARGET_DIR = Join-Path $env:LOCALAPPDATA "Hermes\cargo-target\gui"
+New-Item -ItemType Directory -Force -Path $env:CARGO_TARGET_DIR | Out-Null
+
+# Fresh mode: point Hermes at an isolated home directory and a separate port.
+if ($Fresh) {
+  $FreshHome = Join-Path $env:LOCALAPPDATA "Hermes\fresh-install-home"
+  if ($ResetFresh -and (Test-Path $FreshHome)) {
+    Remove-Item -Recurse -Force $FreshHome
+  }
+  New-Item -ItemType Directory -Force -Path $FreshHome | Out-Null
+  $env:HERMES_HOME = $FreshHome
+  $env:HERMES_GUI_PORT = "9140"
+  $env:HERMES_GUI_FRESH = "1"
+  Write-Host "Fresh GUI mode"
+  Write-Host "  HERMES_HOME=$FreshHome"
+  Write-Host "  HERMES_GUI_PORT=$env:HERMES_GUI_PORT"
+}
+
+# Run the CLI from the app root and always restore the caller's location.
+Push-Location $AppRoot
+try {
+  if ($Tauri) {
+    & tauri $Command
+  }
+  else {
+    & cargo tauri $Command
+  }
+}
+finally {
+  Pop-Location
+}
@@ -0,0 +1,13 @@
+{
+  "name": "@hermes/gui",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "node scripts/dev-shell.mjs",
+    "dev:tauri": "node scripts/tauri.mjs dev",
+    "build": "node scripts/tauri.mjs build",
+    "dashboard": "node scripts/start-dashboard.mjs",
+    "tauri": "node scripts/tauri.mjs"
+  }
+}
@@ -0,0 +1,156 @@
+import { spawn, spawnSync } from "node:child_process";
+import { createServer } from "node:net";
+import { dirname, resolve } from "node:path";
+import { setTimeout as delay } from "node:timers/promises";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(here, "../../..");
+const python = process.env.HERMES_PYTHON || "python";
+let port = process.env.HERMES_GUI_PORT || "9120";
+let url = `http://127.0.0.1:${port}`;
+
+let dashboard = null;
+
+// Kill the spawned dashboard child, unless none exists or kill() was already sent.
+function stop() {
+  if (!dashboard || dashboard.killed) return;
+  dashboard.kill();
+}
+
+// On SIGINT/SIGTERM stop the dashboard and exit with 128 + signal number.
+for (const [signalName, exitCode] of [
+  ["SIGINT", 130],
+  ["SIGTERM", 143],
+]) {
+  process.on(signalName, () => {
+    stop();
+    process.exit(exitCode);
+  });
+}
+// Also stop the dashboard on any other exit path.
+process.on("exit", stop);
+
+// Poll the health endpoint up to 120 times, 500 ms apart; true once healthy.
+async function waitForHealth() {
+  let attemptsLeft = 120;
+  while (attemptsLeft > 0) {
+    if (await isHealthy()) return true;
+    await delay(500);
+    attemptsLeft -= 1;
+  }
+  return false;
+}
+
+// True when GET <url>/api/health answers within 1 s with an OK response
+// whose JSON body has status "ok"; any failure counts as unhealthy.
+async function isHealthy() {
+  try {
+    const response = await fetch(`${url}/api/health`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    const body = await response.json();
+    if (!response.ok) return false;
+    return body.status === "ok";
+  } catch {
+    return false;
+  }
+}
+
+// Resolve true if a throwaway server can listen on 127.0.0.1:<candidate>,
+// false if listening fails. The probe server is closed before resolving.
+function canBind(candidate) {
+  const probe = createServer();
+  return new Promise((settle) => {
+    probe.once("error", () => settle(false));
+    probe.listen(Number(candidate), "127.0.0.1", () => {
+      probe.close(() => settle(true));
+    });
+  });
+}
+
+// Pick the first bindable port among the 20 starting at the current `port`
+// and update `port`/`url`. An explicit HERMES_GUI_PORT is never overridden;
+// if nothing is bindable the current values are left unchanged.
+async function choosePort() {
+  if (process.env.HERMES_GUI_PORT) return;
+
+  const firstPort = Number(port);
+  for (let candidate = firstPort; candidate < firstPort + 20; candidate += 1) {
+    if (!(await canBind(candidate))) continue;
+    port = String(candidate);
+    url = `http://127.0.0.1:${port}`;
+    return;
+  }
+}
+
+// Spawn `hermes dashboard --gui` on the chosen port as a child process that
+// shares this process's stdio, and mirror its exit code when it exits.
+function startDashboard() {
+  dashboard = spawn(
+    python,
+    [
+      "-m",
+      "hermes_cli.main",
+      "dashboard",
+      "--gui",
+      "--no-open",
+      "--host",
+      "127.0.0.1",
+      "--port",
+      port,
+    ],
+    {
+      cwd: repoRoot,
+      env: {
+        ...process.env,
+        HERMES_GUI: "1",
+      },
+      stdio: "inherit",
+    },
+  );
+
+  // A failed spawn (e.g. the Python executable is missing) is reported via
+  // the "error" event; without a listener it becomes an unhandled exception.
+  dashboard.on("error", (err) => {
+    console.error(
+      `Could not start the Hermes dashboard with "${python}": ${err.message}`,
+    );
+    process.exit(1);
+  });
+
+  dashboard.on("exit", (code) => {
+    process.exit(code ?? 0);
+  });
+}
+
+// Run a command synchronously with output discarded; true on exit status 0.
+// On Windows the command goes through a shell.
+function run(command, args) {
+  const { status } = spawnSync(command, args, {
+    shell: process.platform === "win32",
+    stdio: "ignore",
+  });
+  return status === 0;
+}
+
+// Open the GUI URL in a browser "app" window, trying platform-appropriate
+// launchers in order and falling back to the default URL handler.
+// Returns true as soon as one launcher exits with status 0.
+function openGuiWindow() {
+  const appFlag = `--app=${url}`;
+
+  // Native Windows and WSL both launch through cmd.exe's `start`.
+  if (process.platform === "win32" || process.env.WSL_DISTRO_NAME) {
+    const start = (...target) => run("cmd.exe", ["/C", "start", "", ...target]);
+    return start("chrome", appFlag) || start("msedge", appFlag) || start(url);
+  }
+
+  if (process.platform === "darwin") {
+    return (
+      run("open", ["-na", "Google Chrome", "--args", appFlag]) ||
+      run("open", [url])
+    );
+  }
+
+  return (
+    run("google-chrome", [appFlag]) ||
+    run("chromium", [appFlag]) ||
+    run("xdg-open", [url])
+  );
+}
+
+// Entry sequence: reuse a healthy dashboard if one already answers at `url`;
+// otherwise pick a port, start the dashboard and open a window once healthy.
+const alreadyRunning = await isHealthy();
+if (alreadyRunning) {
+  console.log(`Hermes GUI already running -> ${url}`);
+  openGuiWindow();
+  process.exit(0);
+}
+
+await choosePort();
+startDashboard();
+
+const becameHealthy = await waitForHealth();
+if (!becameHealthy) {
+  console.error(`Hermes GUI did not become healthy at ${url}`);
+} else {
+  console.log(`Hermes GUI -> ${url}`);
+  openGuiWindow();
+}
@@ -0,0 +1,95 @@
+import { spawn } from "node:child_process";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(here, "../../..");
+const python = process.env.HERMES_PYTHON || "python";
+const port = process.env.HERMES_GUI_PORT || "9120";
+const url = `http://127.0.0.1:${port}`;
+
+// True when GET <url>/api/health answers within 1 s with an OK response
+// whose JSON body has status "ok"; any failure counts as unhealthy.
+async function isHealthy() {
+  try {
+    const response = await fetch(`${url}/api/health`, {
+      signal: AbortSignal.timeout(1000),
+    });
+    const body = await response.json();
+    if (!response.ok) return false;
+    return body.status === "ok";
+  } catch {
+    return false;
+  }
+}
+
+// When running on Windows from a \\wsl$\<distro>\... or
+// \\wsl.localhost\<distro>\... checkout, return the distro name and the
+// Linux-side path of the repo root; otherwise return null.
+function wslRepoRoot() {
+  if (process.platform !== "win32") return null;
+
+  // "//host/distro/rest..." splits into ["", "", host, distro, ...rest].
+  const [, , rawHost, distro, ...rest] = repoRoot
+    .replaceAll("\\", "/")
+    .split("/");
+  const host = rawHost?.toLowerCase();
+  if (host !== "wsl$" && host !== "wsl.localhost") return null;
+
+  const path = `/${rest.join("/")}`;
+  return distro && path !== "/" ? { distro, path } : null;
+}
+
+// Spawn `hermes dashboard --gui` with inherited stdio. For a WSL-hosted
+// checkout on Windows the process is started inside that distro through
+// wsl.exe; otherwise it is started directly with the local Python.
+function spawnDashboard() {
+  const dashboardArgs = [
+    "-m",
+    "hermes_cli.main",
+    "dashboard",
+    "--gui",
+    "--no-open",
+    "--host",
+    "127.0.0.1",
+    "--port",
+    port,
+  ];
+
+  const wsl = wslRepoRoot();
+  if (!wsl) {
+    return spawn(python, dashboardArgs, {
+      cwd: repoRoot,
+      env: {
+        ...process.env,
+        HERMES_GUI: "1",
+      },
+      stdio: "inherit",
+    });
+  }
+
+  // Inside WSL, HERMES_GUI is set via `env` on the Linux side.
+  const wslPython = process.env.HERMES_WSL_PYTHON || "python";
+  return spawn(
+    "wsl.exe",
+    [
+      "-d",
+      wsl.distro,
+      "--cd",
+      wsl.path,
+      "env",
+      "HERMES_GUI=1",
+      wslPython,
+      ...dashboardArgs,
+    ],
+    { stdio: "inherit" },
+  );
+}
+
+// Entry sequence: do nothing if a healthy dashboard already answers at
+// `url`; otherwise spawn one and mirror how it terminates.
+const alreadyRunning = await isHealthy();
+if (alreadyRunning) {
+  console.log(`Hermes GUI already running -> ${url}`);
+  process.exit(0);
+}
+
+const child = spawnDashboard();
+
+child.on("exit", (code, signal) => {
+  // Re-raise the child's terminating signal on this process, if any.
+  if (signal) {
+    process.kill(process.pid, signal);
+  }
+  process.exit(code ?? 0);
+});
@@ -0,0 +1,90 @@
+import { spawnSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const here = dirname(fileURLToPath(import.meta.url));
+const appRoot = resolve(here, "..");
+const bin = process.platform === "win32" ? "tauri.cmd" : "tauri";
+const localTauri = resolve(appRoot, "node_modules", ".bin", bin);
+const args = process.argv.slice(2);
+
+// True when running on Linux with WSL_DISTRO_NAME set in the environment.
+function isWsl() {
+  if (process.platform !== "linux") return false;
+  return Boolean(process.env.WSL_DISTRO_NAME);
+}
+
+// Wrap a string in PowerShell single quotes, doubling embedded single quotes.
+function quotePs(value) {
+  const escaped = value.split("'").join("''");
+  return "'" + escaped + "'";
+}
+
+// Hand the current invocation over to Windows PowerShell: translate the app
+// root to a Windows path, then re-run this script there with the same
+// arguments. Exits with PowerShell's status on hand-off; returns false when
+// the Windows path cannot be determined so the caller can print a fallback.
+function dispatchToWindows() {
+  const pathResult = spawnSync("wslpath", ["-w", appRoot], {
+    encoding: "utf8",
+  });
+  // stdout is null when wslpath itself could not be spawned.
+  const windowsPath = (pathResult.stdout ?? "").trim();
+  if (!windowsPath) return false;
+
+  // Forward the caller's sub-command (dev, build, ...) instead of always
+  // running dev, and invoke node directly rather than going through npm.
+  const scriptPath = `${windowsPath}\\scripts\\tauri.mjs`;
+  const forwardedArgs = args.map(quotePs).join(" ");
+
+  const command = [
+    "$ErrorActionPreference = 'Stop'",
+    "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
+    "if (-not (Get-Command node -ErrorAction SilentlyContinue)) {",
+    '  Write-Error "Windows Node.js was not found. Install it with: winget install OpenJS.NodeJS.LTS"',
+    "}",
+    "if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {",
+    '  Write-Error "Windows Rust was not found. Install Rust first: winget install Rustlang.Rustup"',
+    "}",
+    `Set-Location -LiteralPath ${quotePs(windowsPath)}`,
+    `& node ${quotePs(scriptPath)} ${forwardedArgs}`,
+  ].join("; ");
+  const result = spawnSync(
+    "powershell.exe",
+    ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
+    { stdio: "inherit" },
+  );
+  process.exit(result.status ?? 1);
+}
+
+// Run `command commandArgs` from the app root with inherited stdio.
+// On Windows the command is executed through PowerShell. Returns false when
+// the process could not be spawned (ENOENT). Otherwise, with `exit` set
+// (the default) the current process exits with the child's status; with
+// `exit: false` the return value says whether the child exited with 0.
+function run(command, commandArgs, { exit = true } = {}) {
+  let result;
+
+  if (process.platform === "win32") {
+    const psCommand = [
+      "$ErrorActionPreference = 'Stop'",
+      "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
+      `Set-Location -LiteralPath ${quotePs(appRoot)}`,
+      `& ${quotePs(command)} ${commandArgs.map(quotePs).join(" ")}`,
+    ].join("; ");
+    result = spawnSync(
+      "powershell.exe",
+      ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", psCommand],
+      { stdio: "inherit" },
+    );
+  } else {
+    result = spawnSync(command, commandArgs, {
+      cwd: appRoot,
+      env: process.env,
+      stdio: "inherit",
+    });
+  }
+
+  if (result.error?.code === "ENOENT") return false;
+  if (exit) process.exit(result.status ?? 1);
+  return result.status === 0;
+}
+
+// Inside WSL (unless HERMES_GUI_TAURI_WSL=1), hand off to Windows; reaching
+// the lines after dispatchToWindows() means the hand-off did not happen.
+const stayInWsl = process.env.HERMES_GUI_TAURI_WSL === "1";
+if (isWsl() && !stayInWsl) {
+  console.log("Launching native Windows Tauri from WSL...");
+  dispatchToWindows();
+  console.error(
+    "Could not hand off to Windows PowerShell. Run this from Windows PowerShell instead:",
+  );
+  console.error("  cd \\\\wsl$\\Ubuntu\\home\\bb\\hermes-agent\\apps\\gui");
+  console.error("  npm run dev:tauri");
+  process.exit(1);
+}
+
+// Resolution order: project-local CLI, global `tauri`, `cargo tauri`, npx.
+if (existsSync(localTauri)) run(localTauri, args);
+
+const fallbacks = [
+  ["tauri", args],
+  ["cargo", ["tauri", ...args]],
+];
+for (const [tool, toolArgs] of fallbacks) {
+  if (run(tool, toolArgs, { exit: false })) process.exit(0);
+}
+
+run("npx", ["--yes", "@tauri-apps/cli@latest", ...args]);
@@ -0,0 +1 @@
+/target/
@@ -0,0 +1,17 @@
+[package]
+name = "hermes-gui"
+version = "0.0.0"
+description = "Hermes GUI shell"
+edition = "2021"
+
+[lib]
+name = "hermes_gui_lib"
+crate-type = ["staticlib", "cdylib", "rlib"]
+
+[build-dependencies]
+tauri-build = { version = "2", features = [] }
+
+[dependencies]
+tauri = { version = "2", features = ["tray-icon"] }
+tauri-plugin-notification = "2"
+tauri-plugin-opener = "2"
@@ -0,0 +1,3 @@
+fn main() {
+    tauri_build::build();
+}
@@ -0,0 +1,7 @@
+{
+  "$schema": "../gen/schemas/desktop-schema.json",
+  "identifier": "default",
+  "description": "Default Hermes GUI permissions",
+  "windows": ["main"],
+  "permissions": ["core:default", "notification:default", "opener:default"]
+}
@@ -0,0 +1 @@
+{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
+  <rect width="100" height="100" rx="18" fill="#071313"/>
+  <text x="50" y="70" text-anchor="middle" font-size="68" fill="#f0e6d2">⚕</text>
+</svg>
@@ -0,0 +1 @@
+
@@ -0,0 +1,433 @@
+use std::{
+    io::{Read, Write},
+    net::{TcpListener, TcpStream},
+    process::{Child, Command, Stdio},
+    sync::Mutex,
+    time::{Duration, Instant},
+};
+
+use tauri::{
+    image::Image,
+    menu::{Menu, MenuItem, PredefinedMenuItem},
+    tray::{MouseButton, MouseButtonState, TrayIconBuilder, TrayIconEvent},
+    App, AppHandle, Manager, WebviewWindow,
+};
+
+// Loopback address used for health probes, port-bind tests, and the
+// dashboard's `--host` argument.
+const GUI_HOST: &str = "127.0.0.1";
+// Port used when HERMES_GUI_PORT is unset or does not parse as a u16.
+const DEFAULT_GUI_PORT: u16 = 9120;
+// Default minimum splash duration in milliseconds; HERMES_GUI_MIN_SPLASH_MS
+// overrides it at runtime.
+const MIN_SPLASH_MS: u64 = 0;
+// Percent-encoded inline HTML page ("Starting Hermes…") shown in the window
+// until the dashboard answers its health check.
+const SPLASH_URL: &str = "data:text/html,%3C!doctype%20html%3E%3Cmeta%20charset%3Dutf-8%3E%3Cstyle%3Ebody%7Bmargin%3A0%3Bheight%3A100vh%3Bdisplay%3Agrid%3Bplace-items%3Acenter%3Bbackground%3A%23071313%3Bcolor%3A%23f0e6d2%3Bfont%3A14px%20monospace%3Bletter-spacing%3A.08em%3Btext-transform%3Auppercase%7D%3C%2Fstyle%3E%3Cbody%3EStarting%20Hermes%E2%80%A6%3C%2Fbody%3E";
+
+/// Shared shell state registered with Tauri via `.manage(...)`.
+struct GuiState {
+    /// Dashboard process spawned by this shell, if any. `None` when no
+    /// process has been started by the shell or it has been stopped.
+    child: Mutex<Option<Child>>,
+    /// Port the shell currently uses for the dashboard URL and health checks.
+    port: Mutex<u16>,
+}
+
+/// Build the dashboard's base HTTP URL on the loopback host for `port`.
+fn gui_url(port: u16) -> String {
+    let authority = format!("{}:{}", GUI_HOST, port);
+    format!("http://{}", authority)
+}
+
+/// Probe `GET /api/health` on the loopback host and report whether a Hermes
+/// dashboard in GUI mode is answering on `port`.
+///
+/// Returns `true` only when the HTTP status line carries code 200 and the
+/// response contains both `"status":"ok"` and `"mode":"gui"` (whitespace
+/// between JSON tokens is ignored). Any connection, write, or parse failure
+/// yields `false`; the function never panics.
+fn check_health(port: u16) -> bool {
+    let Ok(addr) = format!("{GUI_HOST}:{port}").parse::<std::net::SocketAddr>() else {
+        return false;
+    };
+    let Ok(mut stream) = TcpStream::connect_timeout(&addr, Duration::from_secs(1)) else {
+        return false;
+    };
+
+    // Bound both directions so a wedged peer cannot stall the caller.
+    let _ = stream.set_read_timeout(Some(Duration::from_secs(1)));
+    let _ = stream.set_write_timeout(Some(Duration::from_secs(1)));
+    let request =
+        format!("GET /api/health HTTP/1.1\r\nHost: {GUI_HOST}:{port}\r\nConnection: close\r\n\r\n");
+
+    if stream.write_all(request.as_bytes()).is_err() {
+        return false;
+    }
+
+    // Read raw bytes and decode lossily: a read error (e.g. timeout) or a
+    // stray non-UTF-8 byte must not discard what was already received.
+    let mut raw = Vec::new();
+    let _ = stream.read_to_end(&mut raw);
+    let response = String::from_utf8_lossy(&raw);
+
+    // Judge the status from the status line only, not from any "200 OK"
+    // text that might appear later in headers or body.
+    let status_ok = response
+        .lines()
+        .next()
+        .and_then(|line| line.split_whitespace().nth(1))
+        == Some("200");
+
+    // Strip whitespace so `"status": "ok"` and `"status":"ok"` both match.
+    let compact: String = response.chars().filter(|c| !c.is_whitespace()).collect();
+
+    status_ok && compact.contains("\"status\":\"ok\"") && compact.contains("\"mode\":\"gui\"")
+}
+
+/// Report whether a TCP listener can currently be bound to `port` on the
+/// loopback host. The probe listener is dropped before returning.
+fn can_bind(port: u16) -> bool {
+    match TcpListener::bind((GUI_HOST, port)) {
+        Ok(_listener) => true,
+        Err(_) => false,
+    }
+}
+
+/// Return the first port to try for the dashboard.
+///
+/// Reads `HERMES_GUI_PORT`; surrounding whitespace is ignored. Falls back to
+/// `DEFAULT_GUI_PORT` when the variable is unset, not a valid `u16`, or `0`.
+/// Port 0 is rejected because binding to it always succeeds (the OS picks an
+/// ephemeral port), which would make port selection settle on a port the
+/// health probe can never reach.
+fn base_port() -> u16 {
+    std::env::var("HERMES_GUI_PORT")
+        .ok()
+        .and_then(|raw| raw.trim().parse::<u16>().ok())
+        .filter(|port| *port != 0)
+        .unwrap_or(DEFAULT_GUI_PORT)
+}
+
+/// Pick a port for the dashboard from the 20-port window starting at
+/// `base_port()`.
+///
+/// The first port that either already answers the health check or can be
+/// bound wins; if none qualifies, the base port is returned unchanged.
+fn select_port() -> u16 {
+    let first = base_port();
+    let end = first.saturating_add(20);
+    (first..end)
+        .find(|&candidate| check_health(candidate) || can_bind(candidate))
+        .unwrap_or(first)
+}
+
+/// Resolve the directory three levels above this crate's manifest directory
+/// (captured at compile time). Falls back to `"."` when the path cannot be
+/// canonicalized.
+fn repo_root() -> std::path::PathBuf {
+    let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    match manifest_dir.join("../../..").canonicalize() {
+        Ok(root) => root,
+        Err(_) => std::path::PathBuf::from("."),
+    }
+}
+
+/// Return the directory named by `HERMES_GUI_RUNTIME_DIR`, or `None` when
+/// the variable is not set.
+fn runtime_dir() -> Option<std::path::PathBuf> {
+    let raw = std::env::var_os("HERMES_GUI_RUNTIME_DIR")?;
+    Some(std::path::PathBuf::from(raw))
+}
+
+/// Locate the Python interpreter inside the `venv` directory of `runtime`,
+/// using the Windows layout (`Scripts/python.exe`) or the POSIX layout
+/// (`bin/python`) depending on the compile target.
+fn runtime_python(runtime: &std::path::Path) -> std::path::PathBuf {
+    let (bin_dir, executable) = if cfg!(target_os = "windows") {
+        ("Scripts", "python.exe")
+    } else {
+        ("bin", "python")
+    };
+
+    let mut interpreter = runtime.join("venv");
+    interpreter.push(bin_dir);
+    interpreter.push(executable);
+    interpreter
+}
+
+/// Split a Windows UNC path that points into a WSL distro into
+/// `(distro, linux_path)`.
+///
+/// Accepts both the plain form (`\\wsl$\<distro>\...`,
+/// `\\wsl.localhost\<distro>\...`) and the verbatim form that
+/// `std::fs::canonicalize` produces on Windows (`\\?\UNC\wsl$\<distro>\...`).
+/// Returns `None` for any other path or when the distro component is empty.
+fn wsl_path(root: &std::path::Path) -> Option<(String, String)> {
+    let raw = root.to_string_lossy().replace('\\', "/");
+    let parts: Vec<&str> = raw.split('/').collect();
+
+    // Plain UNC splits as ["", "", host, distro, ...]; the verbatim form
+    // inserts "?" and "UNC" before the host, shifting everything by two.
+    let verbatim_unc = parts.get(2) == Some(&"?")
+        && parts
+            .get(3)
+            .map_or(false, |part| part.eq_ignore_ascii_case("UNC"));
+    let host_index = if verbatim_unc { 4 } else { 2 };
+
+    let host = parts.get(host_index)?.to_ascii_lowercase();
+    if host != "wsl$" && host != "wsl.localhost" {
+        return None;
+    }
+
+    let distro = parts
+        .get(host_index + 1)
+        .filter(|name| !name.is_empty())?
+        .to_string();
+    // Everything after the distro is the path inside the distro; an empty
+    // remainder maps to "/".
+    let path = format!("/{}", parts.get(host_index + 2..)?.join("/"));
+    Some((distro, path))
+}
+
+/// Environment variables copied from the shell's own environment into the
+/// dashboard's environment when they are set.
+const FORWARDED_ENV: [&str; 2] = ["HERMES_HOME", "HERMES_GUI_FRESH"];
+
+/// Python arguments that launch the dashboard in GUI mode on `port`.
+fn dashboard_args(port: &str) -> Vec<String> {
+    [
+        "-m",
+        "hermes_cli.main",
+        "dashboard",
+        "--gui",
+        "--no-open",
+        "--host",
+        GUI_HOST,
+        "--port",
+        port,
+    ]
+    .iter()
+    .map(|arg| arg.to_string())
+    .collect()
+}
+
+/// Collect the `FORWARDED_ENV` variables that are currently set.
+///
+/// Uses `var_os` per key rather than iterating `std::env::vars()`, which
+/// panics when any unrelated environment entry is not valid Unicode.
+fn forwarded_env() -> Vec<(&'static str, std::ffi::OsString)> {
+    FORWARDED_ENV
+        .iter()
+        .filter_map(|key| std::env::var_os(key).map(|value| (*key, value)))
+        .collect()
+}
+
+/// Spawn `command` with stdin, stdout, and stderr all redirected to null.
+fn spawn_detached(command: &mut Command) -> std::io::Result<Child> {
+    command
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn()
+}
+
+/// Start the Hermes dashboard on `port` and return the spawned process.
+///
+/// Three launch strategies are tried in order:
+/// 1. `HERMES_GUI_RUNTIME_DIR` is set: run that runtime's venv Python and
+///    point `HERMES_WEB_DIST` / `HERMES_TUI_DIR` at its bundled assets.
+/// 2. The repo root is a WSL UNC path: run `python` inside that distro via
+///    `wsl.exe`, passing environment through the `env` command.
+/// 3. Otherwise: run `python` from PATH with the repo root as working dir.
+///
+/// Returns the `std::io::Error` from `spawn` when the process cannot start.
+fn start_dashboard(port: u16) -> std::io::Result<Child> {
+    let port = port.to_string();
+
+    if let Some(runtime) = runtime_dir() {
+        let mut command = Command::new(runtime_python(&runtime));
+        command
+            .args(dashboard_args(&port))
+            .env("HERMES_GUI", "1")
+            .env("HERMES_GUI_PORT", &port)
+            .env("HERMES_WEB_DIST", runtime.join("web_dist"))
+            .env("HERMES_TUI_DIR", runtime.join("ui-tui"))
+            .envs(forwarded_env());
+        return spawn_detached(&mut command);
+    }
+
+    let root = repo_root();
+
+    if let Some((distro, path)) = wsl_path(&root) {
+        // Environment is passed as `env KEY=VALUE ... python ...` arguments
+        // so the values are set on the process started inside the distro.
+        let mut args = vec![
+            "-d".to_string(),
+            distro,
+            "--cd".to_string(),
+            path,
+            "env".to_string(),
+            "HERMES_GUI=1".to_string(),
+            format!("HERMES_GUI_PORT={port}"),
+        ];
+        for key in FORWARDED_ENV {
+            if let Ok(value) = std::env::var(key) {
+                args.push(format!("{key}={value}"));
+            }
+        }
+        args.push("python".to_string());
+        args.extend(dashboard_args(&port));
+
+        let mut command = Command::new("wsl.exe");
+        command.args(args);
+        return spawn_detached(&mut command);
+    }
+
+    let mut command = Command::new("python");
+    command
+        .args(dashboard_args(&port))
+        .current_dir(root)
+        .env("HERMES_GUI", "1")
+        .env("HERMES_GUI_PORT", &port)
+        .envs(forwarded_env());
+    spawn_detached(&mut command)
+}
+
+/// Kill and reap the dashboard process held in `state`, if there is one.
+/// The slot is emptied first, so a second call is a no-op. Errors from
+/// `kill` and `wait` are ignored.
+fn stop_owned_dashboard(state: &GuiState) {
+    let owned = state.child.lock().expect("gui child lock poisoned").take();
+    if let Some(mut process) = owned {
+        let _ = process.kill();
+        let _ = process.wait();
+    }
+}
+
+/// Read the port currently recorded in `state`. Panics if the port mutex
+/// is poisoned.
+fn current_port(state: &GuiState) -> u16 {
+    let guard = state.port.lock().expect("gui port lock poisoned");
+    *guard
+}
+
+/// Make sure a dashboard is reachable, starting one if needed.
+///
+/// If the recorded port already passes the health check nothing happens.
+/// Otherwise a port is selected and stored in `state`; if that port is
+/// already healthy it is reused, else a new dashboard process is spawned
+/// and stored in `state.child`.
+///
+/// Returns an error message with manual start instructions when the
+/// process cannot be spawned. Success does not mean the dashboard is ready
+/// yet, only that it is healthy or has been launched.
+fn ensure_dashboard(state: &GuiState) -> Result<(), String> {
+    if check_health(current_port(state)) {
+        return Ok(());
+    }
+
+    let port = select_port();
+    *state.port.lock().expect("gui port lock poisoned") = port;
+
+    if check_health(port) {
+        return Ok(());
+    }
+
+    match start_dashboard(port) {
+        Ok(child) => {
+            *state.child.lock().expect("gui child lock poisoned") = Some(child);
+            Ok(())
+        }
+        Err(err) => Err(format!(
+            "Could not auto-start Hermes dashboard ({err}). Start it manually with: hermes dashboard --gui --no-open --port {port}"
+        )),
+    }
+}
+
+/// Poll the dashboard health check on a background thread and, once it
+/// passes, navigate `window` to the dashboard URL, show it, and focus it.
+///
+/// Polls every 500 ms for up to 60 seconds. The splash is kept up for at
+/// least `HERMES_GUI_MIN_SPLASH_MS` milliseconds (default `MIN_SPLASH_MS`),
+/// measured from when polling started. If the dashboard never becomes
+/// healthy, a diagnostic is written to stderr and the window is left as-is.
+fn navigate_when_ready(window: WebviewWindow, port: u16) {
+    std::thread::spawn(move || {
+        let started = Instant::now();
+        while started.elapsed() < Duration::from_secs(60) {
+            if check_health(port) {
+                let min_splash = std::env::var("HERMES_GUI_MIN_SPLASH_MS")
+                    .ok()
+                    .and_then(|raw| raw.parse::<u64>().ok())
+                    .unwrap_or(MIN_SPLASH_MS);
+                // Sleep only for the part of the minimum splash time that
+                // has not already elapsed while waiting for the dashboard.
+                if let Some(remaining) =
+                    Duration::from_millis(min_splash).checked_sub(started.elapsed())
+                {
+                    std::thread::sleep(remaining);
+                }
+                if let Ok(url) = tauri::Url::parse(&gui_url(port)) {
+                    let _ = window.navigate(url);
+                    let _ = window.show();
+                    let _ = window.set_focus();
+                }
+                return;
+            }
+            std::thread::sleep(Duration::from_millis(500));
+        }
+        // Timing out silently would leave the user on the splash page with
+        // no hint about what went wrong.
+        eprintln!(
+            "Hermes dashboard did not become healthy on port {port} within 60 seconds; staying on the splash screen"
+        );
+    });
+}
+
+/// Show and focus the window labelled "main"; does nothing when no such
+/// window exists. Errors from the window calls are ignored.
+fn show_main_window(app: &AppHandle) {
+    let Some(main_window) = app.get_webview_window("main") else {
+        return;
+    };
+    let _ = main_window.show();
+    let _ = main_window.set_focus();
+}
+
+/// Open the dashboard URL for `port` with the platform's URL opener.
+/// Exactly one of the cfg-gated statements below is compiled per target;
+/// spawn failures are ignored.
+fn open_browser(port: u16) {
+    let url = gui_url(port);
+
+    // Windows: `start` via cmd. The empty string argument precedes the URL
+    // (presumably filling `start`'s window-title slot — TODO confirm).
+    #[cfg(target_os = "windows")]
+    let _ = Command::new("cmd")
+        .args(["/C", "start", "", &url])
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn();
+
+    // macOS: the `open` command.
+    #[cfg(target_os = "macos")]
+    let _ = Command::new("open").arg(&url).spawn();
+
+    // Other Unix targets: `xdg-open`.
+    #[cfg(all(unix, not(target_os = "macos")))]
+    let _ = Command::new("xdg-open").arg(&url).spawn();
+}
+
+/// Render the 32x32 RGBA tray icon in memory: a light glyph made of a
+/// vertical stem, a horizontal bar, and two end caps on a dark background.
+fn tray_icon() -> Image<'static> {
+    const SIDE: usize = 32;
+    const GLYPH: [u8; 4] = [0xF0, 0xE6, 0xD2, 0xFF];
+    const BACKDROP: [u8; 4] = [0x07, 0x13, 0x13, 0xFF];
+
+    let mut pixels = Vec::with_capacity(SIDE * SIDE * 4);
+
+    for row in 0..SIDE {
+        for col in 0..SIDE {
+            let stem = (14..=17).contains(&col) && (5..=26).contains(&row);
+            let bar = (8..=23).contains(&col) && (13..=16).contains(&row);
+            let caps = (10..=21).contains(&col) && (row == 5 || row == 26);
+
+            let color = if stem || bar || caps { GLYPH } else { BACKDROP };
+            pixels.extend_from_slice(&color);
+        }
+    }
+
+    Image::new_owned(pixels, SIDE as u32, SIDE as u32)
+}
+
+/// Stop the dashboard process owned by the shell, bring a dashboard back
+/// up, and re-navigate the main window once it is healthy.
+///
+/// Returns the error from `ensure_dashboard` when a new dashboard cannot be
+/// started; in that case the window is left untouched.
+fn restart_runtime(app: &AppHandle) -> Result<(), String> {
+    let state = app.state::<GuiState>();
+    stop_owned_dashboard(&state);
+    ensure_dashboard(&state)?;
+
+    if let Some(window) = app.get_webview_window("main") {
+        // Show the splash page while the dashboard comes up.
+        if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
+            let _ = window.navigate(url);
+        }
+        // Re-read the port: ensure_dashboard may have selected a new one.
+        let port = current_port(&state);
+        navigate_when_ready(window, port);
+    }
+
+    Ok(())
+}
+
+/// Build the tray icon and its menu, and wire up the menu and click
+/// handlers. Returns any error from creating the menu items, menu, or tray.
+fn setup_tray(app: &App) -> tauri::Result<()> {
+    let open_item = MenuItem::with_id(app, "open", "Open Hermes", true, None::<&str>)?;
+    let browser_item = MenuItem::with_id(app, "browser", "Open in Browser", true, None::<&str>)?;
+    let restart_item =
+        MenuItem::with_id(app, "restart", "Restart Hermes Runtime", true, None::<&str>)?;
+    // Created with `false` where the other items pass `true` (presumably the
+    // enabled flag, making this a non-clickable label — TODO confirm).
+    let status_item = MenuItem::with_id(app, "status", "Local runtime", false, None::<&str>)?;
+    let separator = PredefinedMenuItem::separator(app)?;
+    let separator2 = PredefinedMenuItem::separator(app)?;
+    let quit_item = MenuItem::with_id(app, "quit", "Quit Hermes", true, None::<&str>)?;
+
+    let menu = Menu::with_items(
+        app,
+        &[
+            &open_item,
+            &browser_item,
+            &restart_item,
+            &separator,
+            &status_item,
+            &separator2,
+            &quit_item,
+        ],
+    )?;
+
+    let icon = tray_icon();
+    let _tray = TrayIconBuilder::new()
+        .icon(icon)
+        .menu(&menu)
+        .tooltip("Hermes")
+        // Dispatch on the ids assigned to the menu items above.
+        .on_menu_event(|app, event| match event.id.as_ref() {
+            "open" => show_main_window(app),
+            "browser" => {
+                let state = app.state::<GuiState>();
+                open_browser(current_port(&state));
+            }
+            "restart" => {
+                if let Err(err) = restart_runtime(app) {
+                    eprintln!("Failed to restart Hermes runtime: {err}");
+                }
+            }
+            "quit" => {
+                // Stop the dashboard this shell started before exiting.
+                let state = app.state::<GuiState>();
+                stop_owned_dashboard(&state);
+                app.exit(0);
+            }
+            _ => {}
+        })
+        // Releasing the left mouse button on the tray icon shows the window.
+        .on_tray_icon_event(|tray, event| {
+            if let TrayIconEvent::Click {
+                button: MouseButton::Left,
+                button_state: MouseButtonState::Up,
+                ..
+            } = event
+            {
+                show_main_window(&tray.app_handle());
+            }
+        })
+        .build(app)?;
+
+    Ok(())
+}
+
+/// Tauri command: report whether the dashboard on the currently recorded
+/// port passes the health check.
+#[tauri::command]
+fn runtime_running(app: AppHandle) -> bool {
+    let state = app.state::<GuiState>();
+    check_health(current_port(&state))
+}
+
+/// Tauri command: thin wrapper that forwards to `restart_runtime` and
+/// returns its result unchanged.
+#[tauri::command]
+fn restart_runtime_command(app: AppHandle) -> Result<(), String> {
+    restart_runtime(&app)
+}
+
+/// Build and run the Tauri application. Panics with "failed to run Hermes
+/// GUI" if the Tauri runtime returns an error.
+pub fn run() {
+    tauri::Builder::default()
+        .plugin(tauri_plugin_notification::init())
+        .plugin(tauri_plugin_opener::init())
+        // Initial state: no owned dashboard process; port from base_port().
+        .manage(GuiState {
+            child: Mutex::new(None),
+            port: Mutex::new(base_port()),
+        })
+        .invoke_handler(tauri::generate_handler![
+            runtime_running,
+            restart_runtime_command
+        ])
+        .setup(|app| {
+            setup_tray(app)?;
+
+            if let Some(window) = app.get_webview_window("main") {
+                // Show the splash page while the dashboard is brought up.
+                if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
+                    let _ = window.navigate(url);
+                }
+
+                // A failed auto-start is only logged; polling below still
+                // runs, so a manually started dashboard is picked up.
+                let state = app.state::<GuiState>();
+                if let Err(err) = ensure_dashboard(&state) {
+                    eprintln!("{err}");
+                }
+
+                let port = current_port(&state);
+                navigate_when_ready(window, port);
+            }
+            Ok(())
+        })
+        // Closing a window hides it instead of closing it.
+        .on_window_event(|window, event| {
+            if let tauri::WindowEvent::CloseRequested { api, .. } = event {
+                api.prevent_close();
+                let _ = window.hide();
+            }
+        })
+        .run(tauri::generate_context!())
+        .expect("failed to run Hermes GUI");
+}
@@ -0,0 +1,5 @@
+#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
+
+// Binary entry point: all application logic lives in the library crate.
+fn main() {
+    hermes_gui_lib::run();
+}
@@ -0,0 +1,38 @@
+{
+  "$schema": "https://schema.tauri.app/config/2",
+  "productName": "Hermes",
+  "version": "0.0.0",
+  "identifier": "ai.nous.hermes.gui",
+  "build": {
+    "beforeDevCommand": "",
+    "beforeBuildCommand": "",
+    "devUrl": "http://127.0.0.1:9120",
+    "frontendDist": "../dist"
+  },
+  "app": {
+    "withGlobalTauri": true,
+    "windows": [
+      {
+        "label": "main",
+        "title": "Hermes",
+        "width": 1400,
+        "height": 900,
+        "minWidth": 900,
+        "minHeight": 600,
+        "resizable": true,
+        "center": true
+      }
+    ],
+    "security": {
+      "csp": "default-src 'self' http://127.0.0.1:* http://localhost:*; connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*; img-src 'self' data: blob: http://127.0.0.1:* http://localhost:*; style-src 'self' 'unsafe-inline' http://127.0.0.1:* http://localhost:*; script-src 'self' 'unsafe-inline' 'unsafe-eval' http://127.0.0.1:* http://localhost:*"
+    }
+  },
+  "bundle": {
+    "active": true,
+    "icon": ["icons/32x32.png", "icons/icon.ico", "icons/icon.svg"],
+    "targets": ["nsis", "dmg", "app"],
+    "resources": {
+      "sidecars": "sidecars/"
+    }
+  }
+}
@@ -0,0 +1,5 @@
+// Browser-side GUI bridge entry.
+//
+// The dashboard remains in `web/`; this file is reserved for future shell-only
+// glue if we need pre-navigation scripts or native event wiring.
+export {};
@@ -0,0 +1,44 @@
+# Bundle the Hermes GUI runtime (dashboard build, TUI build, Python venv)
+# into a single directory.
+#
+#   -Out     Destination directory. It is deleted and recreated.
+#   -Python  Interpreter used to create the venv.
+param(
+  [string]$Out = "$PSScriptRoot\..\gui\src-tauri\sidecars\hermes-runtime",
+  [string]$Python = "python"
+)
+
+# Stop on the first failing cmdlet instead of continuing with a partial
+# bundle.
+$ErrorActionPreference = "Stop"
+
+# Native commands (npm, python) report failure through $LASTEXITCODE rather
+# than through PowerShell errors, so each call is checked explicitly.
+function Assert-NativeSuccess {
+  param([string]$Step)
+  if ($LASTEXITCODE -ne 0) {
+    throw "$Step failed with exit code $LASTEXITCODE"
+  }
+}
+
+$Root = Resolve-Path "$PSScriptRoot\..\.."
+
+Write-Host "Bundling Hermes GUI runtime"
+Write-Host "repo: $Root"
+Write-Host "out:  $Out"
+
+if (Test-Path $Out) {
+  Remove-Item -Recurse -Force $Out
+}
+New-Item -ItemType Directory -Force -Path $Out | Out-Null
+
+Write-Host "-> Building dashboard"
+npm --prefix "$Root\web" ci
+Assert-NativeSuccess "npm ci (web)"
+npm --prefix "$Root\web" run build
+Assert-NativeSuccess "npm run build (web)"
+Copy-Item -Recurse "$Root\web\dist" "$Out\web_dist"
+
+Write-Host "-> Building TUI"
+npm --prefix "$Root\ui-tui" ci
+Assert-NativeSuccess "npm ci (ui-tui)"
+npm --prefix "$Root\ui-tui" run build
+Assert-NativeSuccess "npm run build (ui-tui)"
+New-Item -ItemType Directory -Force -Path "$Out\ui-tui" | Out-Null
+Copy-Item -Recurse "$Root\ui-tui\dist" "$Out\ui-tui\dist"
+Copy-Item "$Root\ui-tui\package.json" "$Out\ui-tui\package.json"
+Copy-Item "$Root\ui-tui\package-lock.json" "$Out\ui-tui\package-lock.json"
+Copy-Item -Recurse "$Root\ui-tui\node_modules" "$Out\ui-tui\node_modules"
+
+Write-Host "-> Creating Python runtime"
+& $Python -m venv "$Out\venv"
+Assert-NativeSuccess "python -m venv"
+& "$Out\venv\Scripts\python.exe" -m pip install --upgrade pip
+Assert-NativeSuccess "pip install --upgrade pip"
+# NOTE(review): `-e` is an editable install, so the venv keeps pointing at
+# this checkout. Confirm that is intended for a bundled runtime.
+& "$Out\venv\Scripts\python.exe" -m pip install -e "$Root[web,pty]"
+Assert-NativeSuccess "pip install hermes"
+
+@"
+# Hermes GUI Runtime
+
+Generated by apps/shared/bundle-runtime.ps1.
+
+Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
+"@ | Set-Content "$Out\README.md"
+
+Write-Host "Runtime bundle ready: $Out"
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+OUT="${1:-"$ROOT/apps/gui/src-tauri/sidecars/hermes-runtime"}"
+PYTHON="${PYTHON:-python}"
+
+echo "Bundling Hermes GUI runtime"
+echo "repo: $ROOT"
+echo "out:  $OUT"
+
+rm -rf "$OUT"
+mkdir -p "$OUT"
+
+echo "→ Building dashboard"
+npm --prefix "$ROOT/web" ci
+npm --prefix "$ROOT/web" run build
+cp -a "$ROOT/web/dist" "$OUT/web_dist"
+
+echo "→ Building TUI"
+npm --prefix "$ROOT/ui-tui" ci
+npm --prefix "$ROOT/ui-tui" run build
+mkdir -p "$OUT/ui-tui"
+cp -a "$ROOT/ui-tui/dist" "$OUT/ui-tui/dist"
+cp -a "$ROOT/ui-tui/package.json" "$ROOT/ui-tui/package-lock.json" "$OUT/ui-tui/"
+cp -a "$ROOT/ui-tui/node_modules" "$OUT/ui-tui/node_modules"
+
+echo "→ Creating Python runtime"
+"$PYTHON" -m venv "$OUT/venv"
+"$OUT/venv/bin/python" -m pip install --upgrade pip
+"$OUT/venv/bin/python" -m pip install -e "$ROOT[web,pty]"
+
+cat > "$OUT/README.md" <<EOF
+# Hermes GUI Runtime
+
+Generated by apps/shared/bundle-runtime.sh.
+
+Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
+EOF
+
+echo "✓ Runtime bundle ready: $OUT"
@@ -0,0 +1,33 @@
+# GUI Runtime Contract
+
+The GUI shell starts Hermes with a small, explicit environment.
+
+## Environment
+
+```text
+HERMES_GUI=1
+HERMES_GUI_PORT=<selected port>
+HERMES_WEB_DIST=<bundled web dist>
+HERMES_TUI_DIR=<bundled ui-tui dir>
+```
+
+The native shell uses `127.0.0.1:9120` as its initial GUI port during dev.
+Bundled builds should keep the port private to the local machine and expose it
+through `/api/health` and `/api/runtime`.
+
+The shell should also pass the selected profile through the normal Hermes CLI
+profile mechanism once the profile picker is wired.
+
+## Ports
+
+Use `127.0.0.1` only. Start with the GUI default port, then fall back to a
+free port if occupied. Show the chosen port in the tray menu.
+
+## User Data
+
+The installer owns app files. Hermes owns user state under `HERMES_HOME`.
+Uninstallers must not delete user state unless the user explicitly asks.
+
+## Update Model
+
+MVP does not use Tauri's native updater. GUI runs `hermes update`, tails the
+action log, notifies completion, then offers to restart the runtime.
@@ -951,13 +951,9 @@ class BatchRunner:
                    root_logger.setLevel(original_level)
        
        # Aggregate all batch statistics and update checkpoint
-        all_completed_prompts = list(completed_prompts_set)
        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
-        
+
        for batch_result in results:
-            # Add newly completed prompts
-            all_completed_prompts.extend(batch_result.get("completed_prompts", []))
-            
            # Aggregate tool stats
            for tool_name, stats in batch_result.get("tool_stats", {}).items():
                if tool_name not in total_tool_stats:
@@ -977,7 +973,7 @@ class BatchRunner:
        
        # Save final checkpoint (best-effort; incremental writes already happened)
        try:
-            checkpoint_data["completed_prompts"] = all_completed_prompts
+            checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
            self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
        except Exception as ckpt_err:
            print(f"âš ï¸  Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -790,9 +790,16 @@ code_execution:
 # Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
+                                              # WARNING: values above 10 multiply API cost linearly.
+  # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
+                                              # Raise to 2 to allow workers to spawn their own subagents.
+                                              # Requires role="orchestrator" on intermediate agents.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
+                                              # or auto-approve "once" (true) instead of blocking on stdin.
+                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
+                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
@@ -3176,7 +3176,14 @@ class HermesCLI:
        # the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
        runtime_model = runtime.get("model")
        if runtime_model and isinstance(runtime_model, str):
-            self.model = runtime_model
+            # Only use runtime model if: model is unset, or model equals provider name
+            should_use_runtime_model = (
+                not self.model or  # No model configured yet
+                self.model == self.provider or  # Model is the provider slug
+                self.model == runtime.get("name")  # Model matches provider display name
+            )
+            if should_use_runtime_model:
+                self.model = runtime_model

        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
        # without `hermes model`), fall back to the provider's first catalog
@@ -4661,10 +4668,6 @@ class HermesCLI:
    def new_session(self, silent=False):
        """Start a fresh session with a new session ID and cleared agent state."""
        if self.agent and self.conversation_history:
-            try:
-                self.agent.flush_memories(self.conversation_history)
-            except (Exception, KeyboardInterrupt):
-                pass
            # Trigger memory extraction on the old session before session_id rotates.
            self.agent.commit_memory_session(self.conversation_history)
            self._notify_session_boundary("on_session_finalize")
@@ -5374,29 +5377,26 @@ class HermesCLI:
        _cprint(f"  ✓ Model switched: {result.new_model}")
        _cprint(f"    Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry
+        # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or self.base_url or "",
+            api_key=result.api_key or self.api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            _cprint(f"    Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                _cprint(f"    Context: {mi.context_window:,} tokens")
            if mi.max_output:
                _cprint(f"    Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                _cprint(f"    Cost: {mi.format_cost()}")
            _cprint(f"    Capabilities: {mi.format_capabilities()}")
-        else:
-            # Fallback to old context length lookup
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or self.base_url,
-                    api_key=result.api_key or self.api_key,
-                    provider=result.target_provider,
-                )
-                _cprint(f"    Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -6165,6 +6165,8 @@ class HermesCLI:
            self._handle_skin_command(cmd_original)
        elif canonical == "voice":
            self._handle_voice_command(cmd_original)
+        elif canonical == "busy":
+            self._handle_busy_command(cmd_original)
        else:
            # Check for user-defined quick commands (bypass agent loop, no LLM call)
            base_cmd = cmd_lower.split()[0]
@@ -6901,6 +6903,36 @@ class HermesCLI:
        else:
            _cprint(f"  {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")

+    def _handle_busy_command(self, cmd: str):
+        """Handle /busy — control what Enter does while Hermes is working.
+
+        Usage:
+            /busy               Show current busy input mode
+            /busy status        Show current busy input mode
+            /busy queue         Queue input for the next turn instead of interrupting
+            /busy interrupt     Interrupt the current run on Enter (default)
+        """
+        parts = cmd.strip().split(maxsplit=1)
+        if len(parts) < 2 or parts[1].strip().lower() == "status":
+            _cprint(f"  {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
+            _cprint(f"  {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}")
+            _cprint(f"  {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
+            return
+
+        arg = parts[1].strip().lower()
+        if arg not in {"queue", "interrupt"}:
+            _cprint(f"  {_DIM}(._.) Unknown argument: {arg}{_RST}")
+            _cprint(f"  {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
+            return
+
+        self.busy_input_mode = arg
+        if save_config_value("display.busy_input_mode", arg):
+            behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy."
+            _cprint(f"  {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}")
+            _cprint(f"  {_DIM}{behavior}{_RST}")
+        else:
+            _cprint(f"  {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}")
+
    def _handle_fast_command(self, cmd: str):
        """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
        if not self._fast_command_available():
@@ -6979,51 +7011,52 @@ class HermesCLI:
                focus_topic = parts[1].strip()

        original_count = len(self.conversation_history)
-        try:
-            from agent.model_metadata import estimate_messages_tokens_rough
-            from agent.manual_compression_feedback import summarize_manual_compression
-            original_history = list(self.conversation_history)
-            approx_tokens = estimate_messages_tokens_rough(original_history)
-            if focus_topic:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
-                      f"focus: \"{focus_topic}\"...")
-            else:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+        with self._busy_command("Compressing context..."):
+            try:
+                from agent.model_metadata import estimate_messages_tokens_rough
+                from agent.manual_compression_feedback import summarize_manual_compression
+                original_history = list(self.conversation_history)
+                approx_tokens = estimate_messages_tokens_rough(original_history)
+                if focus_topic:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                          f"focus: \"{focus_topic}\"...")
+                else:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

-            compressed, _ = self.agent._compress_context(
-                original_history,
-                self.agent._cached_system_prompt or "",
-                approx_tokens=approx_tokens,
-                focus_topic=focus_topic or None,
-            )
-            self.conversation_history = compressed
-            # _compress_context ends the old session and creates a new child
-            # session on the agent (run_agent.py::_compress_context). Sync the
-            # CLI's session_id so /status, /resume, exit summary, and title
-            # generation all point at the live continuation session, not the
-            # ended parent. Without this, subsequent end_session() calls target
-            # the already-closed parent and the child is orphaned.
-            if (
-                getattr(self.agent, "session_id", None)
-                and self.agent.session_id != self.session_id
-            ):
-                self.session_id = self.agent.session_id
-                self._pending_title = None
-            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
-            summary = summarize_manual_compression(
-                original_history,
-                self.conversation_history,
-                approx_tokens,
-                new_tokens,
-            )
-            icon = "🗜️" if summary["noop"] else "✅"
-            print(f"  {icon} {summary['headline']}")
-            print(f"     {summary['token_line']}")
-            if summary["note"]:
-                print(f"     {summary['note']}")
+                compressed, _ = self.agent._compress_context(
+                    original_history,
+                    self.agent._cached_system_prompt or "",
+                    approx_tokens=approx_tokens,
+                    focus_topic=focus_topic or None,
+                )
+                self.conversation_history = compressed
+                # _compress_context ends the old session and creates a new child
+                # session on the agent (run_agent.py::_compress_context). Sync the
+                # CLI's session_id so /status, /resume, exit summary, and title
+                # generation all point at the live continuation session, not the
+                # ended parent. Without this, subsequent end_session() calls target
+                # the already-closed parent and the child is orphaned.
+                if (
+                    getattr(self.agent, "session_id", None)
+                    and self.agent.session_id != self.session_id
+                ):
+                    self.session_id = self.agent.session_id
+                    self._pending_title = None
+                new_tokens = estimate_messages_tokens_rough(self.conversation_history)
+                summary = summarize_manual_compression(
+                    original_history,
+                    self.conversation_history,
+                    approx_tokens,
+                    new_tokens,
+                )
+                icon = "🗜️" if summary["noop"] else "✅"
+                print(f"  {icon} {summary['headline']}")
+                print(f"     {summary['token_line']}")
+                if summary["note"]:
+                    print(f"     {summary['note']}")

-        except Exception as e:
-            print(f"  ❌ Compression failed: {e}")
+            except Exception as e:
+                print(f"  ❌ Compression failed: {e}")

    def _handle_debug_command(self):
        """Handle /debug — upload debug report + logs and print paste URLs."""
@@ -9525,9 +9558,20 @@ class HermesCLI:
        
        @kb.add('c-d')
        def handle_ctrl_d(event):
-            """Handle Ctrl+D - exit."""
-            self._should_exit = True
-            event.app.exit()
+            """Ctrl+D: delete char under cursor (standard readline behaviour).
+            Only exit when the input is empty — same as bash/zsh. Pending
+            attached images count as input and block the EOF-exit so the
+            user doesn't lose them silently.
+            """
+            buf = event.app.current_buffer
+            if buf.text:
+                buf.delete()
+            elif self._attached_images:
+                # Empty text but pending attachments — no-op, don't exit.
+                return
+            else:
+                self._should_exit = True
+                event.app.exit()

        _modal_prompt_active = Condition(
            lambda: bool(self._secret_state or self._sudo_state)
@@ -10740,12 +10784,6 @@ class HermesCLI:
                    self.agent.interrupt()
                except Exception:
                    pass
-            # Flush memories before exit (only for substantial conversations)
-            if self.agent and self.conversation_history:
-                try:
-                    self.agent.flush_memories(self.conversation_history)
-                except (Exception, KeyboardInterrupt):
-                    pass
            # Shut down voice recorder (release persistent audio stream)
            if hasattr(self, '_voice_recorder') and self._voice_recorder:
                try:
@@ -16,7 +16,7 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any
+from typing import Optional, Dict, List, Any, Union

 logger = logging.getLogger(__name__)

@@ -417,6 +417,7 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
@@ -438,6 +439,9 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        context_from: Optional job ID (or list of job IDs) whose most recent output
+                      is injected into the prompt as context before each run.
+                      Useful for chaining cron jobs: job A finds data, job B processes it.
        enabled_toolsets: Optional list of toolset names to restrict the agent to.
                          When set, only tools from these toolsets are loaded, reducing
                          token overhead. When omitted, all default tools are loaded.
@@ -481,6 +485,14 @@ def create_job(
    normalized_toolsets = normalized_toolsets or None
    normalized_workdir = _normalize_workdir(workdir)

+    # Normalize context_from: accept str or list of str, store as list or None
+    if isinstance(context_from, str):
+        context_from = [context_from.strip()] if context_from.strip() else None
+    elif isinstance(context_from, list):
+        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
+    else:
+        context_from = None
+
    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
        "id": job_id,
@@ -492,6 +504,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -671,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

+    # Inject output from referenced cron jobs as context.
+    context_from = job.get("context_from")
+    if context_from:
+        from cron.jobs import OUTPUT_DIR
+        if isinstance(context_from, str):
+            context_from = [context_from]
+        for source_job_id in context_from:
+            # Guard against path traversal — valid job IDs are 12-char hex strings
+            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
+                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
+                continue
+            try:
+                job_output_dir = OUTPUT_DIR / source_job_id
+                if not job_output_dir.exists():
+                    continue  # silent skip — no output yet
+                output_files = sorted(
+                    job_output_dir.glob("*.md"),
+                    key=lambda f: f.stat().st_mtime,
+                    reverse=True,
+                )
+                if not output_files:
+                    continue  # silent skip — no output yet
+                latest_output = output_files[0].read_text(encoding="utf-8").strip()
+                # Truncate to 8K characters to avoid prompt bloat
+                _MAX_CONTEXT_CHARS = 8000
+                if len(latest_output) > _MAX_CONTEXT_CHARS:
+                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
+                if latest_output:
+                    prompt = (
+                        f"## Output from job '{source_job_id}'\n"
+                        "The following is the most recent output from a preceding "
+                        "cron job. Use it as context for your analysis.\n\n"
+                        f"```\n{latest_output}\n```\n\n"
+                        f"{prompt}"
+                    )
+                else:
+                    continue  # silent skip — empty output
+            except (OSError, PermissionError) as e:
+                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
+                # silent skip — do not pollute the prompt with error messages
+
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
@@ -135,7 +135,7 @@ class SessionResetPolicy:
            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
-            notify=notify if notify is not None else True,
+            notify=_coerce_bool(notify, True),
            notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
        )

@@ -178,7 +178,7 @@ class PlatformConfig:
            home_channel = HomeChannel.from_dict(data["home_channel"])
        
        return cls(
-            enabled=data.get("enabled", False),
+            enabled=_coerce_bool(data.get("enabled"), False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
@@ -435,7 +435,7 @@ class GatewayConfig:
            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
-            always_log_local=data.get("always_log_local", True),
+            always_log_local=_coerce_bool(data.get("always_log_local"), True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    gac = telegram_cfg["group_allowed_chats"]
+                    if isinstance(gac, list):
+                        gac = ",".join(str(v) for v in gac)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):

        If the client disconnects mid-stream, ``agent.interrupt()`` is
        called so the agent stops issuing upstream LLM calls, then the
-        asyncio task is cancelled.  When ``store=True`` the full response
-        is persisted to the ResponseStore in a ``finally`` block so GET
-        /v1/responses/{id} and ``previous_response_id`` chaining work the
-        same as the batch path.
+        asyncio task is cancelled.  When ``store=True`` an initial
+        ``in_progress`` snapshot is persisted immediately after
+        ``response.created`` and disconnects update it to an
+        ``incomplete`` snapshot so GET /v1/responses/{id} and
+        ``previous_response_id`` chaining still have something to
+        recover from.
        """
        import queue as _q

@@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
        final_response_text = ""
        agent_error: Optional[str] = None
        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        terminal_snapshot_persisted = False
+
+        def _persist_response_snapshot(
+            response_env: Dict[str, Any],
+            *,
+            conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
+        ) -> None:
+            """Write ``response_env`` to the response store under ``response_id``.
+
+            Does nothing unless ``store`` is truthy.  When no history snapshot
+            is supplied, a copy of ``conversation_history`` plus the current
+            user message is stored instead.  If ``conversation`` is set, it is
+            pointed at this response after the record is written.
+            """
+            if not store:
+                return
+            history = conversation_history_snapshot
+            if history is None:
+                # Default snapshot: prior turns followed by this request's input.
+                history = [*conversation_history, {"role": "user", "content": user_message}]
+            record = {
+                "response": response_env,
+                "conversation_history": history,
+                "instructions": instructions,
+                "session_id": session_id,
+            }
+            self._response_store.put(response_id, record)
+            if conversation:
+                self._response_store.set_conversation(conversation, response_id)
+
+        def _persist_incomplete_if_needed() -> None:
+            """Store an ``incomplete`` snapshot unless a terminal one was written.
+
+            Invoked from the client-disconnect (``ConnectionResetError``) and
+            server-cancellation (``asyncio.CancelledError``) handlers so that
+            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
+            working after the stream ends abruptly.
+            """
+            if not store:
+                return
+            if terminal_snapshot_persisted:
+                return
+            # Prefer the streamed deltas; fall back to the final text if set.
+            partial_text = "".join(final_text_parts) or final_response_text
+            output_items: List[Dict[str, Any]] = list(emitted_items)
+            if partial_text:
+                output_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": partial_text}],
+                })
+            snapshot_env = _envelope("incomplete")
+            snapshot_env["output"] = output_items
+            snapshot_env["usage"] = {
+                counter: usage.get(counter, 0)
+                for counter in ("input_tokens", "output_tokens", "total_tokens")
+            }
+            history = list(conversation_history)
+            history.append({"role": "user", "content": user_message})
+            if partial_text:
+                history.append({"role": "assistant", "content": partial_text})
+            _persist_response_snapshot(
+                snapshot_env,
+                conversation_history_snapshot=history,
+            )

        try:
            # response.created — initial envelope, status=in_progress
@@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "type": "response.created",
                "response": created_env,
            })
+            _persist_response_snapshot(created_env)
            last_activity = time.monotonic()

            async def _open_message_item() -> None:
@@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                _failed_history = list(conversation_history)
+                _failed_history.append({"role": "user", "content": user_message})
+                if final_response_text or agent_error:
+                    _failed_history.append({
+                        "role": "assistant",
+                        "content": final_response_text or agent_error,
+                    })
+                _persist_response_snapshot(
+                    failed_env,
+                    conversation_history_snapshot=_failed_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.failed", {
                    "type": "response.failed",
                    "response": failed_env,
@@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                full_history = list(conversation_history)
+                full_history.append({"role": "user", "content": user_message})
+                if isinstance(result, dict) and result.get("messages"):
+                    full_history.extend(result["messages"])
+                else:
+                    full_history.append({"role": "assistant", "content": final_response_text})
+                _persist_response_snapshot(
+                    completed_env,
+                    conversation_history_snapshot=full_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.completed", {
                    "type": "response.completed",
                    "response": completed_env,
                })

-                # Persist for future chaining / GET retrieval, mirroring
-                # the batch path behavior.
-                if store:
-                    full_history = list(conversation_history)
-                    full_history.append({"role": "user", "content": user_message})
-                    if isinstance(result, dict) and result.get("messages"):
-                        full_history.extend(result["messages"])
-                    else:
-                        full_history.append({"role": "assistant", "content": final_response_text})
-                    self._response_store.put(response_id, {
-                        "response": completed_env,
-                        "conversation_history": full_history,
-                        "instructions": instructions,
-                        "session_id": session_id,
-                    })
-                    if conversation:
-                        self._response_store.set_conversation(conversation, response_id)
-
        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+        except asyncio.CancelledError:
+            # Server-side cancellation (e.g. shutdown, request timeout) —
+            # persist an incomplete snapshot so GET /v1/responses/{id} and
+            # previous_response_id chaining still work, then re-raise so the
+            # runtime's cancellation semantics are respected.
+            _persist_incomplete_if_needed()
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE task cancelled")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
+            raise

        return response

@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
    return None


-def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
+def _split_host_port(value: str) -> tuple[str, int | None]:
+    """Split a host, ``host:port``, ``[v6]:port`` or URL into ``(host, port)``.
+
+    The returned host is lower-cased with any trailing dot removed; the
+    port is ``None`` when absent.  Empty input yields ``("", None)``.
+
+    URL-shaped input that cannot be parsed (unbalanced IPv6 brackets, a
+    non-numeric or out-of-range port) also yields ``("", None)`` rather than
+    raising ``ValueError``, so one malformed NO_PROXY entry is skipped by
+    callers instead of aborting proxy resolution.
+    """
+    raw = str(value or "").strip()
+    if not raw:
+        return "", None
+    if "://" in raw:
+        try:
+            # urlsplit() rejects malformed IPv6 brackets and ``.port``
+            # rejects an invalid port; both signal it with ValueError.
+            parsed = urlsplit(raw)
+            port = parsed.port
+        except ValueError:
+            return "", None
+        return (parsed.hostname or "").lower().rstrip("."), port
+    if raw.startswith("[") and "]" in raw:
+        # Bracketed IPv6 literal, optionally followed by ":port".
+        host, _, rest = raw[1:].partition("]")
+        port = None
+        if rest.startswith(":") and rest[1:].isdigit():
+            port = int(rest[1:])
+        return host.lower().rstrip("."), port
+    if raw.count(":") == 1:
+        # Exactly one colon means "host:port"; more colons is a bare IPv6
+        # address and falls through unchanged.
+        host, _, maybe_port = raw.rpartition(":")
+        if maybe_port.isdigit():
+            return host.lower().rstrip("."), int(maybe_port)
+    return raw.lower().strip("[]").rstrip("."), None
+
+
+def _no_proxy_entries() -> list[str]:
+    """Collect the comma-separated tokens of ``NO_PROXY`` then ``no_proxy``.
+
+    Tokens are whitespace-stripped and empty ones are dropped.  Nothing is
+    de-duplicated, so a token present in both variables is returned twice.
+    """
+    return [
+        token
+        for name in ("NO_PROXY", "no_proxy")
+        for token in map(str.strip, os.environ.get(name, "").split(","))
+        if token
+    ]
+
+
+def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
+    """Return True when one NO_PROXY token covers ``host`` (and ``port``).
+
+    ``host`` is compared as given, so callers should pass it lower-cased.
+    Token forms, checked in this order: ``*`` (everything), an IP network /
+    CIDR, an IP literal, ``*.suffix``, ``.suffix``, and a bare domain (which
+    also covers its subdomains).  A token that carries a port only matches a
+    target with that same port.
+    """
+    token = str(entry or "").strip().lower()
+    if not token:
+        return False
+    if token == "*":
+        return True
+
+    pattern_host, pattern_port = _split_host_port(token)
+    # A port on the token is binding: a target on another port, or with no
+    # port at all, is not covered.
+    if pattern_port is not None and pattern_port != port:
+        return False
+    if not pattern_host:
+        return False
+
+    # IP-shaped tokens can only match IP-literal targets, so parse the
+    # target once; ``None`` marks "target is a hostname".
+    try:
+        target_ip = ipaddress.ip_address(host)
+    except ValueError:
+        target_ip = None
+
+    try:
+        network = ipaddress.ip_network(pattern_host, strict=False)
+    except ValueError:
+        pass
+    else:
+        return target_ip is not None and target_ip in network
+
+    try:
+        literal = ipaddress.ip_address(pattern_host)
+    except ValueError:
+        pass
+    else:
+        return target_ip is not None and target_ip == literal
+
+    if pattern_host.startswith("*."):
+        # "*.example.com" -> suffix ".example.com" (subdomains only).
+        return host.endswith(pattern_host[1:])
+    if pattern_host.startswith("."):
+        return host == pattern_host[1:] or host.endswith(pattern_host)
+    return host == pattern_host or host.endswith(f".{pattern_host}")
+
+
+def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
+    """Tell whether NO_PROXY/no_proxy exempts at least one of ``target_hosts``.
+
+    ``target_hosts`` is a single host string or a collection of them; each
+    may include a port.  Recognised entry forms: exact hosts, domain
+    suffixes, wildcard suffixes, IP literals, CIDR ranges, optional
+    host:port entries, and ``*``.  Returns False when no entries are
+    configured or no targets are given.
+    """
+    rules = _no_proxy_entries()
+    if not (rules and target_hosts):
+        return False
+    targets = [target_hosts] if isinstance(target_hosts, str) else list(target_hosts)
+    for target in targets:
+        host, port = _split_host_port(str(target))
+        # Targets that parse to an empty host cannot match anything.
+        if host and any(_no_proxy_entry_matches(rule, host, port) for rule in rules):
+            return True
+    return False
+
+
+def resolve_proxy_url(
+    platform_env_var: str | None = None,
+    *,
+    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
+) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
      1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
      2. macOS system proxy via ``scutil --proxy`` (auto-detect)

-    Returns *None* if no proxy is found.
+    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
+    of ``target_hosts``.
    """
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+    detected = normalize_proxy_url(_detect_macos_system_proxy())
+    if detected and should_bypass_proxy(target_hosts):
+        return None
+    return detected


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -2440,6 +2543,9 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
+        guild_id: Optional[str] = None,
+        parent_chat_id: Optional[str] = None,
+        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2457,6 +2563,9 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
+            guild_id=str(guild_id) if guild_id else None,
+            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
+            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:

 class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
+    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH

    def __init__(self, config: PlatformConfig):
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Text sending
    # ------------------------------------------------------------------

+    @staticmethod
+    def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
+        """Split ``content`` with the base splitter, minus ``(n/m)`` suffixes.
+
+        iMessage bubbles flow naturally, so a trailing pagination marker
+        such as "(1/3)" is removed from every chunk that is returned.
+        """
+        # NOTE(review): the pattern also removes a trailing "(n/m)" that is
+        # part of the message text itself — confirm whether the base splitter
+        # only appends markers for multi-chunk output and guard on that.
+        strip_marker = re.compile(r"\s*\(\d+/\d+\)$").sub
+        return [
+            strip_marker("", piece)
+            for piece in BasePlatformAdapter.truncate_message(content, max_length)
+        ]
+
    async def send(
        self,
        chat_id: str,
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        text = strip_markdown(content or "")
+        text = self.format_message(content)
        if not text:
            return SendResult(success=False, error="BlueBubbles send requires text")
-        chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
+        # Split on paragraph breaks first (double newlines) so each thought
+        # becomes its own iMessage bubble, then truncate any that are still
+        # too long.
+        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
+        chunks: List[str] = []
+        for para in (paragraphs or [text]):
+            if len(para) <= self.MAX_MESSAGE_LENGTH:
+                chunks.append(para)
+            else:
+                chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
        last = SendResult(success=True)
        for chunk in chunks:
            guid = await self._resolve_chat_guid(chat_id)
@@ -3261,6 +3261,7 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
+                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3320,6 +3321,9 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
+            guild_id=str(message.guild.id) if message.guild else None,
+            parent_chat_id=parent_channel_id,
+            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                await crypto_store.open()

+                # Bind the store to the runtime device_id before any
+                # put_account() runs. PgCryptoStore defaults _device_id
+                # to "" and its crypto_account UPSERT never updates the
+                # device_id column on conflict — so once put_account
+                # writes blank, it stays blank forever. That breaks
+                # every downstream device-scoped olm operation: peer
+                # to-device ciphertext can't find our identity key and
+                # no megolm sessions ever land. Setting _device_id here
+                # (in-memory; the on-disk row may not exist yet) makes
+                # the first put_account write the correct value.
+                # DeviceID is a NewType(str) so plain str works at runtime.
+                if client.device_id:
+                    await crypto_store.put_device_id(client.device_id)
+
                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
                olm = OlmMachine(client, crypto_store, crypto_state)

@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

+            proxy_targets = ["api.telegram.org", *fallback_ips]
+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


-def _resolve_proxy_url() -> str | None:
+def _resolve_proxy_url(target_hosts=None) -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url("TELEGRAM_PROXY")
+    return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
-        proxy_url = _resolve_proxy_url()
+        proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
        if proxy_url and "proxy" not in transport_kwargs:
            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
@@ -298,50 +298,16 @@ from gateway.restart import (
 )


-def _normalize_whatsapp_identifier(value: str) -> str:
-    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
-    return (
-        str(value or "")
-        .strip()
-        .replace("+", "", 1)
-        .split(":", 1)[0]
-        .split("@", 1)[0]
-    )
+from gateway.whatsapp_identity import (
+    canonical_whatsapp_identifier as _canonical_whatsapp_identifier,  # noqa: F401
+    expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
+    normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
+)


-def _expand_whatsapp_auth_aliases(identifier: str) -> set:
-    """Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
-    normalized = _normalize_whatsapp_identifier(identifier)
-    if not normalized:
-        return set()
-
-    session_dir = _hermes_home / "whatsapp" / "session"
-    resolved = set()
-    queue = [normalized]
-
-    while queue:
-        current = queue.pop(0)
-        if not current or current in resolved:
-            continue
-
-        resolved.add(current)
-        for suffix in ("", "_reverse"):
-            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
-            if not mapping_path.exists():
-                continue
-            try:
-                mapped = _normalize_whatsapp_identifier(
-                    json.loads(mapping_path.read_text(encoding="utf-8"))
-                )
-            except Exception:
-                continue
-            if mapped and mapped not in resolved:
-                queue.append(mapped)
-
-    return resolved
-
 logger = logging.getLogger(__name__)

+
 # Sentinel placed into _running_agents immediately when a session starts
 # processing, *before* any await.  Prevents a second message for the same
 # session from bypassing the "already running" guard during the async gap
@@ -558,7 +524,7 @@ def _load_gateway_config() -> dict:
 def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from config.yaml — single source of truth.

-    Without this, temporary AIAgent instances (memory flush, /compress) fall
+    Without this, temporary AIAgent instances (e.g. /compress) fall
    back to the hardcoded default which fails when the active provider is
    openai-codex.
    """
@@ -949,129 +915,6 @@ class GatewayRunner:
                e,
            )

-    # -----------------------------------------------------------------
-
-    def _flush_memories_for_session(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Prompt the agent to save memories/skills before context is lost.
-
-        Synchronous worker — meant to be called via run_in_executor from
-        an async context so it doesn't block the event loop.
-        """
-        # Skip cron sessions — they run headless with no meaningful user
-        # conversation to extract memories from.
-        if old_session_id and old_session_id.startswith("cron_"):
-            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
-            return
-
-        try:
-            history = self.session_store.load_transcript(old_session_id)
-            if not history or len(history) < 4:
-                return
-
-            from run_agent import AIAgent
-            model, runtime_kwargs = self._resolve_session_agent_runtime(
-                session_key=session_key,
-            )
-            if not runtime_kwargs.get("api_key"):
-                return
-
-            tmp_agent = AIAgent(
-                **runtime_kwargs,
-                model=model,
-                max_iterations=8,
-                quiet_mode=True,
-                skip_memory=True,  # Flush agent — no memory provider
-                enabled_toolsets=["memory", "skills"],
-                session_id=old_session_id,
-            )
-            try:
-                # Fully silence the flush agent — quiet_mode only suppresses init
-                # messages; tool call output still leaks to the terminal through
-                # _safe_print → _print_fn.  Set a no-op to prevent that.
-                tmp_agent._print_fn = lambda *a, **kw: None
-
-                # Build conversation history from transcript
-                msgs = [
-                    {"role": m.get("role"), "content": m.get("content")}
-                    for m in history
-                    if m.get("role") in ("user", "assistant") and m.get("content")
-                ]
-
-                # Read live memory state from disk so the flush agent can see
-                # what's already saved and avoid overwriting newer entries.
-                _current_memory = ""
-                try:
-                    from tools.memory_tool import get_memory_dir
-                    _mem_dir = get_memory_dir()
-                    for fname, label in [
-                        ("MEMORY.md", "MEMORY (your personal notes)"),
-                        ("USER.md", "USER PROFILE (who the user is)"),
-                    ]:
-                        fpath = _mem_dir / fname
-                        if fpath.exists():
-                            content = fpath.read_text(encoding="utf-8").strip()
-                            if content:
-                                _current_memory += f"\n\n## Current {label}:\n{content}"
-                except Exception:
-                    pass  # Non-fatal — flush still works, just without the guard
-
-                # Give the agent a real turn to think about what to save
-                flush_prompt = (
-                    "[System: This session is about to be automatically reset due to "
-                    "inactivity or a scheduled daily reset. The conversation context "
-                    "will be cleared after this turn.\n\n"
-                    "Review the conversation above and:\n"
-                    "1. Save any important facts, preferences, or decisions to memory "
-                    "(user profile or your notes) that would be useful in future sessions.\n"
-                    "2. If you discovered a reusable workflow or solved a non-trivial "
-                    "problem, consider saving it as a skill.\n"
-                    "3. If nothing is worth saving, that's fine — just skip.\n\n"
-                )
-
-                if _current_memory:
-                    flush_prompt += (
-                        "IMPORTANT — here is the current live state of memory. Other "
-                        "sessions, cron jobs, or the user may have updated it since this "
-                        "conversation ended. Do NOT overwrite or remove entries unless "
-                        "the conversation above reveals something that genuinely "
-                        "supersedes them. Only add new information that is not already "
-                        "captured below."
-                        f"{_current_memory}\n\n"
-                    )
-
-                flush_prompt += (
-                    "Do NOT respond to the user. Just use the memory and skill_manage "
-                    "tools if needed, then stop.]"
-                )
-
-                tmp_agent.run_conversation(
-                    user_message=flush_prompt,
-                    conversation_history=msgs,
-                )
-            finally:
-                self._cleanup_agent_resources(tmp_agent)
-            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
-        except Exception as e:
-            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
-
-    async def _async_flush_memories(
-        self,
-        old_session_id: str,
-        session_key: Optional[str] = None,
-    ):
-        """Run the sync memory flush in a thread pool so it won't block the event loop."""
-        loop = asyncio.get_running_loop()
-        await loop.run_in_executor(
-            None,
-            self._flush_memories_for_session,
-            old_session_id,
-            session_key,
-        )
-
    @property
    def should_exit_cleanly(self) -> bool:
        return self._exit_cleanly
@@ -1137,7 +980,7 @@ class GatewayRunner:
            if override_runtime.get("api_key"):
                logger.debug(
                    "Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
-                    (resolved_session_key or "")[:30], model, override_model,
+                    resolved_session_key or "", model, override_model,
                    override_runtime.get("provider"),
                )
                return override_model, override_runtime
@@ -1145,12 +988,12 @@ class GatewayRunner:
            # resolution and apply model/provider from the override on top.
            logger.debug(
                "Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
-                (resolved_session_key or "")[:30], model, override_model,
+                resolved_session_key or "", model, override_model,
            )
        else:
            logger.debug(
                "No session model override: session=%s config_model=%s override_keys=%s",
-                (resolved_session_key or "")[:30], model,
+                resolved_session_key or "", model,
                list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
            )

@@ -1721,7 +1564,7 @@ class GatewayRunner:
                continue
            try:
                agent.interrupt(reason)
-                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
+                logger.debug("Interrupted running agent for session %s during shutdown", session_key)
            except Exception as e:
                logger.debug("Failed interrupting agent during shutdown: %s", e)

@@ -1893,7 +1736,7 @@ class GatewayRunner:
                    logger.warning(
                        "Auto-suspended stuck session %s (active across %d "
                        "consecutive restarts — likely a stuck loop)",
-                        session_key[:30], counts[session_key],
+                        session_key, counts[session_key],
                    )
            except Exception:
                pass
@@ -2306,7 +2149,7 @@ class GatewayRunner:
        except Exception as e:
            logger.error("Recovered watcher setup error: %s", e)

-        # Start background session expiry watcher for proactive memory flushing
+        # Start background session expiry watcher to finalize expired sessions
        asyncio.create_task(self._session_expiry_watcher())

        # Start background reconnection watcher for platforms that failed at startup
@@ -2323,25 +2166,24 @@ class GatewayRunner:
        return True
    
    async def _session_expiry_watcher(self, interval: int = 300):
-        """Background task that proactively flushes memories for expired sessions.
-        
-        Runs every `interval` seconds (default 5 min).  For each session that
-        has expired according to its reset policy, flushes memories in a thread
-        pool and marks the session so it won't be flushed again.
+        """Background task that finalizes expired sessions.

-        This means memories are already saved by the time the user sends their
-        next message, so there's no blocking delay.
+        Runs every ``interval`` seconds (default 5 min).  For each session
+        that has expired under its reset policy, invokes ``on_session_finalize``
+        hooks, cleans up the cached AIAgent's tool resources, evicts the
+        cache entry so it can be garbage-collected, and marks the session
+        so it won't be finalized again.
        """
        await asyncio.sleep(60)  # initial delay — let the gateway fully start
-        _flush_failures: dict[str, int] = {}  # session_id -> consecutive failure count
-        _MAX_FLUSH_RETRIES = 3
+        _finalize_failures: dict[str, int] = {}  # session_id -> consecutive failure count
+        _MAX_FINALIZE_RETRIES = 3
        while self._running:
            try:
                self.session_store._ensure_loaded()
                # Collect expired sessions first, then log a single summary.
                _expired_entries = []
                for key, entry in list(self.session_store._entries.items()):
-                    if entry.memory_flushed:
+                    if entry.expiry_finalized:
                        continue
                    if not self.session_store._is_session_expired(entry):
                        continue
@@ -2359,13 +2201,12 @@ class GatewayRunner:
                        f"{p}:{c}" for p, c in sorted(_platforms.items())
                    )
                    logger.info(
-                        "Session expiry: %d sessions to flush (%s)",
+                        "Session expiry: %d sessions to finalize (%s)",
                        len(_expired_entries), _plat_summary,
                    )

                for key, entry in _expired_entries:
                    try:
-                        await self._async_flush_memories(entry.session_id, key)
                        try:
                            from hermes_cli.plugins import invoke_hook as _invoke_hook
                            _parts = key.split(":")
@@ -2397,48 +2238,48 @@ class GatewayRunner:
                        # be garbage-collected.  Otherwise the cache grows
                        # unbounded across the gateway's lifetime.
                        self._evict_cached_agent(key)
-                        # Mark as flushed and persist to disk so the flag
+                        # Mark as finalized and persist to disk so the flag
                        # survives gateway restarts.
                        with self.session_store._lock:
-                            entry.memory_flushed = True
+                            entry.expiry_finalized = True
                            self.session_store._save()
                        logger.debug(
-                            "Memory flush completed for session %s",
+                            "Session expiry finalized for %s",
                            entry.session_id,
                        )
-                        _flush_failures.pop(entry.session_id, None)
+                        _finalize_failures.pop(entry.session_id, None)
                    except Exception as e:
-                        failures = _flush_failures.get(entry.session_id, 0) + 1
-                        _flush_failures[entry.session_id] = failures
-                        if failures >= _MAX_FLUSH_RETRIES:
+                        failures = _finalize_failures.get(entry.session_id, 0) + 1
+                        _finalize_failures[entry.session_id] = failures
+                        if failures >= _MAX_FINALIZE_RETRIES:
                            logger.warning(
-                                "Memory flush gave up after %d attempts for %s: %s. "
-                                "Marking as flushed to prevent infinite retry loop.",
+                                "Session finalize gave up after %d attempts for %s: %s. "
+                                "Marking as finalized to prevent infinite retry loop.",
                                failures, entry.session_id, e,
                            )
                            with self.session_store._lock:
-                                entry.memory_flushed = True
+                                entry.expiry_finalized = True
                                self.session_store._save()
-                            _flush_failures.pop(entry.session_id, None)
+                            _finalize_failures.pop(entry.session_id, None)
                        else:
                            logger.debug(
-                                "Memory flush failed (%d/%d) for %s: %s",
-                                failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
+                                "Session finalize failed (%d/%d) for %s: %s",
+                                failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
                            )

                if _expired_entries:
-                    _flushed = sum(
-                        1 for _, e in _expired_entries if e.memory_flushed
+                    _done = sum(
+                        1 for _, e in _expired_entries if e.expiry_finalized
                    )
-                    _failed = len(_expired_entries) - _flushed
+                    _failed = len(_expired_entries) - _done
                    if _failed:
                        logger.info(
-                            "Session expiry done: %d flushed, %d pending retry",
-                            _flushed, _failed,
+                            "Session expiry done: %d finalized, %d pending retry",
+                            _done, _failed,
                        )
                    else:
                        logger.info(
-                            "Session expiry done: %d flushed", _flushed,
+                            "Session expiry done: %d finalized", _done,
                        )

                # Sweep agents that have been idle beyond the TTL regardless
@@ -2715,7 +2556,7 @@ class GatewayRunner:
                    except Exception as _e:
                        logger.debug(
                            "mark_resume_pending failed for %s: %s",
-                            _sk[:20], _e,
+                            _sk, _e,
                        )
                self._interrupt_running_agents(
                    _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
@@ -3037,6 +2878,7 @@ class GatewayRunner:
            Platform.QQBOT: "QQ_ALLOWED_USERS",
        }
        platform_group_env_map = {
+            Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
            Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
        }
        platform_allow_all_map = {
@@ -3093,7 +2935,7 @@ class GatewayRunner:
        # Check platform-specific and global allowlists
        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
        group_allowlist = ""
-        if source.chat_type == "group":
+        if source.chat_type in {"group", "forum"}:
            group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

@@ -3102,7 +2944,7 @@ class GatewayRunner:
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")

        # Some platforms authorize group traffic by chat ID rather than sender ID.
-        if group_allowlist and source.chat_type == "group" and source.chat_id:
+        if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
            allowed_group_ids = {
                chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
            }
@@ -3380,7 +3222,7 @@ class GatewayRunner:
                logger.warning(
                    "Evicting stale _running_agents entry for %s "
                    "(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
-                    _quick_key[:30], _stale_age, _stale_idle,
+                    _quick_key, _stale_age, _stale_idle,
                    _raw_stale_timeout, _stale_detail,
                )
                self._invalidate_session_run_generation(
@@ -3416,7 +3258,7 @@ class GatewayRunner:
                    interrupt_reason=_INTERRUPT_REASON_STOP,
                    invalidation_reason="stop_command",
                )
-                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
+                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
                return "⚡ Stopped. You can continue this session."

            # /reset and /new must bypass the running-agent guard so they
@@ -3482,7 +3324,7 @@ class GatewayRunner:
                    try:
                        accepted = running_agent.steer(steer_text)
                    except Exception as exc:
-                        logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
+                        logger.warning("Steer failed for session %s: %s", _quick_key, exc)
                        return f"⚠️ Steer failed: {exc}"
                    if accepted:
                        preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
@@ -3565,7 +3407,7 @@ class GatewayRunner:
                )

            if event.message_type == MessageType.PHOTO:
-                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
+                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
                adapter = self.adapters.get(source.platform)
                if adapter:
                    merge_pending_message_event(adapter._pending_messages, _quick_key, event)
@@ -3585,7 +3427,7 @@ class GatewayRunner:
                logger.debug(
                    "Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
                    time.time() - _started_at,
-                    _quick_key[:20],
+                    _quick_key,
                )
                adapter = self.adapters.get(source.platform)
                if adapter:
@@ -3603,7 +3445,7 @@ class GatewayRunner:
                if event.get_command() == "stop":
                    # Force-clean the sentinel so the session is unlocked.
                    self._release_running_agent_state(_quick_key)
-                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
+                    logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
                    return "⚡ Force-stopped. The agent was still starting — session unlocked."
                # Queue the message so it will be picked up after the
                # agent starts.
@@ -3624,7 +3466,11 @@ class GatewayRunner:
                    if self._queue_during_drain_enabled()
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
-            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
+            if self._busy_input_mode == "queue":
+                logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
+                self._queue_or_replace_pending_event(_quick_key, event)
+                return None
+            logger.debug("PRIORITY interrupt for session %s", _quick_key)
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
                self._pending_messages[_quick_key] += "\n" + event.text
@@ -4622,7 +4468,7 @@ class GatewayRunner:
            if not self._is_session_run_current(_quick_key, run_generation):
                logger.info(
                    "Discarding stale agent result for %s — generation %d is no longer current",
-                    _quick_key[:20] if _quick_key else "?",
+                    _quick_key or "?",
                    run_generation,
                )
                _stale_adapter = self.adapters.get(source.platform)
@@ -4673,7 +4519,7 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug(
                        "clear_resume_pending failed for %s: %s",
-                        session_key[:20], _e,
+                        session_key, _e,
                    )

            # Surface error details when the agent failed silently (final_response=None)
@@ -5050,19 +4896,11 @@ class GatewayRunner:
        # Get existing session key
        session_key = self._session_key_for_source(source)
        self._invalidate_session_run_generation(session_key, reason="session_reset")
-        
-        # Flush memories in the background (fire-and-forget) so the user
-        # gets the "Session reset!" response immediately.
-        try:
-            old_entry = self.session_store._entries.get(session_key)
-            if old_entry:
-                _flush_task = asyncio.create_task(
-                    self._async_flush_memories(old_entry.session_id, session_key)
-                )
-                self._background_tasks.add(_flush_task)
-                _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Gateway memory flush on reset failed: %s", e)
+
+        # Snapshot the old entry so on_session_finalize can report the
+        # outgoing session id before reset_session() rotates it.
+        old_entry = self.session_store._entries.get(session_key)
+
        # Close tool resources on the old agent (terminal sandboxes, browser
        # daemons, background processes) before evicting from cache.
        # Guard with getattr because test fixtures may skip __init__.
@@ -5320,7 +5158,7 @@ class GatewayRunner:
                interrupt_reason=_INTERRUPT_REASON_STOP,
                invalidation_reason="stop_command_pending",
            )
-            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
+            logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
        if agent:
            # Force-clean the session lock so a truly hung agent doesn't
@@ -5688,9 +5526,17 @@ class GatewayRunner:
                        lines = [f"Model switched to `{result.new_model}`"]
                        lines.append(f"Provider: {plabel}")
                        mi = result.model_info
+                        from hermes_cli.model_switch import resolve_display_context_length
+                        ctx = resolve_display_context_length(
+                            result.new_model,
+                            result.target_provider,
+                            base_url=result.base_url or current_base_url or "",
+                            api_key=result.api_key or current_api_key or "",
+                            model_info=mi,
+                        )
+                        if ctx:
+                            lines.append(f"Context: {ctx:,} tokens")
                        if mi:
-                            if mi.context_window:
-                                lines.append(f"Context: {mi.context_window:,} tokens")
                            if mi.max_output:
                                lines.append(f"Max output: {mi.max_output:,} tokens")
                            if mi.has_cost_data():
@@ -5824,28 +5670,25 @@ class GatewayRunner:
        lines = [f"Model switched to `{result.new_model}`"]
        lines.append(f"Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry.
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or current_base_url or "",
+            api_key=result.api_key or current_api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            lines.append(f"Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                lines.append(f"Context: {mi.context_window:,} tokens")
            if mi.max_output:
                lines.append(f"Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                lines.append(f"Cost: {mi.format_cost()}")
            lines.append(f"Capabilities: {mi.format_capabilities()}")
-        else:
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or current_base_url,
-                    api_key=result.api_key or current_api_key,
-                    provider=result.target_provider,
-                )
-                lines.append(f"Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -7257,29 +7100,25 @@ class GatewayRunner:
                logger.debug("Failed to list titled sessions: %s", e)
                return f"Could not list sessions: {e}"

-        # Resolve the name to a session ID
+        # Resolve the name to a session ID.
        target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return (
                f"No session found matching '**{name}**'.\n"
                "Use `/resume` with no arguments to see available sessions."
            )
+        # Compression creates child continuations that hold the live transcript.
+        # Follow that chain so gateway /resume matches CLI behavior (#15000).
+        try:
+            target_id = self._session_db.resolve_resume_session_id(target_id)
+        except Exception as e:
+            logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)

        # Check if already on that session
        current_entry = self.session_store.get_or_create_session(source)
        if current_entry.session_id == target_id:
            return f"📌 Already on session **{name}**."

-        # Flush memories for current session before switching
-        try:
-            _flush_task = asyncio.create_task(
-                self._async_flush_memories(current_entry.session_id, session_key)
-            )
-            self._background_tasks.add(_flush_task)
-            _flush_task.add_done_callback(self._background_tasks.discard)
-        except Exception as e:
-            logger.debug("Memory flush on resume failed: %s", e)
-
        # Clear any running agent for this session key
        self._release_running_agent_state(session_key)

@@ -8816,7 +8655,7 @@ class GatewayRunner:
        if reason:
            logger.info(
                "Invalidated run generation for %s → %d (%s)",
-                session_key[:20],
+                session_key,
                generation,
                reason,
            )
@@ -9223,7 +9062,7 @@ class GatewayRunner:
                        if not _run_still_current():
                            logger.info(
                                "Discarding stale proxy stream for %s — generation %d is no longer current",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                                run_generation or 0,
                            )
                            return {
@@ -9287,7 +9126,7 @@ class GatewayRunner:
        if not _run_still_current():
            logger.info(
                "Discarding stale proxy result for %s — generation %d is no longer current",
-                session_key[:20] if session_key else "?",
+                session_key or "?",
                run_generation or 0,
            )
            return {
@@ -9729,7 +9568,7 @@ class GatewayRunner:
                )
                logger.debug(
                    "run_agent resolved: model=%s provider=%s session=%s",
-                    model, runtime_kwargs.get("provider"), (session_key or "")[:30],
+                    model, runtime_kwargs.get("provider"), session_key or "",
                )
            except Exception as exc:
                return {
@@ -10340,7 +10179,7 @@ class GatewayRunner:
            ):
                logger.info(
                    "Skipping stale agent promotion for %s — generation %s is no longer current",
-                    (session_key or "")[:20],
+                    session_key or "",
                    run_generation,
                )
                return
@@ -10487,7 +10326,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10547,7 +10386,7 @@ class GatewayRunner:
                            logger.info(
                                "Backup interrupt detected for session %s "
                                "(monitor task state: %s)",
-                                session_key[:20],
+                                session_key,
                                "done" if interrupt_monitor.done() else "running",
                            )
                            _backup_agent.interrupt(_bp_text)
@@ -10649,7 +10488,7 @@ class GatewayRunner:
                    if _is_control_interrupt_message(interrupt_message):
                        logger.info(
                            "Ignoring control interrupt message for session %s: %s",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                            interrupt_message,
                        )
                    else:
@@ -10693,7 +10532,7 @@ class GatewayRunner:
            if self._draining and (pending_event or pending):
                logger.info(
                    "Discarding pending follow-up for session %s during gateway %s",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    self._status_action_label(),
                )
                pending_event = None
@@ -10750,7 +10589,7 @@ class GatewayRunner:
                        try:
                            logger.info(
                                "Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
-                                session_key[:20] if session_key else "?",
+                                session_key or "?",
                            )
                            await adapter.send(
                                source.chat_id,
@@ -10762,7 +10601,7 @@ class GatewayRunner:
                    elif first_response:
                        logger.info(
                            "Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
-                            session_key[:20] if session_key else "?",
+                            session_key or "?",
                        )
                    # Release deferred bg-review notifications now that the
                    # first response has been delivered.  Pop from the
@@ -10897,7 +10736,7 @@ class GatewayRunner:
            if not _is_empty_sentinel and (_streamed or _previewed):
                logger.info(
                    "Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
-                    session_key[:20] if session_key else "?",
+                    session_key or "?",
                    _streamed,
                    _previewed,
                )
@@ -60,6 +60,10 @@ from .config import (
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
 )
+from .whatsapp_identity import (
+    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,
+)


@dataclass
@@ -83,6 +87,9 @@ class SessionSource:
    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
+    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
+    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
+    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -120,8 +127,14 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
+        if self.guild_id:
+            d["guild_id"] = self.guild_id
+        if self.parent_chat_id:
+            d["parent_chat_id"] = self.parent_chat_id
+        if self.message_id:
+            d["message_id"] = self.message_id
        return d
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -135,6 +148,9 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
+            guild_id=data.get("guild_id"),
+            parent_chat_id=data.get("parent_chat_id"),
+            message_id=data.get("message_id"),
        )
    

@@ -186,6 +202,31 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


+def _discord_tools_loaded() -> bool:
+    """True iff the agent will actually have Discord tools this session.
+
+    Two conditions must hold:
+      1. The `discord` or `discord_admin` toolset is enabled for the
+         Discord platform via `hermes tools` (opt-in, default OFF).
+      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
+         at registry time, so the toolset being enabled in config is not
+         enough if the token isn't configured.
+
+    Returns False (safe default — keeps the stale-API disclaimer) on any
+    error so a bad config can't silently promise tools the agent lacks.
+    """
+    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
+        return False
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.tools_config import _get_platform_tools
+        cfg = load_config()
+        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
+        return "discord" in enabled or "discord_admin" in enabled
+    except Exception:
+        return False
+
+
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -273,13 +314,44 @@ def build_session_context_prompt(
            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
+        # Inject the Discord IDs block only when the agent actually has
+        # Discord tools loaded this session — i.e. the user opted into
+        # `discord` / `discord_admin` via `hermes tools` AND the bot
+        # token is configured.  Otherwise keep the stale-API disclaimer
+        # honest so we never promise tools the agent lacks.
+        if _discord_tools_loaded():
+            src = context.source
+            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
+            if src.guild_id:
+                id_lines.append(f"  - Guild: `{src.guild_id}`")
+            if src.thread_id and src.parent_chat_id:
+                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
+                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
+            else:
+                id_lines.append(f"  - Channel: `{src.chat_id}`")
+            if src.message_id:
+                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+            lines.extend(id_lines)
+        else:
+            lines.append("")
+            lines.append(
+                "**Platform notes:** You are running inside Discord. "
+                "You do NOT have access to Discord-specific APIs — you cannot search "
+                "channel history, pin messages, manage roles, or list server members. "
+                "Do not promise to perform these actions. If the user asks, explain "
+                "that you can only read messages sent directly to you and respond."
+            )
+    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
-            "**Platform notes:** You are running inside Discord. "
-            "You do NOT have access to Discord-specific APIs — you cannot search "
-            "channel history, pin messages, manage roles, or list server members. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
+            "**Platform notes:** You are responding via iMessage. "
+            "Keep responses short and conversational — think texts, not essays. "
+            "Structure longer replies as separate short thoughts, each separated "
+            "by a blank line (double newline). Each block between blank lines "
+            "will be delivered as its own iMessage bubble, so write accordingly: "
+            "one idea per bubble, 1–3 sentences each. "
+            "If the user needs a detailed answer, give the short version first "
+            "and offer to elaborate."
        )

    # Connected platforms
@@ -367,11 +439,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it successfully flushes
-    # memories for this session.  Persisted to sessions.json so the flag
-    # survives gateway restarts (the old in-memory _pre_flushed_sessions
-    # set was lost on restart, causing redundant re-flushes).
-    memory_flushed: bool = False
+    # Set by the background expiry watcher after it finalizes an expired
+    # session (invoking on_session_finalize hooks and evicting the cached
+    # agent).  Persisted to sessions.json so the flag survives gateway
+    # restarts — prevents redundant finalization runs.
+    expiry_finalized: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -407,7 +479,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "memory_flushed": self.memory_flushed,
+            "expiry_finalized": self.expiry_finalized,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -459,7 +531,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            memory_flushed=data.get("memory_flushed", False),
+            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -518,15 +590,24 @@ def build_session_key(
    """
    platform = source.platform.value
    if source.chat_type == "dm":
-        if source.chat_id:
+        dm_chat_id = source.chat_id
+        if source.platform == Platform.WHATSAPP:
+            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
+
+        if dm_chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{source.chat_id}"
+                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{dm_chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
+    if participant_id and source.platform == Platform.WHATSAPP:
+        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
+        # single group member gets two isolated per-user sessions when the
+        # bridge reshuffles alias forms.
+        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
@@ -0,0 +1,135 @@
+"""Shared helpers for canonicalising WhatsApp sender identity.
+
+WhatsApp's bridge can surface the same human under two different JID shapes
+within a single conversation:
+
+- LID form: ``999999999999999@lid``
+- Phone form: ``15551234567@s.whatsapp.net``
+
+Both the authorisation path (:mod:`gateway.run`) and the session-key path
+(:mod:`gateway.session`) need to collapse these aliases to a single stable
+identity. This module is the single source of truth for that resolution so
+the two paths can never drift apart.
+
+Public helpers:
+
+- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
+  down to the bare numeric identifier.
+- :func:`canonical_whatsapp_identifier` — walk the bridge's
+  ``lid-mapping-*.json`` files and return a stable canonical identity
+  across phone/LID variants.
+- :func:`expand_whatsapp_aliases` — return the full alias set for an
+  identifier. Used by authorisation code that needs to match any known
+  form of a sender against an allow-list.
+
+Plugins that need per-sender behaviour on WhatsApp (role-based routing,
+per-contact authorisation, policy gating in a gateway hook) should use
+``canonical_whatsapp_identifier`` so their bookkeeping lines up with
+Hermes' own session keys.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Set
+
+from hermes_constants import get_hermes_home
+
+
+def normalize_whatsapp_identifier(value: str) -> str:
+    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
+
+    Accepts any of the identifier shapes the WhatsApp bridge may emit:
+    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
+    ``"60123456789@lid"``, or a bare ``"+60123456789"`` / ``"60123456789"``.
+    Returns just the numeric identifier (``"60123456789"``) suitable for
+    equality comparisons.
+
+    Useful for plugins that want to match sender IDs against
+    user-supplied config (phone numbers in ``config.yaml``) without
+    worrying about which variant the bridge happens to deliver.
+    """
+    return (
+        str(value or "")
+        .strip()
+        .replace("+", "", 1)
+        .split(":", 1)[0]
+        .split("@", 1)[0]
+    )
+
+
+def expand_whatsapp_aliases(identifier: str) -> Set[str]:
+    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.
+
+    Returns the set of all identifiers transitively reachable through the
+    bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
+    starting from ``identifier``. The result always includes the
+    normalized input itself, so callers can safely ``in`` check against
+    the return value without a separate fallback branch.
+
+    Returns an empty set if ``identifier`` normalizes to empty.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return set()
+
+    session_dir = get_hermes_home() / "whatsapp" / "session"
+    resolved: Set[str] = set()
+    queue = [normalized]
+
+    while queue:
+        current = queue.pop(0)
+        if not current or current in resolved:
+            continue
+
+        resolved.add(current)
+        for suffix in ("", "_reverse"):
+            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
+            if not mapping_path.exists():
+                continue
+            try:
+                mapped = normalize_whatsapp_identifier(
+                    json.loads(mapping_path.read_text(encoding="utf-8"))
+                )
+            except Exception:
+                continue
+            if mapped and mapped not in resolved:
+                queue.append(mapped)
+
+    return resolved
+
+
+def canonical_whatsapp_identifier(identifier: str) -> str:
+    """Return a stable WhatsApp sender identity across phone-JID/LID variants.
+
+    WhatsApp may surface the same person under either a phone-format JID
+    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
+    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
+    member inside a group chat — both represent a user identity, and the
+    bridge may flip between the two for the same human.
+
+    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
+    files, walks the mapping transitively, and picks the shortest alias
+    (ties broken by plain string order) as the canonical identity.
+    :func:`gateway.session.build_session_key` uses this for both WhatsApp
+    DM chat_ids and WhatsApp group participant_ids, so callers get the
+    same session-key identity Hermes itself uses.
+
+    Plugins that need per-sender behaviour (role-based routing,
+    authorisation, per-contact policy) should use this so their
+    bookkeeping lines up with Hermes' session bookkeeping even when
+    the bridge reshuffles aliases.
+
+    Returns an empty string if ``identifier`` normalizes to empty. If no
+    mapping files exist yet (fresh bridge install), returns the
+    normalized input unchanged.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return ""
+
+    # expand_whatsapp_aliases always includes `normalized` itself in the
+    # returned set, so the min() below degrades gracefully to `normalized`
+    # when no lid-mapping files are present.
+    aliases = expand_whatsapp_aliases(normalized)
+    return min(aliases, key=lambda candidate: (len(candidate), candidate))
@@ -743,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:

    try:
        raw = json.loads(auth_file.read_text())
-    except Exception:
+    except Exception as exc:
+        corrupt_path = auth_file.with_suffix(".json.corrupt")
+        try:
+            import shutil
+            shutil.copy2(auth_file, corrupt_path)
+        except Exception:
+            pass
+        logger.warning(
+            "auth: failed to parse %s (%s) — starting with empty store. "
+            "Corrupt file preserved at %s",
+            auth_file, exc, corrupt_path,
+        )
        return {"version": AUTH_STORE_VERSION, "providers": {}}

    if isinstance(raw, dict) and (
@@ -103,7 +103,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
+    CommandDef("model", "Switch model for this session", "Configuration",
+               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
               cli_only=True),

@@ -126,6 +127,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True, args_hint="[name]"),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
+    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
+               cli_only=True, args_hint="[queue|interrupt|status]",
+               subcommands=("queue", "interrupt", "status")),

    # Tools & Skills
    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -612,14 +612,6 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
-        "flush_memories": {
-            "provider": "auto",
-            "model": "",
-            "base_url": "",
-            "api_key": "",
-            "timeout": 30,
-            "extra_body": {},
-        },
        "title_generation": {
            "provider": "auto",
            "model": "",
@@ -783,6 +775,15 @@ DEFAULT_CONFIG = {
        # warning log if out of range.
        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
+        # When a subagent hits a dangerous-command approval prompt, the parent's
+        # prompt_toolkit TUI owns stdin — a thread-local input() call from the
+        # subagent worker would deadlock the parent UI. To avoid the deadlock,
+        # subagent threads ALWAYS resolve approvals non-interactively:
+        #   false (default) → auto-deny with a logger.warning audit line (safe)
+        #   true             → auto-approve "once" with a logger.warning audit line
+        # Flip to true only if you trust delegated work to run dangerous cmds
+        # without human review (cron pipelines, batch automation, etc.).
+        "subagent_auto_approve": False,
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -839,7 +840,7 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
-        # discord_server tool: restrict which actions the agent may call.
+        # discord / discord_admin tools: restrict which actions the agent may call.
        # Default (empty) = all actions allowed (subject to bot privileged intents).
        # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
        # or YAML list. Unknown names are dropped with a warning at load time.
@@ -51,6 +51,7 @@ import sys
 from pathlib import Path
 from typing import Optional

+
 def _add_accept_hooks_flag(parser) -> None:
    """Attach the ``--accept-hooks`` flag.  Shared across every agent
    subparser so the flag works regardless of CLI position."""
@@ -174,6 +175,7 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
 try:
    if "HERMES_REDACT_SECRETS" not in os.environ:
        import yaml as _yaml_early
+
        _cfg_path = get_hermes_home() / "config.yaml"
        if _cfg_path.exists():
            with open(_cfg_path, encoding="utf-8") as _f:
@@ -839,6 +841,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:


 def _tui_build_needed(tui_dir: Path) -> bool:
+    if _hermes_ink_bundle_stale(tui_dir):
+        return True
    entry = tui_dir / "dist" / "entry.js"
    if not entry.exists():
        return True
@@ -1026,7 +1030,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
    return [node, str(root / "dist" / "entry.js")], root


-def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
+def _launch_tui(
+    resume_session_id: Optional[str] = None,
+    tui_dev: bool = False,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+):
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"

@@ -1036,6 +1045,12 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
+    if model:
+        env["HERMES_MODEL"] = model
+        env["HERMES_INFERENCE_MODEL"] = model
+    if provider:
+        env["HERMES_TUI_PROVIDER"] = provider
+        env["HERMES_INFERENCE_PROVIDER"] = provider
    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
    # large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1174,6 +1189,8 @@ def cmd_chat(args):
        _launch_tui(
            getattr(args, "resume", None),
            tui_dev=getattr(args, "tui_dev", False),
+            model=getattr(args, "model", None),
+            provider=getattr(args, "provider", None),
        )

    # Import and run the CLI
@@ -1325,7 +1342,9 @@ def cmd_whatsapp(args):
        return

    if not (bridge_dir / "node_modules").exists():
-        print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
+        print(
+            "\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)..."
+        )
        npm = shutil.which("npm")
        if not npm:
            print("  ✗ npm not found on PATH — install Node.js first")
@@ -1701,15 +1720,14 @@ def _clear_stale_openai_base_url():

 # (task_key, display_name, short_description)
 _AUX_TASKS: list[tuple[str, str, str]] = [
-    ("vision",           "Vision",           "image/screenshot analysis"),
-    ("compression",      "Compression",      "context summarization"),
-    ("web_extract",      "Web extract",      "web page summarization"),
-    ("session_search",   "Session search",   "past-conversation recall"),
-    ("approval",         "Approval",         "smart command approval"),
-    ("mcp",              "MCP",              "MCP tool reasoning"),
-    ("flush_memories",   "Flush memories",   "memory consolidation"),
+    ("vision", "Vision", "image/screenshot analysis"),
+    ("compression", "Compression", "context summarization"),
+    ("web_extract", "Web extract", "web page summarization"),
+    ("session_search", "Session search", "past-conversation recall"),
+    ("approval", "Approval", "smart command approval"),
+    ("mcp", "MCP", "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
-    ("skills_hub",       "Skills hub",       "skills search/install"),
+    ("skills_hub", "Skills hub", "skills search/install"),
 ]


@@ -1808,7 +1826,7 @@ def _aux_config_menu() -> None:
        print("  Auxiliary models — side-task routing")
        print()
        print("  Side tasks (vision, compression, web extraction, etc.) default")
-        print("  to your main chat model.  \"auto\" means \"use my main model\" —")
+        print('  to your main chat model.  "auto" means "use my main model" —')
        print("  Hermes only falls back to a lightweight backend (OpenRouter,")
        print("  Nous Portal) if the main model is unavailable.  Override a")
        print("  task below if you want it pinned to a specific provider/model.")
@@ -1819,15 +1837,20 @@ def _aux_config_menu() -> None:
        desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
        entries: list[tuple[str, str]] = []
        for task_key, name, desc in _AUX_TASKS:
-            task_cfg = aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
+            task_cfg = (
+                aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
+            )
            current = _format_aux_current(task_cfg)
-            label = f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
+            label = (
+                f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
+            )
            entries.append((task_key, label))
        entries.append(("__reset__", "Reset all to auto"))
-        entries.append(("__back__",  "Back"))
+        entries.append(("__back__", "Back"))

        idx = _prompt_provider_choice(
-            [label for _, label in entries], default=0,
+            [label for _, label in entries],
+            default=0,
        )
        if idx is None:
            return
@@ -1875,7 +1898,9 @@ def _aux_select_for_task(task: str) -> None:

    entries: list[tuple[str, str, list[str]]] = []  # (slug, label, models)
    # "auto" always first
-    auto_marker = "  ← current" if current_provider == "auto" and not current_base_url else ""
+    auto_marker = (
+        "  ← current" if current_provider == "auto" and not current_base_url else ""
+    )
    entries.append(("__auto__", f"auto (recommended){auto_marker}", []))

    for p in providers:
@@ -1884,7 +1909,9 @@ def _aux_select_for_task(task: str) -> None:
        total = p.get("total_models", 0)
        models = p.get("models") or []
        model_hint = f" — {total} models" if total else ""
-        marker = "  ← current" if slug == current_provider and not current_base_url else ""
+        marker = (
+            "  ← current" if slug == current_provider and not current_base_url else ""
+        )
        entries.append((slug, f"{name}{model_hint}{marker}", list(models)))

    # Custom endpoint (raw base_url)
@@ -1952,14 +1979,17 @@ def _aux_flow_provider_model(
        selected = val or ""
    else:
        selected = _prompt_model_selection(
-            model_list, current_model=current_model, pricing=pricing,
+            model_list,
+            current_model=current_model,
+            pricing=pricing,
        )
        if selected is None:
            print("No change.")
            return

-    _save_aux_choice(task, provider=provider_slug, model=selected or "",
-                     base_url="", api_key="")
+    _save_aux_choice(
+        task, provider=provider_slug, model=selected or "", base_url="", api_key=""
+    )
    if selected:
        print(f"{display_name}: {provider_slug} · {selected}")
    else:
@@ -1979,7 +2009,9 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    print("  Provide an OpenAI-compatible base URL (e.g. http://localhost:11434/v1)")
    print()
    try:
-        url_prompt = f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
+        url_prompt = (
+            f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
+        )
        url = input(url_prompt).strip()
    except (KeyboardInterrupt, EOFError):
        print()
@@ -1989,20 +2021,30 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
        print("No URL provided. No change.")
        return
    try:
-        model_prompt = f"Model slug (optional) [{current_model}]: " if current_model else "Model slug (optional): "
+        model_prompt = (
+            f"Model slug (optional) [{current_model}]: "
+            if current_model
+            else "Model slug (optional): "
+        )
        model = input(model_prompt).strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return
    model = model or current_model
    try:
-        api_key = getpass.getpass("API key (optional, blank = use OPENAI_API_KEY): ").strip()
+        api_key = getpass.getpass(
+            "API key (optional, blank = use OPENAI_API_KEY): "
+        ).strip()
    except (KeyboardInterrupt, EOFError):
        print()
        return

    _save_aux_choice(
-        task, provider="custom", model=model, base_url=url, api_key=api_key,
+        task,
+        provider="custom",
+        model=model,
+        base_url=url,
+        api_key=api_key,
    )
    short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
    print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))
@@ -2118,7 +2160,9 @@ def _model_flow_ai_gateway(config, current_model=""):
    api_key = get_env_value("AI_GATEWAY_API_KEY")
    if not api_key:
        print("No Vercel AI Gateway API key configured.")
-        print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
+        print(
+            "Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway"
+        )
        print("Add a payment method to get $5 in free credits.")
        print()
        try:
@@ -2918,7 +2962,9 @@ def _model_flow_named_custom(config, provider_info):

    print("Fetching available models...")
    models = fetch_api_models(
-        api_key, base_url, timeout=8.0,
+        api_key,
+        base_url,
+        timeout=8.0,
        api_mode=api_mode or None,
    )

@@ -3589,7 +3635,12 @@ def _model_flow_stepfun(config, current_model=""):
        _save_model_choice,
        deactivate_provider,
    )
-    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.config import (
+        get_env_value,
+        save_env_value,
+        load_config,
+        save_config,
+    )
    from hermes_cli.models import fetch_api_models

    provider_id = "stepfun"
@@ -3608,6 +3659,7 @@ def _model_flow_stepfun(config, current_model=""):
        if key_env:
            try:
                import getpass
+
                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
@@ -3633,7 +3685,10 @@ def _model_flow_stepfun(config, current_model=""):
    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)

    region_choices = [
-        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
+        (
+            "international",
+            f"International ({_stepfun_base_url_for_region('international')})",
+        ),
        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
    ]
    ordered_regions = []
@@ -4476,6 +4531,7 @@ def cmd_webhook(args):
 def cmd_hooks(args):
    """Shell-hook inspection and management."""
    from hermes_cli.hooks import hooks_command
+
    hooks_command(args)


@@ -6046,6 +6102,86 @@ def _cmd_update_impl(args, gateway_mode: bool):
            )
            import signal as _signal

+            def _wait_for_service_active(
+                scope_cmd_: list,
+                svc_name_: str,
+                timeout: float = 10.0,
+            ) -> bool:
+                """Wait for ``systemctl is-active`` to report the unit as active.
+
+                A single check right after a graceful exit or a hard restart
+                can land inside systemd's Stopped -> Started transition and
+                wrongly report the unit as down, so the query is repeated
+                every 0.5s.  The budget is ``timeout`` seconds, floored at
+                0.5s; the unit is always queried at least once.
+
+                Returns True as soon as the command's stdout is exactly
+                ``active`` and False once the deadline has passed.  A missing
+                executable or a query that exceeds its own 5s timeout counts
+                as "not active yet" rather than as an error.
+                """
+                give_up_at = _time.monotonic() + max(timeout, 0.5)
+                while True:
+                    try:
+                        _probe = subprocess.run(
+                            scope_cmd_ + ["is-active", svc_name_],
+                            capture_output=True,
+                            text=True,
+                            timeout=5,
+                        )
+                    except (FileNotFoundError, subprocess.TimeoutExpired):
+                        _probe = None
+                    if _probe is not None and _probe.stdout.strip() == "active":
+                        return True
+                    # Deadline is checked after the probe so that even a
+                    # tiny budget still performs one query.
+                    if _time.monotonic() >= give_up_at:
+                        return False
+                    _time.sleep(0.5)
+
+            def _service_restart_sec(
+                scope_cmd_: list,
+                svc_name_: str,
+                default: float = 0.0,
+            ) -> float:
+                """Read the unit's ``RestartUSec`` (RestartSec) in seconds.
+
+                After a graceful exit-75, systemd waits ``RestartSec`` before
+                respawning the unit.  Callers that poll for ``is-active``
+                must use a timeout >= ``RestartSec`` + transition slack, or
+                they'll give up *during* the cooldown window and wrongly
+                conclude the unit didn't relaunch.
+
+                Args:
+                    scope_cmd_: Command prefix (argv list) that ``show ...``
+                        is appended to.
+                    svc_name_: Unit name to query.
+                    default: Value returned when the property cannot be read
+                        or parsed.
+
+                Returns:
+                    The restart delay in seconds, or ``default`` when the
+                    command is missing or times out, the output is empty or
+                    ``infinity``, or any component of the value cannot be
+                    parsed.
+                """
+                try:
+                    _show = subprocess.run(
+                        scope_cmd_
+                        + [
+                            "show",
+                            svc_name_,
+                            "--property=RestartUSec",
+                            "--value",
+                        ],
+                        capture_output=True,
+                        text=True,
+                        timeout=5,
+                    )
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    return default
+                raw = (_show.stdout or "").strip()
+                # systemd emits values like "30s", "100ms", "1min 30s",
+                # "1h 30min", or "infinity".
+                if not raw or raw == "infinity":
+                    return default
+                # Unit suffixes systemd uses when printing time spans.  Order
+                # matters: a suffix that is itself the tail of a longer one
+                # ("h" in "month", "s" in "ms"/"us") must come after it.
+                _units = (
+                    ("month", 2629800.0),
+                    ("min", 60.0),
+                    ("ms", 0.001),
+                    ("us", 0.000001),
+                    ("y", 31557600.0),
+                    ("w", 604800.0),
+                    ("d", 86400.0),
+                    ("h", 3600.0),
+                    ("s", 1.0),
+                )
+                total = 0.0
+                for part in raw.split():
+                    for _suf, _mult in _units:
+                        if part.endswith(_suf):
+                            break
+                    else:
+                        # Unknown unit: a partial sum would understate the
+                        # delay, so treat the whole value as unparsable.
+                        return default
+                    try:
+                        total += float(part[: -len(_suf)]) * _mult
+                    except ValueError:
+                        return default
+                return total
+
            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
            # for up to ``agent.restart_drain_timeout`` (default 60s) before
            # exiting with code 75; we wait slightly longer so the drain
@@ -6061,12 +6197,17 @@ def _cmd_update_impl(args, gateway_mode: bool):
            _cfg_drain = None
            try:
                from hermes_cli.config import load_config
-                _cfg_agent = (load_config().get("agent") or {})
+
+                _cfg_agent = load_config().get("agent") or {}
                _cfg_drain = _cfg_agent.get("restart_drain_timeout")
            except Exception:
                pass
            try:
-                _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
+                _drain_budget = (
+                    float(_cfg_drain)
+                    if _cfg_drain is not None
+                    else float(_DEFAULT_DRAIN)
+                )
            except (TypeError, ValueError):
                _drain_budget = float(_DEFAULT_DRAIN)
            # Add a 15s margin so the drain loop + final exit finish before
@@ -6131,14 +6272,23 @@ def _cmd_update_impl(args, gateway_mode: bool):
                            _main_pid = 0
                            try:
                                _show = subprocess.run(
-                                    scope_cmd + [
-                                        "show", svc_name,
-                                        "--property=MainPID", "--value",
+                                    scope_cmd
+                                    + [
+                                        "show",
+                                        svc_name,
+                                        "--property=MainPID",
+                                        "--value",
                                    ],
-                                    capture_output=True, text=True, timeout=5,
+                                    capture_output=True,
+                                    text=True,
+                                    timeout=5,
                                )
                                _main_pid = int((_show.stdout or "").strip() or 0)
-                            except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
+                            except (
+                                ValueError,
+                                subprocess.TimeoutExpired,
+                                FileNotFoundError,
+                            ):
                                _main_pid = 0

                            _graceful_ok = False
@@ -6147,19 +6297,33 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    f"  → {svc_name}: draining (up to {int(_drain_budget)}s)..."
                                )
                                _graceful_ok = _graceful_restart_via_sigusr1(
-                                    _main_pid, drain_timeout=_drain_budget,
+                                    _main_pid,
+                                    drain_timeout=_drain_budget,
                                )

                            if _graceful_ok:
                                # Gateway exited 75; systemd should relaunch
-                                # via Restart=on-failure.  Verify the new
-                                # process came up.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True, text=True, timeout=5,
+                                # via Restart=on-failure.  The unit's
+                                # RestartSec (default 30s on ours) gates the
+                                # respawn — poll past that + slack so we
+                                # don't give up mid-cooldown and falsely
+                                # print "drained but didn't relaunch".  For
+                                # units without RestartSec set we fall back
+                                # to the original 10s budget.
+                                _restart_sec = _service_restart_sec(
+                                    scope_cmd,
+                                    svc_name,
+                                    default=0.0,
                                )
-                                if verify.stdout.strip() == "active":
+                                _post_drain_timeout = max(
+                                    10.0,
+                                    _restart_sec + 10.0,
+                                )
+                                if _wait_for_service_active(
+                                    scope_cmd,
+                                    svc_name,
+                                    timeout=_post_drain_timeout,
+                                ):
                                    restarted_services.append(svc_name)
                                    continue
                                # Process exited but wasn't respawned (older
@@ -6185,14 +6349,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                # Verify the service actually survived the
                                # restart.  systemctl restart returns 0 even
                                # if the new process crashes immediately.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True,
-                                    text=True,
-                                    timeout=5,
-                                )
-                                if verify.stdout.strip() == "active":
+                                if _wait_for_service_active(
+                                    scope_cmd,
+                                    svc_name,
+                                    timeout=10.0,
+                                ):
                                    restarted_services.append(svc_name)
                                else:
                                    # Retry once — transient startup failures
@@ -6207,14 +6368,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                        text=True,
                                        timeout=15,
                                    )
-                                    _time.sleep(3)
-                                    verify2 = subprocess.run(
-                                        scope_cmd + ["is-active", svc_name],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=5,
-                                    )
-                                    if verify2.stdout.strip() == "active":
+                                    if _wait_for_service_active(
+                                        scope_cmd,
+                                        svc_name,
+                                        timeout=10.0,
+                                    ):
                                        restarted_services.append(svc_name)
                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
@@ -6715,9 +6873,15 @@ def cmd_dashboard(args):
    try:
        import fastapi  # noqa: F401
        import uvicorn  # noqa: F401
-    except ImportError:
-        print("Web UI dependencies not installed.")
-        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
+    except ImportError as e:
+        print("Web UI dependencies not installed (need fastapi + uvicorn).")
+        print(
+            f"Re-install the package into this interpreter so metadata updates apply:\n"
+            f"  cd {PROJECT_ROOT}\n"
+            f"  {sys.executable} -m pip install -e .\n"
+            "If `pip` is missing in this venv, use:  uv pip install -e ."
+        )
+        print(f"Import error: {e}")
        sys.exit(1)

    if "HERMES_WEB_DIST" not in os.environ:
@@ -6726,11 +6890,17 @@ def cmd_dashboard(args):

    from hermes_cli.web_server import start_server

+    gui_mode = getattr(args, "gui", False)
+    embedded_chat = (
+        gui_mode or args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
+    )
    start_server(
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
        allow_public=getattr(args, "insecure", False),
+        embedded_chat=embedded_chat,
+        gui_mode=gui_mode,
    )


@@ -6813,6 +6983,40 @@ For more help on a command:
    parser.add_argument(
        "--version", "-V", action="store_true", help="Show version and exit"
    )
+    parser.add_argument(
+        "-z",
+        "--oneshot",
+        metavar="PROMPT",
+        default=None,
+        help=(
+            "One-shot mode: send a single prompt and print ONLY the final "
+            "response text to stdout. No banner, no spinner, no tool "
+            "previews, no session_id line. Tools, memory, rules, and "
+            "AGENTS.md in the CWD are loaded as normal; approvals are "
+            "auto-bypassed. Intended for scripts / pipes."
+        ),
+    )
+    # --model / --provider are accepted at the top level so they can pair
+    # with -z without needing the `chat` subcommand.  If neither -z nor a
+    # subcommand consumes them, they fall through harmlessly as None.
+    # Mirrors `hermes chat --model ... --provider ...` semantics.
+    parser.add_argument(
+        "-m",
+        "--model",
+        default=None,
+        help=(
+            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
+        ),
+    )
+    parser.add_argument(
+        "--provider",
+        default=None,
+        help=(
+            "Provider override for this invocation (e.g. openrouter, anthropic). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+        ),
+    )
    parser.add_argument(
        "--resume",
        "-r",
@@ -7390,17 +7594,39 @@ For more help on a command:
        "reset", help="Clear exhaustion status for all credentials for a provider"
    )
    auth_reset.add_argument("provider", help="Provider id")
-    auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
+    auth_status = auth_subparsers.add_parser(
+        "status", help="Show auth status for a provider"
+    )
    auth_status.add_argument("provider", help="Provider id")
-    auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
+    auth_logout = auth_subparsers.add_parser(
+        "logout", help="Log out a provider and clear stored auth state"
+    )
    auth_logout.add_argument("provider", help="Provider id")
-    auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
-    auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
-    auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
-    auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
+    auth_spotify = auth_subparsers.add_parser(
+        "spotify", help="Authenticate Hermes with Spotify via PKCE"
+    )
+    auth_spotify.add_argument(
+        "spotify_action",
+        nargs="?",
+        choices=["login", "status", "logout"],
+        default="login",
+    )
+    auth_spotify.add_argument(
+        "--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)"
+    )
+    auth_spotify.add_argument(
+        "--redirect-uri",
+        help="Allow-listed localhost redirect URI for your Spotify app",
+    )
    auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
-    auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
-    auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
+    auth_spotify.add_argument(
+        "--no-browser",
+        action="store_true",
+        help="Do not attempt to open the browser automatically",
+    )
+    auth_spotify.add_argument(
+        "--timeout", type=float, help="Callback/token exchange timeout in seconds"
+    )
    auth_parser.set_defaults(func=cmd_auth)

    # =========================================================================
@@ -7610,7 +7836,8 @@ For more help on a command:
    hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")

    hooks_subparsers.add_parser(
-        "list", aliases=["ls"],
+        "list",
+        aliases=["ls"],
        help="List configured hooks with matcher, timeout, and consent status",
    )

@@ -7623,14 +7850,18 @@ For more help on a command:
        help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
    )
    _hk_test.add_argument(
-        "--for-tool", dest="for_tool", default=None,
+        "--for-tool",
+        dest="for_tool",
+        default=None,
        help=(
            "Only fire hooks whose matcher matches this tool name "
            "(used for pre_tool_call / post_tool_call)"
        ),
    )
    _hk_test.add_argument(
-        "--payload-file", dest="payload_file", default=None,
+        "--payload-file",
+        dest="payload_file",
+        default=None,
        help=(
            "Path to a JSON file whose contents are merged into the "
            "synthetic payload before execution"
@@ -7638,7 +7869,8 @@ For more help on a command:
    )

    _hk_revoke = hooks_subparsers.add_parser(
-        "revoke", aliases=["remove", "rm"],
+        "revoke",
+        aliases=["remove", "rm"],
        help="Remove a command's allowlist entries (takes effect on next restart)",
    )
    _hk_revoke.add_argument(
@@ -8916,6 +9148,19 @@ Examples:
        action="store_true",
        help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
    )
+    dashboard_parser.add_argument(
+        "--tui",
+        action="store_true",
+        help=(
+            "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
+            "Alternatively set HERMES_DASHBOARD_TUI=1."
+        ),
+    )
+    dashboard_parser.add_argument(
+        "--gui",
+        action="store_true",
+        help="Run dashboard in GUI-shell mode; implies --tui",
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -9058,26 +9303,28 @@ Examples:
    # the nested subcommand (dest varies by parser).
    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
    _AGENT_SUBCOMMANDS = {
-        "cron":    ("cron_command",    {"run", "tick"}),
+        "cron": ("cron_command", {"run", "tick"}),
        "gateway": ("gateway_command", {"run"}),
-        "mcp":     ("mcp_action",      {"serve"}),
+        "mcp": ("mcp_action", {"serve"}),
    }
    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
-    if (
-        args.command in _AGENT_COMMANDS
-        or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
+    if args.command in _AGENT_COMMANDS or (
+        _sub_attr and getattr(args, _sub_attr, None) in _sub_set
    ):
        _accept_hooks = bool(getattr(args, "accept_hooks", False))
        try:
            from hermes_cli.plugins import discover_plugins
+
            discover_plugins()
        except Exception:
            logger.debug(
-                "plugin discovery failed at CLI startup", exc_info=True,
+                "plugin discovery failed at CLI startup",
+                exc_info=True,
            )
        try:
            from hermes_cli.config import load_config
            from agent.shell_hooks import register_from_config
+
            register_from_config(load_config(), accept_hooks=_accept_hooks)
        except Exception:
            logger.debug(
@@ -9085,6 +9332,19 @@ Examples:
                exc_info=True,
            )

+    # Handle top-level --oneshot / -z: single-shot mode, stdout = final
+    # response only, nothing else. Bypasses cli.py entirely.
+    if getattr(args, "oneshot", None):
+        from hermes_cli.oneshot import run_oneshot
+
+        sys.exit(
+            run_oneshot(
+                args.oneshot,
+                model=getattr(args, "model", None),
+                provider=getattr(args, "provider", None),
+            )
+        )
+
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -527,6 +527,42 @@ def _resolve_alias_fallback(
    return None


+def resolve_display_context_length(
+    model: str,
+    provider: str,
+    base_url: str = "",
+    api_key: str = "",
+    model_info: Optional[ModelInfo] = None,
+) -> Optional[int]:
+    """Pick the context length to display in /model output.
+
+    The per-vendor figure carried on ``model_info`` (from models.dev) can be
+    higher than what a given provider actually enforces — e.g. gpt-5.5 is
+    listed at 1.05M on openai while Codex OAuth caps the same slug at 272k.
+    ``agent.model_metadata.get_model_context_length`` is treated as the
+    authoritative, provider-aware source and is asked first.
+
+    Returns the resolver's value when it is truthy; otherwise
+    ``model_info.context_window`` when that is set; otherwise ``None``.
+    Any exception raised while importing or calling the resolver is
+    swallowed and handled like "no answer".
+    """
+    resolved: Optional[int] = None
+    try:
+        from agent.model_metadata import get_model_context_length
+
+        provider_ctx = get_model_context_length(
+            model,
+            base_url=base_url or "",
+            api_key=api_key or "",
+            provider=provider or None,
+        )
+        if provider_ctx:
+            resolved = int(provider_ctx)
+    except Exception:
+        # Best-effort lookup: fall through to the static model_info value.
+        resolved = None
+    if resolved is not None:
+        return resolved
+
+    if model_info is None:
+        return None
+    if not model_info.context_window:
+        return None
+    return int(model_info.context_window)
+
+
 # ---------------------------------------------------------------------------
 # Core model-switching pipeline
 # ---------------------------------------------------------------------------
@@ -42,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openrouter/elephant-alpha",       "free"),
-    ("openai/gpt-5.4",                  ""),
+    ("openai/gpt-5.5",                  ""),
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
    ("xiaomi/mimo-v2.5",                 ""),
@@ -65,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking",  ""),
-    ("openai/gpt-5.4-pro",              ""),
+    ("openai/gpt-5.5-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
 ]

@@ -120,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
-        "openai/gpt-5.4",
+        "openai/gpt-5.5",
        "openai/gpt-5.4-mini",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
@@ -139,7 +139,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "arcee-ai/trinity-large-thinking",
-        "openai/gpt-5.4-pro",
+        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-nano",
    ],
    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
@@ -1379,27 +1379,93 @@ def curated_models_for_provider(
    return [(m, "") for m in models]


-def detect_provider_for_model(
+def _provider_keys(provider: str) -> set[str]:
+    """Return the non-empty spellings of *provider* used for catalog lookups.
+
+    The set holds the stripped, lower-cased input and whatever
+    ``normalize_provider`` returns for it; falsy entries are dropped, so the
+    result may be empty.
+    """
+    candidates = (
+        (provider or "").strip().lower(),
+        normalize_provider(provider),
+    )
+    return set(filter(None, candidates))
+
+
+def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
+    """Tell whether any of *providers* lists the model in its static catalog.
+
+    Catalog entries are lower-cased before comparison; *name_lower* is
+    compared as given.  Providers without a ``_PROVIDER_MODELS`` entry are
+    treated as having an empty catalog.
+    """
+    for provider_id in providers:
+        for catalog_model in _PROVIDER_MODELS.get(provider_id, []):
+            if catalog_model.lower() == name_lower:
+                return True
+    return False
+
+
+# Providers whose catalogs list other vendors' models.  Static detection
+# never auto-switches TO one of these, and alias matching against them uses
+# the "vendor/family" slug form rather than the bare family name.
+_AGGREGATOR_PROVIDERS = frozenset(
+    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
+)
+
+
+def _resolve_static_model_alias(
+    name_lower: str,
+    current_keys: set[str],
+) -> Optional[tuple[str, str]]:
+    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only.
+
+    Looks *name_lower* up in ``MODEL_ALIASES`` and returns the first catalog
+    model whose lower-cased name starts with the alias's family (or
+    ``vendor/family`` for aggregator providers).
+
+    Search order:
+      1. the current provider(s) in *current_keys*, aggregator or not;
+      2. every other non-aggregator provider in ``_PROVIDER_MODELS``.
+
+    Aggregators other than the current provider are never considered.
+
+    Returns ``(provider_id, model_name)``, or ``None`` when the alias is
+    unknown, ``MODEL_ALIASES`` cannot be imported, or no catalog matches.
+    """
+    try:
+        from hermes_cli.model_switch import MODEL_ALIASES
+    except Exception:
+        return None
+
+    identity = MODEL_ALIASES.get(name_lower)
+    if identity is None:
+        return None
+
+    vendor = identity.vendor
+    family = identity.family
+
+    def _match(provider: str) -> Optional[str]:
+        """Return the first catalog model of *provider* matching the alias."""
+        models = _PROVIDER_MODELS.get(provider, [])
+        if not models:
+            return None
+        # Aggregator catalogs use "vendor/model" slugs; direct providers use
+        # bare model names.
+        prefix = (
+            f"{vendor}/{family}"
+            if provider in _AGGREGATOR_PROVIDERS
+            else family
+        ).lower()
+        return next(
+            (model for model in models if model.lower().startswith(prefix)),
+            None,
+        )
+
+    # Prefer staying on the current provider when it can serve the alias.
+    for provider in current_keys:
+        if matched := _match(provider):
+            return provider, matched
+
+    for provider in _PROVIDER_MODELS:
+        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
+            continue
+        if matched := _match(provider):
+            return provider, matched
+
+    # No separate aggregator pass: an aggregator is only eligible when it is
+    # the current provider, and that case is covered by the first loop.
+    return None
+
+
+def detect_static_provider_for_model(
    model_name: str,
    current_provider: str,
 ) -> Optional[tuple[str, str]]:
-    """Auto-detect the best provider for a model name.
+    """Auto-detect a provider from static catalogs only.

-    Returns ``(provider_id, model_name)`` — the model name may be remapped
-    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``(provider_id, model_name)``. The model name may be remapped
+    when a static alias or bare provider name resolves to a catalog default.
    Returns ``None`` when no confident match is found.
-
-    Priority:
-    0. Bare provider name → switch to that provider's default model
-    1. Direct provider with credentials (highest)
-    2. Direct provider without credentials → remap to OpenRouter slug
-    3. OpenRouter catalog match
    """
    name = (model_name or "").strip()
    if not name:
        return None

    name_lower = name.lower()
+    current_keys = _provider_keys(current_provider)
+
+    alias_match = _resolve_static_model_alias(name_lower, current_keys)
+    if alias_match:
+        return alias_match

    # --- Step 0: bare provider name typed as model ---
    # If someone types `/model nous` or `/model anthropic`, treat it as a
@@ -1412,64 +1478,49 @@ def detect_provider_for_model(
        if (
            resolved_provider in _PROVIDER_LABELS
            and default_models
-            and resolved_provider != normalize_provider(current_provider)
+            and resolved_provider not in current_keys
        ):
            return (resolved_provider, default_models[0])

    # Aggregators list other providers' models — never auto-switch TO them
-    _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
-
    # If the model belongs to the current provider's catalog, don't suggest switching
-    current_models = _PROVIDER_MODELS.get(current_provider, [])
-    if any(name_lower == m.lower() for m in current_models):
+    if _model_in_provider_catalog(name_lower, current_keys):
        return None

    # --- Step 1: check static provider catalogs for a direct match ---
-    direct_match: Optional[str] = None
    for pid, models in _PROVIDER_MODELS.items():
-        if pid == current_provider or pid in _AGGREGATORS:
+        if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
            continue
        if any(name_lower == m.lower() for m in models):
-            direct_match = pid
-            break
+            return (pid, name)

-    if direct_match:
-        # Check if we have credentials for this provider — env vars,
-        # credential pool, or auth store entries.
-        has_creds = False
-        try:
-            from hermes_cli.auth import PROVIDER_REGISTRY
-            pconfig = PROVIDER_REGISTRY.get(direct_match)
-            if pconfig:
-                for env_var in pconfig.api_key_env_vars:
-                    if os.getenv(env_var, "").strip():
-                        has_creds = True
-                        break
-        except Exception:
-            pass
-        # Also check credential pool and auth store — covers OAuth,
-        # Claude Code tokens, and other non-env-var credentials (#10300).
-        if not has_creds:
-            try:
-                from agent.credential_pool import load_pool
-                pool = load_pool(direct_match)
-                if pool.has_credentials():
-                    has_creds = True
-            except Exception:
-                pass
-        if not has_creds:
-            try:
-                from hermes_cli.auth import _load_auth_store
-                store = _load_auth_store()
-                if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
-                    has_creds = True
-            except Exception:
-                pass
+    return None

-        # Always return the direct provider match.  If credentials are
-        # missing, the client init will give a clear error rather than
-        # silently routing through the wrong provider (#10300).
-        return (direct_match, name)
+
+def detect_provider_for_model(
+    model_name: str,
+    current_provider: str,
+) -> Optional[tuple[str, str]]:
+    """Auto-detect the best provider for a model name.
+
+    Returns ``(provider_id, model_name)`` — the model name may be remapped
+    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``None`` when no confident match is found.
+
+    Priority:
+    0. Bare provider name → switch to that provider's default model
+    1. Direct provider static catalog match
+    2. OpenRouter catalog match
+    """
+    name = (model_name or "").strip()
+    if not name:
+        return None
+
+    static_match = detect_static_provider_for_model(name, current_provider)
+    if static_match:
+        return static_match
+    if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
+        return None

    # --- Step 2: check OpenRouter catalog ---
    # First try exact match (handles provider/model format)
@@ -0,0 +1,202 @@
+"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
+
+Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
+no stderr chatter.  Just the agent's final text to stdout.
+
+Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
+Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
+Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
+
+Model / provider selection mirrors `hermes chat`:
+    - Both optional. If omitted, use the user's configured default.
+    - If both given, pair them exactly as given.
+    - If only --model given, auto-detect the provider that serves it.
+    - If only --provider given, error out (ambiguous — caller must pick a model).
+
+Env var fallbacks (used when the corresponding arg is not passed):
+    - HERMES_INFERENCE_MODEL
+    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from contextlib import redirect_stderr, redirect_stdout
+from typing import Optional
+
+
+def run_oneshot(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> int:
+    """Execute a single prompt and print only the final content block.
+
+    Args:
+        prompt: The user message to send.
+        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
+            env var, then config.yaml's model.default / model.model.
+        provider: Optional provider override. Falls back to
+            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
+            then "auto".
+
+    Returns the exit code.  Caller should sys.exit() with the return.
+    The code is 2 when ``provider`` is given without any model (argument or
+    HERMES_INFERENCE_MODEL), and 0 otherwise — including when the agent
+    produced an empty response, in which case nothing is printed.
+    """
+    # Silence every stdlib logger for the duration.  AIAgent, tools, and
+    # provider adapters all log through the stdlib logging module, and
+    # logging.disable(CRITICAL) drops every record at CRITICAL and below
+    # before any handler is consulted, so no bytes reach the terminal.
+    # NOTE(review): because the override applies ahead of handler dispatch,
+    # file handlers added by setup_logging() are silenced too — confirm
+    # that losing file logs for -z runs is acceptable.
+    logging.disable(logging.CRITICAL)
+
+    # --provider without --model is ambiguous: carrying the user's configured
+    # model across to a different provider is usually wrong (that provider may
+    # not host it), and silently picking the provider's catalog default hides
+    # the mismatch.  Require the caller to be explicit.  Validate BEFORE the
+    # stderr redirect so the message actually reaches the terminal.
+    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    if provider and not ((model or "").strip() or env_model_early):
+        sys.stderr.write(
+            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
+            "Pass both explicitly, or neither to use your configured defaults.\n"
+        )
+        return 2
+
+    # Auto-approve any shell / tool approvals.  Non-interactive by
+    # definition — a prompt would hang forever.
+    os.environ["HERMES_YOLO_MODE"] = "1"
+    os.environ["HERMES_ACCEPT_HOOKS"] = "1"
+
+    # Redirect stderr AND stdout to devnull for the entire call tree.
+    # We'll print the final response to the real stdout at the end.
+    # Capture the real stdout object first so it survives the redirect.
+    real_stdout = sys.stdout
+    devnull = open(os.devnull, "w")
+
+    try:
+        with redirect_stdout(devnull), redirect_stderr(devnull):
+            response = _run_agent(prompt, model=model, provider=provider)
+    finally:
+        # Always release the devnull handle, even if the agent raised.
+        try:
+            devnull.close()
+        except Exception:
+            pass
+
+    if response:
+        real_stdout.write(response)
+        # Guarantee a trailing newline so shell prompts / pipes stay tidy.
+        if not response.endswith("\n"):
+            real_stdout.write("\n")
+        real_stdout.flush()
+    return 0
+
+
+def _run_agent(
+    prompt: str,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+) -> str:
+    """Build an AIAgent exactly like a normal CLI chat turn would, then
+    run a single conversation.  Returns the final response string.
+
+    Args:
+        prompt: The user message passed to ``agent.chat``.
+        model: Optional explicit model; when empty, HERMES_INFERENCE_MODEL
+            and then the ``model`` section of the loaded config are used.
+        provider: Optional explicit provider; when empty and a model was
+            given explicitly (argument or env var), the provider is
+            auto-detected via ``detect_provider_for_model``.
+
+    Returns:
+        The value of ``agent.chat(prompt)``, or ``""`` when that is falsy.
+    """
+    # Imports are local so they don't run when hermes is invoked for
+    # other commands (keeps top-level CLI startup cheap).
+    from hermes_cli.config import load_config
+    from hermes_cli.models import detect_provider_for_model
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+    from hermes_cli.tools_config import _get_platform_tools
+    from run_agent import AIAgent
+
+    cfg = load_config()
+
+    # Resolve effective model: explicit arg → env var → config.
+    # The config's "model" entry may be a bare string or a mapping.
+    model_cfg = cfg.get("model") or {}
+    if isinstance(model_cfg, str):
+        cfg_model = model_cfg
+    else:
+        cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
+
+    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
+    effective_model = (model or "").strip() or env_model or cfg_model
+
+    # Resolve effective provider: explicit arg → (auto-detect from model if
+    # model was explicit) → env / config (handled inside resolve_runtime_provider).
+    #
+    # When --model is given without --provider, auto-detect the provider that
+    # serves that model — same semantic as `/model <name>` in an interactive
+    # session.  Without this, resolve_runtime_provider() would fall back to
+    # the user's configured default provider, which may not host the model
+    # the caller just asked for.
+    effective_provider = (provider or "").strip() or None
+    if effective_provider is None and (model or env_model):
+        # Only auto-detect when the model was explicitly requested via arg or
+        # env var (not when it came from config — that's the "use my defaults"
+        # path and the configured provider is already correct).
+        explicit_model = (model or "").strip() or env_model
+        if explicit_model:
+            cfg_provider = ""
+            if isinstance(model_cfg, dict):
+                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+            # NOTE(review): config's model.provider wins over
+            # HERMES_INFERENCE_PROVIDER here, whereas run_oneshot's docstring
+            # lists the env var first — confirm the intended precedence.
+            current_provider = (
+                cfg_provider
+                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+                or "auto"
+            )
+            detected = detect_provider_for_model(explicit_model, current_provider)
+            if detected:
+                # Detection may remap the model name as well as the provider.
+                effective_provider, effective_model = detected
+
+    runtime = resolve_runtime_provider(
+        requested=effective_provider,
+        target_model=effective_model or None,
+    )
+
+    # Pull in whatever toolsets the user has enabled for "cli".
+    # sorted() gives stable ordering; set→list for AIAgent's signature.
+    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
+
+    agent = AIAgent(
+        api_key=runtime.get("api_key"),
+        base_url=runtime.get("base_url"),
+        provider=runtime.get("provider"),
+        api_mode=runtime.get("api_mode"),
+        model=effective_model,
+        enabled_toolsets=toolsets_list,
+        quiet_mode=True,
+        platform="cli",
+        credential_pool=runtime.get("credential_pool"),
+        # Interactive callbacks are intentionally NOT wired beyond this
+        # one.  In oneshot mode there's no user sitting at a terminal:
+        #   - clarify  → returns a synthetic "pick a default" instruction
+        #                so the agent continues instead of stalling on
+        #                the tool's built-in "not available" error
+        #   - sudo password prompt → terminal_tool gates on
+        #                HERMES_INTERACTIVE which we never set
+        #   - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
+        #                (set above); also falls back to deny on non-tty
+        #   - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
+        #   - skill secret capture → returns gracefully when no callback set
+        clarify_callback=_oneshot_clarify_callback,
+    )
+
+    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
+    # display callbacks that would bypass our stdout capture.
+    agent.suppress_status_output = True
+    agent.stream_delta_callback = None
+    agent.tool_gen_callback = None
+
+    return agent.chat(prompt) or ""
+
+
+def _oneshot_clarify_callback(question: str, choices=None) -> str:
+    """Stand-in for the interactive clarify prompt in oneshot mode.
+
+    Nobody is available to answer, so instead of stalling or erroring the
+    agent is handed a synthetic instruction: choose among ``choices`` when
+    any were offered, otherwise make a reasonable assumption.  ``question``
+    is accepted for signature compatibility and is not used.
+    """
+    # Guard clause: no options offered (None or empty) → generic guidance.
+    if not choices:
+        return (
+            "[oneshot mode: no user available. Make the most reasonable "
+            "assumption you can and continue.]"
+        )
+    return (
+        f"[oneshot mode: no user available. Pick the best option from "
+        f"{choices} using your own judgment and continue.]"
+    )
@@ -0,0 +1,229 @@
+"""PTY bridge for `hermes dashboard` chat tab.
+
+Wraps a child process behind a pseudo-terminal so its ANSI output can be
+streamed to a browser-side terminal emulator (xterm.js) and typed
+keystrokes can be fed back in.  The only caller today is the
+``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
+
+Design constraints:
+
+* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
+  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
+  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
+  install/platform message so the dashboard can render a banner instead of
+  crashing.
+* **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
+  which is a pure-Python wrapper around the OS calls.  The browser talks
+  to the same ``hermes --tui`` binary it would launch from the CLI, so
+  every TUI feature (slash popover, model picker, tool rows, markdown,
+  skin engine, clarify/sudo/approval prompts) ships automatically.
+* **Byte-safe I/O.**  Reads and writes go through the PTY master fd
+  directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
+  streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
+  mid-read.
+"""
+
+from __future__ import annotations
+
+import errno
+import fcntl
+import os
+import select
+import signal
+import struct
+import sys
+import termios
+import time
+from typing import Optional, Sequence
+
+try:
+    import ptyprocess  # type: ignore
+    _PTY_AVAILABLE = not sys.platform.startswith("win")
+except ImportError:  # pragma: no cover - dev env without ptyprocess
+    ptyprocess = None  # type: ignore
+    _PTY_AVAILABLE = False
+
+
+__all__ = ["PtyBridge", "PtyUnavailableError"]
+
+
+class PtyUnavailableError(RuntimeError):
+    """Raised when a PTY cannot be created on this platform.
+
+    Today this means native Windows (no ConPTY bindings) or a dev
+    environment missing the ``ptyprocess`` dependency.  The dashboard
+    surfaces the message to the user as a chat-tab banner.
+
+    Raised by :meth:`PtyBridge.spawn` when the module-level
+    ``_PTY_AVAILABLE`` flag is false; the message text distinguishes the
+    Windows case from the missing-package case.
+    """
+
+
+class PtyBridge:
+    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
+
+    Not thread-safe.  A single bridge is owned by the WebSocket handler
+    that spawned it; the reader runs in an executor thread while writes
+    happen on the event-loop thread.  Both sides are OK because the
+    kernel PTY is the actual synchronization point — we never call
+    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
+    ``os.write`` on the master fd, which is safe.
+    """
+
+    # ``struct winsize`` fields are unsigned shorts; anything larger makes
+    # ``struct.pack("H", ...)`` raise, so resize() clamps into this range.
+    _MAX_WINSIZE = 0xFFFF
+
+    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
+        self._proc = proc
+        self._fd: int = proc.fd
+        self._closed = False
+
+    # -- lifecycle --------------------------------------------------------
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """True if a PTY can be spawned on this platform."""
+        return bool(_PTY_AVAILABLE)
+
+    @classmethod
+    def spawn(
+        cls,
+        argv: Sequence[str],
+        *,
+        cwd: Optional[str] = None,
+        env: Optional[dict] = None,
+        cols: int = 80,
+        rows: int = 24,
+    ) -> "PtyBridge":
+        """Spawn ``argv`` behind a new PTY and return a bridge.
+
+        Raises :class:`PtyUnavailableError` if the platform can't host a
+        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
+        ordinary exec failures (missing binary, bad cwd, etc.).
+        """
+        if not _PTY_AVAILABLE:
+            # NOTE(review): this module imports ``fcntl`` and ``termios`` at
+            # top level, which are Unix-only; on native Windows the import
+            # of this module likely fails before this branch can run —
+            # confirm callers guard the import.
+            if sys.platform.startswith("win"):
+                raise PtyUnavailableError(
+                    "Pseudo-terminals are unavailable on this platform. "
+                    "Hermes Agent supports Windows only via WSL."
+                )
+            if ptyprocess is None:
+                raise PtyUnavailableError(
+                    "The `ptyprocess` package is missing. "
+                    "Install with: pip install ptyprocess "
+                    "(or pip install -e '.[pty]')."
+                )
+            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
+        # Let caller-supplied env fully override inheritance; if they pass
+        # None we inherit the server's env (same semantics as subprocess).
+        spawn_env = os.environ.copy() if env is None else env
+        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
+            list(argv),
+            cwd=cwd,
+            env=spawn_env,
+            dimensions=(rows, cols),
+        )
+        return cls(proc)
+
+    @property
+    def pid(self) -> int:
+        """Process id of the child attached to the PTY."""
+        return int(self._proc.pid)
+
+    def _child_alive(self) -> bool:
+        """``isalive()`` that treats any error from ptyprocess as "dead".
+
+        Shared by :meth:`is_alive` and :meth:`close` so teardown can never
+        be aborted by a liveness probe raising.
+        """
+        try:
+            return bool(self._proc.isalive())
+        except Exception:
+            return False
+
+    def is_alive(self) -> bool:
+        """True while the bridge is open and the child is still running."""
+        if self._closed:
+            return False
+        return self._child_alive()
+
+    # -- I/O --------------------------------------------------------------
+
+    def read(self, timeout: float = 0.2) -> Optional[bytes]:
+        """Read up to 64 KiB of raw bytes from the PTY master.
+
+        Returns:
+            * bytes — one or more bytes of child output
+            * empty bytes (``b""``) — no data available within ``timeout``
+            * None — child has exited and the master fd is at EOF
+
+        Never blocks longer than ``timeout`` seconds.  Safe to call after
+        :meth:`close`; returns ``None`` in that case.
+        """
+        if self._closed:
+            return None
+        try:
+            readable, _, _ = select.select([self._fd], [], [], timeout)
+        except (OSError, ValueError):
+            # The fd was closed or invalidated underneath us; treat as EOF.
+            return None
+        if not readable:
+            return b""
+        try:
+            data = os.read(self._fd, 65536)
+        except OSError as exc:
+            # EIO on Linux = slave side closed.  EBADF = already closed.
+            if exc.errno in (errno.EIO, errno.EBADF):
+                return None
+            raise
+        if not data:
+            return None
+        return data
+
+    def write(self, data: bytes) -> None:
+        """Write raw bytes to the PTY master (i.e. the child's stdin)."""
+        if self._closed or not data:
+            return
+        # os.write can return a short write under load; loop until drained.
+        view = memoryview(data)
+        while view:
+            try:
+                n = os.write(self._fd, view)
+            except OSError as exc:
+                # Child/fd already gone: drop the remaining bytes silently.
+                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
+                    return
+                raise
+            if n <= 0:
+                return
+            view = view[n:]
+
+    def resize(self, cols: int, rows: int) -> None:
+        """Forward a terminal resize to the child via ``TIOCSWINSZ``.
+
+        ``cols`` and ``rows`` are clamped to ``1..65535`` so an oversized
+        request cannot make ``struct.pack`` raise.
+        """
+        if self._closed:
+            return
+        limit = self._MAX_WINSIZE
+        safe_rows = min(max(1, rows), limit)
+        safe_cols = min(max(1, cols), limit)
+        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
+        winsize = struct.pack("HHHH", safe_rows, safe_cols, 0, 0)
+        try:
+            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
+        except OSError:
+            # Best-effort: a failed resize must not take the session down.
+            pass
+
+    # -- teardown ---------------------------------------------------------
+
+    def close(self) -> None:
+        """Terminate the child and close fds.
+
+        Escalates SIGHUP → SIGTERM → SIGKILL, waiting up to 0.5s after
+        each signal for the child to exit.  Idempotent.  Reaping the child
+        is important so we don't leak zombies across the lifetime of the
+        dashboard process.
+        """
+        if self._closed:
+            return
+        self._closed = True
+
+        # SIGHUP is the conventional "your terminal went away" signal.
+        # We escalate if the child ignores it.
+        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
+            if not self._child_alive():
+                break
+            try:
+                self._proc.kill(sig)
+            except Exception:
+                pass
+            deadline = time.monotonic() + 0.5
+            while self._child_alive() and time.monotonic() < deadline:
+                time.sleep(0.02)
+
+        # Always reached, even if liveness probing failed above, so the
+        # master fd is released.
+        try:
+            self._proc.close(force=True)
+        except Exception:
+            pass
+
+    # Context-manager sugar — handy in tests and ad-hoc scripts.
+    def __enter__(self) -> "PtyBridge":
+        return self
+
+    def __exit__(self, *_exc) -> None:
+        self.close()
@@ -68,25 +68,58 @@ CONFIGURABLE_TOOLSETS = [
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
    ("spotify",          "🎵 Spotify",                  "playback, search, playlists, library"),
+    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
+    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
+
+# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
+# these platforms, and only resolve/save for these platforms.  A toolset
+# absent from this map is available on every platform (current behaviour).
+#
+# Use this for tools whose APIs only make sense on one platform (Discord
+# server admin, Slack workspace admin, etc.).  Keeps every other platform's
+# checklist from filling up with irrelevant toggles.
+_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
+    "discord": {"discord"},
+    "discord_admin": {"discord"},
+}
+
+
+def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
+    """Tell whether toolset ``ts_key`` may be configured on ``platform``.
+
+    Looks ``ts_key`` up in ``_TOOLSET_PLATFORM_RESTRICTIONS``.  A toolset
+    with no entry there is unrestricted and allowed on every platform;
+    otherwise ``platform`` must be a member of the toolset's allowed set.
+    """
+    restriction = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
+    if restriction is None:
+        # No restriction recorded → available everywhere.
+        return True
+    return platform in restriction


 def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
-    built-in toolsets in the TUI checklist.
+    built-in toolsets in the TUI checklist. A plugin whose toolset key
+    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
+    plugins (e.g. ``plugins/spotify``) share their toolset key with the
+    built-in entry, and we want the built-in label/description to win.
+    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
+    list the same toolset twice.
    """
    result = list(CONFIGURABLE_TOOLSETS)
+    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
-        result.extend(get_plugin_toolsets())
+        for entry in get_plugin_toolsets():
+            if entry[0] in seen:
+                continue
+            seen.add(entry[0])
+            result.append(entry)
    except Exception:
        pass
    return result
@@ -368,13 +401,9 @@ TOOL_CATEGORIES = {
        "providers": [
            {
                "name": "Spotify Web API",
-                "tag": "PKCE OAuth — run `hermes auth spotify` after this",
-                "env_vars": [
-                    {"key": "HERMES_SPOTIFY_CLIENT_ID", "prompt": "Spotify app client_id",
-                     "url": "https://developer.spotify.com/dashboard"},
-                    {"key": "HERMES_SPOTIFY_REDIRECT_URI", "prompt": "Redirect URI (must be allow-listed in your Spotify app)",
-                     "default": "http://127.0.0.1:43827/spotify/callback"},
-                ],
+                "tag": "PKCE OAuth — opens the setup wizard",
+                "env_vars": [],
+                "post_setup": "spotify",
            },
        ],
    },
@@ -478,6 +507,35 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    kittentts install timed out (>5min)")
            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")

+    elif post_setup_key == "spotify":
+        # Run the full `hermes auth spotify` flow — if the user has no
+        # client_id yet, this drops them into the interactive wizard
+        # (opens the Spotify dashboard, prompts for client_id, persists
+        # to ~/.hermes/.env), then continues straight into PKCE. If they
+        # already have an app, it skips the wizard and just does OAuth.
+        from types import SimpleNamespace
+        try:
+            from hermes_cli.auth import login_spotify_command
+        except Exception as exc:
+            _print_warning(f"    Could not load Spotify auth: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+            return
+        _print_info("    Starting Spotify login...")
+        try:
+            login_spotify_command(SimpleNamespace(
+                client_id=None, redirect_uri=None, scope=None,
+                no_browser=False, timeout=None,
+            ))
+            _print_success("    Spotify authenticated")
+        except SystemExit as exc:
+            # User aborted the wizard, or OAuth failed — don't fail the
+            # toolset enable; they can retry with `hermes auth spotify`.
+            _print_warning(f"    Spotify login did not complete: {exc}")
+            _print_info("    Run later: hermes auth spotify")
+        except Exception as exc:
+            _print_warning(f"    Spotify login failed: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -566,7 +624,7 @@ def _get_platform_tools(
    include_default_mcp_servers: bool = True,
 ) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset
+    from toolsets import resolve_toolset, TOOLSETS

    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)
@@ -580,6 +638,8 @@ def _get_platform_tools(
    toolset_names = [str(ts) for ts in toolset_names]

    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    plugin_ts_keys = _get_plugin_toolset_keys()
+    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # If the saved list contains any configurable keys directly, the user
    # has explicitly configured this platform — use direct membership.
@@ -589,7 +649,10 @@ def _get_platform_tools(
    has_explicit_config = any(ts in configurable_keys for ts in toolset_names)

    if has_explicit_config:
-        enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
+        enabled_toolsets = {
+            ts for ts in toolset_names
+            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
+        }
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -599,14 +662,52 @@ def _get_platform_tools(

        enabled_toolsets = set()
        for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
+            if not _toolset_allowed_for_platform(ts_key, platform):
+                continue
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+
        default_off = set(_DEFAULT_OFF_TOOLSETS)
-        if platform in default_off:
+        # Legacy safety: if the platform's own name matches a default-off
+        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
+        # keep that toolset enabled on first install.  Skip this dodge for
+        # platform-restricted toolsets — those are always opt-in even on
+        # their own platform (e.g. `discord` + `discord` should stay OFF).
+        if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
            default_off.remove(platform)
        enabled_toolsets -= default_off

+    # Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
+    # feishu_drive).  These are part of the platform's default composite but
+    # absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
+    # checklist or in a user-saved config.  Must run in BOTH branches —
+    # otherwise saving via `hermes tools` (which flips has_explicit_config
+    # to True) silently drops them.
+    platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
+    configurable_tool_universe = set()
+    for ck in configurable_keys:
+        configurable_tool_universe.update(resolve_toolset(ck))
+    claimed = set()
+    for ts_key in enabled_toolsets:
+        claimed.update(resolve_toolset(ts_key))
+    skip = configurable_keys | plugin_ts_keys | platform_default_keys
+    skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
+    skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
+    for ts_key, ts_def in TOOLSETS.items():
+        if ts_key in skip:
+            continue
+        if ts_def.get("includes"):
+            continue
+        ts_tools = set(resolve_toolset(ts_key))
+        if not ts_tools or not ts_tools.issubset(platform_tool_universe):
+            continue
+        if ts_tools.issubset(configurable_tool_universe):
+            continue
+        if not ts_tools.issubset(claimed):
+            enabled_toolsets.add(ts_key)
+            claimed.update(ts_tools)
+
    # Plugin toolsets: enabled by default unless explicitly disabled, or
    # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
    # shipped as a bundled plugin but user must opt in via `hermes tools`
@@ -614,7 +715,6 @@ def _get_platform_tools(
    # A plugin toolset is "known" for a platform once `hermes tools`
    # has been saved for that platform (tracked via known_plugin_toolsets).
    # Unknown plugins default to enabled; known-but-absent = disabled.
-    plugin_ts_keys = _get_plugin_toolset_keys()
    if plugin_ts_keys:
        known_map = config.get("known_plugin_toolsets", {})
        known_for_platform = set(known_map.get(platform, []))
@@ -632,7 +732,6 @@ def _get_platform_tools(

    # Preserve any explicit non-configurable toolset entries (for example,
    # custom toolsets or MCP server names saved in platform_toolsets).
-    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
    explicit_passthrough = {
        ts
        for ts in toolset_names
@@ -678,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    """
    config.setdefault("platform_toolsets", {})

+    # Drop platform-scoped toolsets that don't apply here.  Prevents the
+    # "Configure all platforms" checklist (or a hand-edited config.yaml)
+    # from turning on, say, the `discord` toolset for Telegram.
+    enabled_toolset_keys = {
+        ts for ts in enabled_toolset_keys
+        if _toolset_allowed_for_platform(ts, platform)
+    }
+
    # Get the set of all configurable toolset keys (built-in + plugin)
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    plugin_keys = _get_plugin_toolset_keys()
@@ -692,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
+    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -699,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
+    # Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
+    # saving from the picker as consent to clear the "no_mcp" sentinel. The
+    # picker has no checkbox for no_mcp, so without this users who once set it
+    # by hand could never re-enable MCP servers through the UI.
+    preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -806,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    return _tool_token_cache


-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
+def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
    from hermes_cli.curses_ui import curses_checklist
    from toolsets import resolve_toolset
@@ -814,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
    # Pre-compute per-tool token counts (cached after first call).
    tool_tokens = _estimate_tool_tokens()

-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    # Drop platform-scoped toolsets that don't apply to this platform.
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]

    labels = []
    for ts_key, ts_label, ts_desc in effective:
@@ -1728,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS

            # Show checklist
-            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)

            added = new_enabled - current_enabled
            removed = current_enabled - new_enabled
@@ -2084,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]

 def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]
    builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    print(f"Built-in toolsets ({platform}):")
@@ -2150,6 +2272,20 @@ def tools_disable_enable_command(args):
            _print_error(f"Unknown toolset '{name}'")
        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]

+    # Reject platform-scoped toolsets on platforms that don't allow them.
+    restricted_targets = [
+        t for t in toolset_targets
+        if not _toolset_allowed_for_platform(t, platform)
+    ]
+    if restricted_targets:
+        for name in restricted_targets:
+            allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
+            _print_error(
+                f"Toolset '{name}' is not available on platform '{platform}' "
+                f"(only: {', '.join(allowed)})"
+            )
+        toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
+
    if toolset_targets:
        _apply_toolset_change(config, platform, toolset_targets, action)

@@ -288,30 +288,34 @@ def get_tool_definitions(
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

-    # Rebuild discord_server schema based on the bot's privileged intents
-    # (detected from GET /applications/@me) and the user's action allowlist
-    # in config.  Hides actions the bot's intents don't support so the
-    # model never attempts them, and annotates fetch_messages when the
+    # Rebuild discord / discord_admin schemas based on the bot's privileged
+    # intents (detected from GET /applications/@me) and the user's action
+    # allowlist in config.  Hides actions the bot's intents don't support so
+    # the model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
-    if "discord_server" in available_tool_names:
-        try:
-            from tools.discord_tool import get_dynamic_schema
-            dynamic = get_dynamic_schema()
-        except Exception:  # pragma: no cover — defensive, fall back to static
-            dynamic = None
-        if dynamic is None:
-            # Tool filtered out entirely (empty allowlist or detection disabled
-            # the only remaining actions).  Drop it from the schema list.
-            filtered_tools = [
-                t for t in filtered_tools
-                if t.get("function", {}).get("name") != "discord_server"
-            ]
-            available_tool_names.discard("discord_server")
-        else:
-            for i, td in enumerate(filtered_tools):
-                if td.get("function", {}).get("name") == "discord_server":
-                    filtered_tools[i] = {"type": "function", "function": dynamic}
-                    break
+    _discord_schema_fns = {
+        "discord": "get_dynamic_schema_core",
+        "discord_admin": "get_dynamic_schema_admin",
+    }
+    for discord_tool_name in _discord_schema_fns:
+        if discord_tool_name in available_tool_names:
+            try:
+                from tools import discord_tool as _dt
+                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
+                dynamic = schema_fn()
+            except Exception:
+                dynamic = None
+            if dynamic is None:
+                filtered_tools = [
+                    t for t in filtered_tools
+                    if t.get("function", {}).get("name") != discord_tool_name
+                ]
+                available_tool_names.discard(discord_tool_name)
+            else:
+                for i, td in enumerate(filtered_tools):
+                    if td.get("function", {}).get("name") == discord_tool_name:
+                        filtered_tools[i] = {"type": "function", "function": dynamic}
+                        break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
@@ -464,9 +468,9 @@ def _coerce_number(value: str, integer_only: bool = False):
        f = float(value)
    except (ValueError, OverflowError):
        return value
-    # Guard against inf/nan before int() conversion
+    # Guard against inf/nan — not JSON-serializable, keep original string
    if f != f or f == float("inf") or f == float("-inf"):
-        return f
+        return value
    # If it looks like an integer (no fractional part), return int
    if f == int(f):
        return int(f)
@@ -156,7 +156,7 @@
      for entry in "''${ENTRIES[@]}"; do
        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
-        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
        STATUS=$?
        if [ "$STATUS" -eq 0 ]; then
          echo "    ok"
@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
+    hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -91,4 +91,29 @@

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
+
+  // ─────────────────────────────────────────────────────────────────────
+  // Page-scoped slot demo: inject a small banner at the top of /sessions.
+  //
+  // Built-in pages expose named slots (<page>:top, <page>:bottom) that
+  // plugins can populate without overriding the whole route. The
+  // manifest lists the slots we use in its `slots` array so the shell
+  // knows to render <PluginSlot name="sessions:top" /> there.
+  // ─────────────────────────────────────────────────────────────────────
+  function SessionsTopBanner() {
+    // Demo component for the `sessions:top` slot: a dashed card holding an
+    // "Example" badge and a one-line explanation of where it came from.
+    var exampleBadge = React.createElement(Badge, { variant: "outline" }, "Example");
+    var slotName = React.createElement("code", { className: "font-courier" }, "sessions:top");
+    var explanation = React.createElement(
+      "span",
+      { className: "text-xs text-muted-foreground" },
+      "This banner was injected into the Sessions page by the example plugin via the ",
+      slotName,
+      " slot."
+    );
+    var content = React.createElement(
+      CardContent,
+      { className: "flex items-center gap-3 py-2" },
+      exampleBadge,
+      explanation
+    );
+    return React.createElement(Card, { className: "border-dashed" }, content);
+  }
+
+  window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
 })();
@@ -8,6 +8,7 @@
    "path": "/example",
    "position": "after:skills"
  },
+  "slots": ["sessions:top"],
  "entry": "dist/index.js",
  "api": "plugin_api.py"
 }
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
 # even if shutdown_memory_provider is never called (e.g. gateway crash,
-# SIGKILL, or exception in _async_flush_memories preventing shutdown).
+# SIGKILL, or exception in the session expiry watcher preventing shutdown).
 # ---------------------------------------------------------------------------
 _last_active_provider: Optional["OpenVikingMemoryProvider"] = None

@@ -78,6 +78,16 @@ termux = [
 ]
 dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
 feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
+google = [
+  # Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
+  # Sheets, Docs).  Declared here so packagers (Nix, Homebrew) ship them with
+  # the [all] extra and users don't hit runtime `pip install` paths that fail
+  # in environments without pip (e.g. Nix-managed Python).
+  "google-api-python-client>=2.100,<3",
+  "google-auth-oauthlib>=1.0,<2",
+  "google-auth-httplib2>=0.2,<1",
+]
+# `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
 web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
@@ -109,6 +119,7 @@ all = [
  "hermes-agent[voice]",
  "hermes-agent[dingtalk]",
  "hermes-agent[feishu]",
+  "hermes-agent[google]",
  "hermes-agent[mistral]",
  "hermes-agent[bedrock]",
  "hermes-agent[web]",
@@ -502,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
    return found


+def _escape_invalid_chars_in_json_strings(raw: str) -> str:
+    """Return ``raw`` with bare control characters inside JSON strings escaped.
+
+    The text is scanned once while tracking whether the scanner sits
+    between a pair of double quotes.  Within a string, every character
+    below U+0020 is rewritten as a ``\\uXXXX`` escape.  A backslash and the
+    character right after it are copied through verbatim, so existing
+    escape sequences are never altered and an escaped quote is not taken
+    for the end of the string.  Text outside of strings is copied unchanged.
+
+    Ported from #12093.  ``_repair_tool_call_arguments`` runs this as a
+    late repair pass on input that still fails to parse after its earlier
+    passes.
+    """
+    pieces: list[str] = []
+    inside_string = False
+    copy_next_verbatim = False
+    for char in raw:
+        if copy_next_verbatim:
+            # Second half of a backslash pair: keep it exactly as written.
+            pieces.append(char)
+            copy_next_verbatim = False
+            continue
+        if not inside_string:
+            if char == '"':
+                inside_string = True
+            pieces.append(char)
+            continue
+        if char == "\\":
+            # Start of an escape sequence; the following character (if any)
+            # must not be interpreted as a quote or a control character.
+            pieces.append(char)
+            copy_next_verbatim = True
+        elif char == '"':
+            inside_string = False
+            pieces.append(char)
+        elif ord(char) < 0x20:
+            pieces.append(f"\\u{ord(char):04x}")
+        else:
+            pieces.append(char)
+    return "".join(pieces)
+
+
 def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    """Attempt to repair malformed tool_call argument JSON.

@@ -523,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
        logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
        return "{}"

+    # Repair pass 0: llama.cpp backends sometimes emit literal control
+    # characters (tabs, newlines) inside JSON string values. json.loads
+    # with strict=False accepts these and lets us re-serialise the
+    # result into wire-valid JSON without any string surgery. This is
+    # the most common local-model repair case (#12068).
+    try:
+        parsed = json.loads(raw_stripped, strict=False)
+        reserialised = json.dumps(parsed, separators=(",", ":"))
+        if reserialised != raw_stripped:
+            logger.warning(
+                "Repaired unescaped control chars in tool_call arguments for %s",
+                tool_name,
+            )
+        return reserialised
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
    # Attempt common JSON repairs
    fixed = raw_stripped
    # 1. Strip trailing commas before } or ]
@@ -557,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
    except json.JSONDecodeError:
        pass

+    # Repair pass 4: escape unescaped control chars inside JSON strings,
+    # then retry. Catches cases where strict=False alone fails because
+    # other malformations are present too.
+    try:
+        escaped = _escape_invalid_chars_in_json_strings(fixed)
+        if escaped != fixed:
+            json.loads(escaped)
+            logger.warning(
+                "Repaired control-char-laced tool_call arguments for %s: %s → %s",
+                tool_name, raw_stripped[:80], escaped[:80],
+            )
+            return escaped
+    except (json.JSONDecodeError, TypeError, ValueError):
+        pass
+
    # Last resort: replace with empty object so the API request doesn't
    # crash the entire session.
    logger.warning(
@@ -740,6 +814,11 @@ class AIAgent:
    for AI models that support function calling.
    """

+    _TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER = (
+        "[hermes-agent: tool call arguments were corrupted in this session and "
+        "have been dropped to keep the conversation alive. See issue #15236.]"
+    )
+
    @property
    def base_url(self) -> str:
        return self._base_url
@@ -1437,6 +1516,8 @@ class AIAgent:
        
        # Track conversation messages for session logging
        self._session_messages: List[Dict[str, Any]] = []
+        self._memory_write_origin = "assistant_tool"
+        self._memory_write_context = "foreground"
        
        # Cached system prompt -- built once per session, only rebuilt on compression
        self._cached_system_prompt: Optional[str] = None
@@ -1497,7 +1578,6 @@ class AIAgent:
        self._memory_enabled = False
        self._user_profile_enabled = False
        self._memory_nudge_interval = 10
-        self._memory_flush_min_turns = 6
        self._turns_since_memory = 0
        self._iters_since_skill = 0
        if not skip_memory:
@@ -1506,7 +1586,6 @@ class AIAgent:
                self._memory_enabled = mem_config.get("memory_enabled", False)
                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
-                self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
                if self._memory_enabled or self._user_profile_enabled:
                    from tools.memory_tool import MemoryStore
                    self._memory_store = MemoryStore(
@@ -2231,6 +2310,34 @@ class AIAgent:
            except Exception:
                logger.debug("status_callback error in _emit_status", exc_info=True)

+    def _emit_warning(self, message: str) -> None:
+        """Show ``message`` to the user as a warning.
+
+        Intended for degraded side paths (auxiliary compression, memory
+        flushes, ...) where the main turn can continue but the user should
+        still learn that something failed.
+
+        The text is printed through ``_vprint`` with ``log_prefix`` prepended
+        and ``force=True``; when ``status_callback`` is set it also receives
+        ``("warn", message)``.  Both channels are best-effort: an exception
+        raised by either one is logged at debug level and never propagates.
+        """
+        try:
+            self._vprint(f"{self.log_prefix}{message}", force=True)
+        except Exception:
+            # Printing must never break the turn, but leave a trace so a
+            # broken output path can still be diagnosed.
+            logger.debug("_vprint error in _emit_warning", exc_info=True)
+        if self.status_callback:
+            try:
+                self.status_callback("warn", message)
+            except Exception:
+                logger.debug("status_callback error in _emit_warning", exc_info=True)
+
+    def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
+        """Emit a compact warning saying that auxiliary ``task`` failed.
+
+        The detail text comes from ``_summarize_api_error(exc)``; if that
+        call raises, ``str(exc)`` is used instead.  A detail that is empty
+        or only whitespace is replaced by the exception class name, and
+        anything longer than 220 characters is cut to 217 plus ``"..."``.
+        The final message is handed to ``_emit_warning``.
+        """
+        try:
+            detail = self._summarize_api_error(exc)
+        except Exception:
+            detail = str(exc)
+        # Strip before applying the fallback so a whitespace-only summary
+        # still names the exception class instead of ending up empty.
+        detail = (detail or "").strip() or exc.__class__.__name__
+        if len(detail) > 220:
+            detail = detail[:217].rstrip() + "..."
+        self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
+
    def _current_main_runtime(self) -> Dict[str, str]:
        """Return the live main runtime for session-scoped auxiliary routing."""
        return {
@@ -2290,6 +2397,7 @@ class AIAgent:
                base_url=aux_base_url,
                api_key=aux_api_key,
                config_context_length=getattr(self, "_aux_compression_context_length_config", None),
+                provider=getattr(self, "provider", ""),
            )

            # Hard floor: the auxiliary compression model must have at least
@@ -2316,6 +2424,11 @@ class AIAgent:
                # compression actually works this session.  The hard floor
                # above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
                # so the new threshold is always >= 64K.
+                #
+                # The compression summariser sends a single user-role
+                # prompt (no system prompt, no tools) to the aux model, so
+                # new_threshold == aux_context is safe: the request is
+                # the raw messages plus a small summarisation instruction.
                old_threshold = threshold
                new_threshold = aux_context
                self.context_compressor.threshold_tokens = new_threshold
@@ -3047,7 +3160,10 @@ class AIAgent:
                        quiet_mode=True,
                        platform=self.platform,
                        provider=self.provider,
+                        parent_session_id=self.session_id,
                    )
+                    review_agent._memory_write_origin = "background_review"
+                    review_agent._memory_write_context = "background_review"
                    review_agent._memory_store = self._memory_store
                    review_agent._memory_enabled = self._memory_enabled
                    review_agent._user_profile_enabled = self._user_profile_enabled
@@ -3081,7 +3197,8 @@ class AIAgent:
                            pass

            except Exception as e:
-                logger.debug("Background memory/skill review failed: %s", e)
+                logger.warning("Background memory/skill review failed: %s", e)
+                self._emit_auxiliary_failure("background review", e)
            finally:
                # Close all resources (httpx client, subprocesses, etc.) so
                # GC doesn't try to clean them up on a dead asyncio event
@@ -3095,6 +3212,32 @@ class AIAgent:
        t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
        t.start()

+    def _build_memory_write_metadata(
+        self,
+        *,
+        write_origin: Optional[str] = None,
+        execution_context: Optional[str] = None,
+        task_id: Optional[str] = None,
+        tool_call_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Assemble provenance metadata for external memory-provider mirrors.
+
+        Explicit ``write_origin`` / ``execution_context`` arguments win over
+        the agent-level ``_memory_write_origin`` / ``_memory_write_context``
+        attributes, which in turn default to ``"assistant_tool"`` and
+        ``"foreground"``.  ``platform`` falls back to the
+        ``HERMES_SESSION_SOURCE`` environment variable and then ``"cli"``.
+        ``tool_name`` is always ``"memory"``.
+
+        Entries whose value is ``None`` or an empty string are left out of
+        the returned dict, so optional ids only appear when supplied.
+        """
+        origin = write_origin or getattr(self, "_memory_write_origin", "assistant_tool")
+        context = execution_context or getattr(self, "_memory_write_context", "foreground")
+        # Ordered (key, value) pairs; unset optionals are normalised to None
+        # so the single filter below drops them together with blank fields.
+        candidates = [
+            ("write_origin", origin),
+            ("execution_context", context),
+            ("session_id", self.session_id or ""),
+            ("parent_session_id", self._parent_session_id or ""),
+            ("platform", self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli")),
+            ("tool_name", "memory"),
+            ("task_id", task_id or None),
+            ("tool_call_id", tool_call_id or None),
+        ]
+        return {key: value for key, value in candidates if value not in (None, "")}
+
    def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
        """Rewrite the current-turn user message before persistence/return.

@@ -4023,6 +4166,49 @@ class AIAgent:
        except Exception:
            pass

+    def _sync_external_memory_for_turn(
+        self,
+        *,
+        original_user_message: Any,
+        final_response: Any,
+        interrupted: bool,
+    ) -> None:
+        """Mirror a completed turn into external memory providers.
+
+        Called at the end of ``run_conversation`` with the cleaned user
+        message (``original_user_message``) and the finalised assistant
+        response.  The memory manager receives ``sync_all`` (persist the
+        exchange) followed by ``queue_prefetch_all`` (start warming context
+        for the next turn).
+
+        ``original_user_message`` is used rather than ``user_message``
+        because the latter may carry injected skill content that bloats
+        or breaks provider queries.
+
+        Interrupted turns are skipped entirely (#15218).  A partial
+        assistant output, an aborted tool chain, or a mid-stream reset is
+        not durable conversational truth, and mirroring it would pollute
+        future recall with state the user never saw completed.  The
+        prefetch is gated on the same flag because the next message is
+        most likely a retry of the same intent.
+
+        Nothing happens either when there is no memory manager or when the
+        user message or the response is empty.
+
+        External memory is strictly best-effort: a misconfigured or
+        offline backend must not keep the user from seeing their response,
+        so any exception is logged at debug level and swallowed.
+        """
+        if interrupted:
+            return
+        if not (self._memory_manager and final_response and original_user_message):
+            return
+        try:
+            self._memory_manager.sync_all(original_user_message, final_response)
+            self._memory_manager.queue_prefetch_all(original_user_message)
+        except Exception:
+            # Still non-fatal, but keep a traceback so a broken provider is
+            # diagnosable instead of failing silently on every turn.
+            logger.debug("External memory sync failed for turn", exc_info=True)
+
    def release_clients(self) -> None:
        """Release LLM client resources WITHOUT tearing down session tool state.

@@ -4955,6 +5141,8 @@ class AIAgent:
        # response.incomplete instead of response.completed).
        self._codex_streamed_text_parts: list = []
        for attempt in range(max_stream_retries + 1):
+            if self._interrupt_requested:
+                raise InterruptedError("Agent interrupted before Codex stream retry")
            collected_output_items: list = []
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
@@ -5432,6 +5620,26 @@ class AIAgent:
            self._try_refresh_anthropic_client_credentials()
        return self._anthropic_client.messages.create(**api_kwargs)

+    def _rebuild_anthropic_client(self) -> None:
+        """Replace ``self._anthropic_client`` with a newly built client.
+
+        Used after an interrupt or a stale call.  When ``provider`` is
+        ``"bedrock"`` the client is built with
+        ``build_anthropic_bedrock_client`` for ``_bedrock_region``
+        (``"us-east-1"`` when unset or empty), because
+        ``build_anthropic_client`` requires a direct Anthropic API key.
+        Every other provider goes through ``build_anthropic_client`` with
+        the stored API key, the optional ``_anthropic_base_url`` and the
+        provider request timeout.
+
+        The previous client is not closed here; the visible call sites
+        close it before calling this method.
+        """
+        if getattr(self, "provider", None) != "bedrock":
+            from agent.anthropic_adapter import build_anthropic_client
+
+            base_url = getattr(self, "_anthropic_base_url", None)
+            request_timeout = get_provider_request_timeout(self.provider, self.model)
+            self._anthropic_client = build_anthropic_client(
+                self._anthropic_api_key,
+                base_url,
+                timeout=request_timeout,
+            )
+            return
+
+        from agent.anthropic_adapter import build_anthropic_bedrock_client
+
+        # An unset or empty region falls back to us-east-1.
+        bedrock_region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1"
+        self._anthropic_client = build_anthropic_bedrock_client(bedrock_region)
+
    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
@@ -5467,12 +5675,21 @@ class AIAgent:
                    # bedrock responses like chat_completions responses.
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
+                        invalidate_runtime_client,
+                        is_stale_connection_error,
                        normalize_converse_response,
                    )
                    region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                    api_kwargs.pop("__bedrock_converse__", None)
                    client = _get_bedrock_runtime_client(region)
-                    raw_response = client.converse(**api_kwargs)
+                    try:
+                        raw_response = client.converse(**api_kwargs)
+                    except Exception as _bedrock_exc:
+                        # Evict the cached client on stale-connection failures
+                        # so the outer retry loop builds a fresh client/pool.
+                        if is_stale_connection_error(_bedrock_exc):
+                            invalidate_runtime_client(region)
+                        raise
                    result["response"] = normalize_converse_response(raw_response)
                else:
                    request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
@@ -5530,14 +5747,8 @@ class AIAgent:
                )
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        rc = request_client_holder.get("client")
                        if rc is not None:
@@ -5562,14 +5773,8 @@ class AIAgent:
                # seed future retries.
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        request_client = request_client_holder.get("client")
                        if request_client is not None:
@@ -5725,12 +5930,21 @@ class AIAgent:
                try:
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
+                        invalidate_runtime_client,
+                        is_stale_connection_error,
                        stream_converse_with_callbacks,
                    )
                    region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                    api_kwargs.pop("__bedrock_converse__", None)
                    client = _get_bedrock_runtime_client(region)
-                    raw_response = client.converse_stream(**api_kwargs)
+                    try:
+                        raw_response = client.converse_stream(**api_kwargs)
+                    except Exception as _bedrock_exc:
+                        # Evict the cached client on stale-connection failures
+                        # so the outer retry loop builds a fresh client/pool.
+                        if is_stale_connection_error(_bedrock_exc):
+                            invalidate_runtime_client(region)
+                        raise

                    def _on_text(text):
                        _fire_first()
@@ -5982,11 +6196,25 @@ class AIAgent:
                for idx in sorted(tool_calls_acc):
                    tc = tool_calls_acc[idx]
                    arguments = tc["function"]["arguments"]
+                    tool_name = tc["function"]["name"] or "?"
                    if arguments and arguments.strip():
                        try:
                            json.loads(arguments)
                        except json.JSONDecodeError:
-                            has_truncated_tool_args = True
+                            # Attempt repair before flagging as truncated.
+                            # Models like GLM-5.1 via Ollama produce trailing
+                            # commas, unclosed brackets, Python None, etc.
+                            # Without repair, these hit the truncation handler
+                            # and kill the session.  _repair_tool_call_arguments
+                            # returns "{}" for unrepairable args, which is far
+                            # better than a crashed session.
+                            repaired = _repair_tool_call_arguments(arguments, tool_name)
+                            if repaired != "{}":
+                                # Successfully repaired — use the fixed args
+                                arguments = repaired
+                            else:
+                                # Unrepairable — flag for truncation handling
+                                has_truncated_tool_args = True
                    mock_tool_calls.append(SimpleNamespace(
                        id=tc["id"],
                        type=tc["type"],
@@ -6084,6 +6312,14 @@ class AIAgent:

            try:
                for _stream_attempt in range(_max_stream_retries + 1):
+                    # Check for interrupt before each retry attempt.  Without
+                    # this, /stop closes the HTTP connection (outer poll loop),
+                    # but the retry loop opens a FRESH connection — negating the
+                    # interrupt entirely.  On slow providers (ollama-cloud) each
+                    # retry can block for the full stream-read timeout (120s+),
+                    # causing multi-minute delays between /stop and response.
+                    if self._interrupt_requested:
+                        raise InterruptedError("Agent interrupted before stream retry")
                    try:
                        if self.api_mode == "anthropic_messages":
                            self._try_refresh_anthropic_client_credentials()
@@ -6410,14 +6646,8 @@ class AIAgent:
            if self._interrupt_requested:
                try:
                    if self.api_mode == "anthropic_messages":
-                        from agent.anthropic_adapter import build_anthropic_client
-
                        self._anthropic_client.close()
-                        self._anthropic_client = build_anthropic_client(
-                            self._anthropic_api_key,
-                            getattr(self, "_anthropic_base_url", None),
-                            timeout=get_provider_request_timeout(self.provider, self.model),
-                        )
+                        self._rebuild_anthropic_client()
                    else:
                        request_client = request_client_holder.get("client")
                        if request_client is not None:
@@ -7409,6 +7639,12 @@ class AIAgent:
            raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
            if raw_reasoning_content is not None:
                msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
+            elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning():
+                # DeepSeek thinking mode requires reasoning_content on every
+                # assistant tool-call message. Without it, replaying the
+                # persisted message causes HTTP 400. Include empty string
+                # as a defensive compatibility fallback (refs #15250).
+                msg["reasoning_content"] = ""

        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            # Pass reasoning_details back unmodified so providers (OpenRouter,
@@ -7484,6 +7720,35 @@ class AIAgent:

        return msg

+    def _needs_kimi_tool_reasoning(self) -> bool:
+        """Tell whether the active endpoint is Kimi / Moonshot thinking mode.
+
+        Kimi ``/coding`` and Moonshot thinking mode both require
+        ``reasoning_content`` on every assistant tool-call message; leaving
+        it out makes the next replay fail with HTTP 400.
+
+        Detection is by provider id (``kimi-coding`` / ``kimi-coding-cn``)
+        or by the base URL host matching ``api.kimi.com``, ``moonshot.ai``
+        or ``moonshot.cn``.
+        """
+        if self.provider in {"kimi-coding", "kimi-coding-cn"}:
+            return True
+        endpoint = self.base_url
+        return (
+            base_url_host_matches(endpoint, "api.kimi.com")
+            or base_url_host_matches(endpoint, "moonshot.ai")
+            or base_url_host_matches(endpoint, "moonshot.cn")
+        )
+
+    def _needs_deepseek_tool_reasoning(self) -> bool:
+        """Tell whether the active endpoint is DeepSeek thinking mode.
+
+        DeepSeek V4 thinking mode requires ``reasoning_content`` on every
+        assistant tool-call turn; leaving it out causes HTTP 400 once the
+        message is replayed in a later API request (#15250).
+
+        Detection is case-insensitive: the provider id equals ``deepseek``,
+        the model name contains ``deepseek``, or the base URL host matches
+        ``api.deepseek.com``.
+        """
+        provider_id = (self.provider or "").lower()
+        model_id = (self.model or "").lower()
+        # Cheap string checks first; only consult the URL when they miss.
+        if provider_id == "deepseek" or "deepseek" in model_id:
+            return True
+        return base_url_host_matches(self.base_url, "api.deepseek.com")
+
    def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
        """Copy provider-facing reasoning fields onto an API replay message."""
        if source_msg.get("role") != "assistant":
@@ -7499,13 +7764,14 @@ class AIAgent:
            api_msg["reasoning_content"] = normalized_reasoning
            return

-        kimi_requires_reasoning = (
-            self.provider in {"kimi-coding", "kimi-coding-cn"}
-            or base_url_host_matches(self.base_url, "api.kimi.com")
-            or base_url_host_matches(self.base_url, "moonshot.ai")
-            or base_url_host_matches(self.base_url, "moonshot.cn")
-        )
-        if kimi_requires_reasoning and source_msg.get("tool_calls"):
+        # Providers that require an echoed reasoning_content on every
+        # assistant tool-call turn. Detection logic lives in the per-provider
+        # helpers so both the creation path (_build_assistant_message) and
+        # this replay path stay in sync.
+        if source_msg.get("tool_calls") and (
+            self._needs_kimi_tool_reasoning()
+            or self._needs_deepseek_tool_reasoning()
+        ):
            api_msg["reasoning_content"] = ""

    @staticmethod
@@ -7536,6 +7802,115 @@ class AIAgent:
        ]
        return api_msg

+    @staticmethod
+    def _sanitize_tool_call_arguments(
+        messages: list,
+        *,
+        logger=None,
+        session_id: str = None,
+    ) -> int:
+        """Repair corrupted assistant tool-call argument JSON in-place.
+
+        Walks ``messages`` and, for every assistant message that carries a
+        non-empty ``tool_calls`` list, inspects each call's
+        ``function["arguments"]``:
+
+        * ``None``, ``""`` or whitespace-only strings are replaced with
+          ``"{}"``; these are not counted as repairs.
+        * Non-string arguments are left untouched.
+        * Strings that fail ``json.loads`` are replaced with ``"{}"``, a
+          warning is logged, and the corruption marker is attached to the
+          call's tool-result message. If no tool message with the same
+          ``tool_call_id`` is found in the run of tool messages directly
+          after the assistant message, a stub tool message containing only
+          the marker is inserted.
+
+        Args:
+            messages: Conversation message list; mutated in place. Any
+                non-list value is ignored and yields 0.
+            logger: Logger used for the warning; defaults to
+                ``logging.getLogger(__name__)``.
+            session_id: Only used in the warning text (``"-"`` when falsy).
+
+        Returns:
+            The number of tool calls whose arguments failed to parse and
+            were reset to ``"{}"``.
+        """
+        log = logger or logging.getLogger(__name__)
+        if not isinstance(messages, list):
+            return 0
+
+        repaired = 0
+        marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
+
+        def _prepend_marker(tool_msg: dict) -> None:
+            # Put the marker in front of an existing tool result. String
+            # content that already starts with the marker is left alone so
+            # repeated sanitization does not stack markers.
+            existing = tool_msg.get("content")
+            if isinstance(existing, str):
+                if not existing:
+                    tool_msg["content"] = marker
+                elif not existing.startswith(marker):
+                    tool_msg["content"] = f"{marker}\n{existing}"
+                return
+            if existing is None:
+                tool_msg["content"] = marker
+                return
+            # Non-string content: serialize it (str() as a fallback) so it
+            # can be appended after the marker as text.
+            try:
+                existing_text = json.dumps(existing)
+            except TypeError:
+                existing_text = str(existing)
+            tool_msg["content"] = f"{marker}\n{existing_text}"
+
+        # Index-based walk because stub tool messages may be inserted into
+        # ``messages`` while it is being traversed.
+        message_index = 0
+        while message_index < len(messages):
+            msg = messages[message_index]
+            if not isinstance(msg, dict) or msg.get("role") != "assistant":
+                message_index += 1
+                continue
+
+            tool_calls = msg.get("tool_calls")
+            if not isinstance(tool_calls, list) or not tool_calls:
+                message_index += 1
+                continue
+
+            # Stubs go immediately after the assistant message; insert_at
+            # advances per stub so several stubs keep tool-call order.
+            insert_at = message_index + 1
+            for tool_call in tool_calls:
+                if not isinstance(tool_call, dict):
+                    continue
+                function = tool_call.get("function")
+                if not isinstance(function, dict):
+                    continue
+
+                arguments = function.get("arguments")
+                # Missing / blank arguments are normalized to an empty JSON
+                # object without logging or counting them as corruption.
+                if arguments is None or arguments == "":
+                    function["arguments"] = "{}"
+                    continue
+                if isinstance(arguments, str) and not arguments.strip():
+                    function["arguments"] = "{}"
+                    continue
+                if not isinstance(arguments, str):
+                    continue
+
+                try:
+                    json.loads(arguments)
+                except json.JSONDecodeError:
+                    tool_call_id = tool_call.get("id")
+                    function_name = function.get("name", "?")
+                    # Only the first 80 characters are logged.
+                    preview = arguments[:80]
+                    log.warning(
+                        "Corrupted tool_call arguments repaired before request "
+                        "(session=%s, message_index=%s, tool_call_id=%s, function=%s, preview=%r)",
+                        session_id or "-",
+                        message_index,
+                        tool_call_id or "-",
+                        function_name,
+                        preview,
+                    )
+                    function["arguments"] = "{}"
+
+                    # Look for this call's result within the contiguous run
+                    # of tool messages that follows the assistant message;
+                    # the scan stops at the first non-tool message.
+                    existing_tool_msg = None
+                    scan_index = message_index + 1
+                    while scan_index < len(messages):
+                        candidate = messages[scan_index]
+                        if not isinstance(candidate, dict) or candidate.get("role") != "tool":
+                            break
+                        if candidate.get("tool_call_id") == tool_call_id:
+                            existing_tool_msg = candidate
+                            break
+                        scan_index += 1
+
+                    if existing_tool_msg is None:
+                        # No result recorded for this call: add a stub tool
+                        # message whose only content is the marker.
+                        messages.insert(
+                            insert_at,
+                            {
+                                "role": "tool",
+                                "tool_call_id": tool_call_id,
+                                "content": marker,
+                            },
+                        )
+                        insert_at += 1
+                    else:
+                        _prepend_marker(existing_tool_msg)
+
+                    repaired += 1
+
+            # Any stubs inserted above have role "tool", so the role check
+            # at the top of the loop skips them on later iterations.
+            message_index += 1
+
+        return repaired
+
    def _should_sanitize_tool_calls(self) -> bool:
        """Determine if tool_calls need sanitization for strict APIs.

@@ -7549,201 +7924,6 @@ class AIAgent:
        """
        return self.api_mode != "codex_responses"

-    def flush_memories(self, messages: list = None, min_turns: int = None):
-        """Give the model one turn to persist memories before context is lost.
-
-        Called before compression, session reset, or CLI exit. Injects a flush
-        message, makes one API call, executes any memory tool calls, then
-        strips all flush artifacts from the message list.
-
-        Args:
-            messages: The current conversation messages. If None, uses
-                      self._session_messages (last run_conversation state).
-            min_turns: Minimum user turns required to trigger the flush.
-                       None = use config value (flush_min_turns).
-                       0 = always flush (used for compression).
-        """
-        if self._memory_flush_min_turns == 0 and min_turns is None:
-            return
-        if "memory" not in self.valid_tool_names or not self._memory_store:
-            return
-        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
-        if self._user_turn_count < effective_min:
-            return
-
-        if messages is None:
-            messages = getattr(self, '_session_messages', None)
-        if not messages or len(messages) < 3:
-            return
-
-        flush_content = (
-            "[System: The session is being compressed. "
-            "Save anything worth remembering — prioritize user preferences, "
-            "corrections, and recurring patterns over task-specific details.]"
-        )
-        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
-        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
-        messages.append(flush_msg)
-
-        try:
-            # Build API messages for the flush call
-            _needs_sanitize = self._should_sanitize_tool_calls()
-            api_messages = []
-            for msg in messages:
-                api_msg = msg.copy()
-                self._copy_reasoning_content_for_api(msg, api_msg)
-                api_msg.pop("reasoning", None)
-                api_msg.pop("finish_reason", None)
-                api_msg.pop("_flush_sentinel", None)
-                api_msg.pop("_thinking_prefill", None)
-                if _needs_sanitize:
-                    self._sanitize_tool_calls_for_strict_api(api_msg)
-                api_messages.append(api_msg)
-
-            if self._cached_system_prompt:
-                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
-
-            # Make one API call with only the memory tool available
-            memory_tool_def = None
-            for t in (self.tools or []):
-                if t.get("function", {}).get("name") == "memory":
-                    memory_tool_def = t
-                    break
-
-            if not memory_tool_def:
-                messages.pop()  # remove flush msg
-                return
-
-            # Use auxiliary client for the flush call when available --
-            # it's cheaper and avoids Codex Responses API incompatibility.
-            from agent.auxiliary_client import (
-                call_llm as _call_llm,
-                _fixed_temperature_for_model,
-                OMIT_TEMPERATURE,
-            )
-            _aux_available = True
-            # Kimi models manage temperature server-side — omit it entirely.
-            # Other models with a fixed contract get that value; everyone else
-            # gets the historical 0.3 default.
-            _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
-            _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
-            if _omit_temperature:
-                _flush_temperature = None
-            elif _fixed_temp is not None:
-                _flush_temperature = _fixed_temp
-            else:
-                _flush_temperature = 0.3
-            try:
-                response = _call_llm(
-                    task="flush_memories",
-                    messages=api_messages,
-                    tools=[memory_tool_def],
-                    temperature=_flush_temperature,
-                    max_tokens=5120,
-                    # timeout resolved from auxiliary.flush_memories.timeout config
-                )
-            except RuntimeError:
-                _aux_available = False
-                response = None
-
-            if not _aux_available and self.api_mode == "codex_responses":
-                # No auxiliary client -- use the Codex Responses path directly
-                codex_kwargs = self._build_api_kwargs(api_messages)
-                codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
-                if _flush_temperature is not None:
-                    codex_kwargs["temperature"] = _flush_temperature
-                else:
-                    codex_kwargs.pop("temperature", None)
-                if "max_output_tokens" in codex_kwargs:
-                    codex_kwargs["max_output_tokens"] = 5120
-                response = self._run_codex_stream(codex_kwargs)
-            elif not _aux_available and self.api_mode == "anthropic_messages":
-                # Native Anthropic — use the transport for kwargs
-                _tflush = self._get_transport()
-                ant_kwargs = _tflush.build_kwargs(
-                    model=self.model, messages=api_messages,
-                    tools=[memory_tool_def], max_tokens=5120,
-                    reasoning_config=None,
-                    preserve_dots=self._anthropic_preserve_dots(),
-                )
-                response = self._anthropic_messages_create(ant_kwargs)
-            elif not _aux_available:
-                api_kwargs = {
-                    "model": self.model,
-                    "messages": api_messages,
-                    "tools": [memory_tool_def],
-                    **self._max_tokens_param(5120),
-                }
-                if _flush_temperature is not None:
-                    api_kwargs["temperature"] = _flush_temperature
-                from agent.auxiliary_client import _get_task_timeout
-                response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
-                    **api_kwargs, timeout=_get_task_timeout("flush_memories")
-                )
-
-            # Extract tool calls from the response, handling all API formats
-            tool_calls = []
-            if self.api_mode == "codex_responses" and not _aux_available:
-                _ct_flush = self._get_transport()
-                _cnr_flush = _ct_flush.normalize_response(response)
-                if _cnr_flush and _cnr_flush.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _cnr_flush.tool_calls
-                    ]
-            elif self.api_mode == "anthropic_messages" and not _aux_available:
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_result and _flush_result.tool_calls:
-                    tool_calls = [
-                        SimpleNamespace(
-                            id=tc.id, type="function",
-                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_result.tool_calls
-                    ]
-            elif self.api_mode in ("chat_completions", "bedrock_converse"):
-                # chat_completions / bedrock — normalize through transport
-                _flush_result = self._get_transport().normalize_response(response)
-                if _flush_result.tool_calls:
-                    tool_calls = _flush_result.tool_calls
-            elif _aux_available and hasattr(response, "choices") and response.choices:
-                # Auxiliary client returned OpenAI-shaped response while main
-                # api_mode is codex/anthropic — extract tool_calls from .choices
-                _aux_msg = response.choices[0].message
-                if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                    tool_calls = _aux_msg.tool_calls
-
-            for tc in tool_calls:
-                if tc.function.name == "memory":
-                    try:
-                        args = json.loads(tc.function.arguments)
-                        flush_target = args.get("target", "memory")
-                        from tools.memory_tool import memory_tool as _memory_tool
-                        _memory_tool(
-                            action=args.get("action"),
-                            target=flush_target,
-                            content=args.get("content"),
-                            old_text=args.get("old_text"),
-                            store=self._memory_store,
-                        )
-                        if not self.quiet_mode:
-                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                    except Exception as e:
-                        logger.debug("Memory flush tool call failed: %s", e)
-        except Exception as e:
-            logger.debug("Memory flush API call failed: %s", e)
-        finally:
-            # Strip flush artifacts: remove everything from the flush message onward.
-            # Use sentinel marker instead of identity check for robustness.
-            while messages and messages[-1].get("_flush_sentinel") != _sentinel:
-                messages.pop()
-                if not messages:
-                    break
-            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
-                messages.pop()
-
    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
        """Compress conversation context and split the session in SQLite.

@@ -7762,8 +7942,6 @@ class AIAgent:
            f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
            focus_topic,
        )
-        # Pre-compression memory flush: let the model save memories before they're lost
-        self.flush_memories(messages, min_turns=0)

        # Notify external memory provider before compression discards context
        if self._memory_manager:
@@ -7779,6 +7957,15 @@ class AIAgent:
            # focus_topic — fall back to calling without it.
            compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

+        summary_error = getattr(self.context_compressor, "_last_summary_error", None)
+        if summary_error:
+            if getattr(self, "_last_compression_summary_warning", None) != summary_error:
+                self._last_compression_summary_warning = summary_error
+                self._emit_warning(
+                    f"⚠ Compression summary failed: {summary_error}. "
+                    "Inserted a fallback context marker."
+                )
+
        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
            compressed.append({"role": "user", "content": todo_snapshot})
@@ -7948,6 +8135,10 @@ class AIAgent:
                        function_args.get("action", ""),
                        target,
                        function_args.get("content", ""),
+                        metadata=self._build_memory_write_metadata(
+                            task_id=effective_task_id,
+                            tool_call_id=tool_call_id,
+                        ),
                    )
                except Exception:
                    pass
@@ -8459,6 +8650,10 @@ class AIAgent:
                            function_args.get("action", ""),
                            target,
                            function_args.get("content", ""),
+                            metadata=self._build_memory_write_metadata(
+                                task_id=effective_task_id,
+                                tool_call_id=getattr(tool_call, "id", None),
+                            ),
                        )
                    except Exception:
                        pass
@@ -8703,6 +8898,7 @@ class AIAgent:
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
+                self._copy_reasoning_content_for_api(msg, api_msg)
                for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
                    api_msg.pop(internal_field, None)
                if _needs_sanitize:
@@ -9333,6 +9529,19 @@ class AIAgent:
            # Note: Reasoning is embedded in content via <think> tags for trajectory storage.
            # However, providers like Moonshot AI require a separate 'reasoning_content' field
            # on assistant messages with tool_calls. We handle both cases here.
+            request_logger = getattr(self, "logger", None) or logging.getLogger(__name__)
+            repaired_tool_calls = self._sanitize_tool_call_arguments(
+                messages,
+                logger=request_logger,
+                session_id=self.session_id,
+            )
+            if repaired_tool_calls > 0:
+                request_logger.info(
+                    "Sanitized %s corrupted tool_call arguments before request (session=%s)",
+                    repaired_tool_calls,
+                    self.session_id or "-",
+                )
+
            api_messages = []
            for idx, msg in enumerate(messages):
                api_msg = msg.copy()
@@ -12162,14 +12371,11 @@ class AIAgent:
            self._iters_since_skill = 0

        # External memory provider: sync the completed turn + queue next prefetch.
-        # Use original_user_message (clean input) — user_message may contain
-        # injected skill content that bloats / breaks provider queries.
-        if self._memory_manager and final_response and original_user_message:
-            try:
-                self._memory_manager.sync_all(original_user_message, final_response)
-                self._memory_manager.queue_prefetch_all(original_user_message)
-            except Exception:
-                pass
+        self._sync_external_memory_for_turn(
+            original_user_message=original_user_message,
+            final_response=final_response,
+            interrupted=interrupted,
+        )

        # Background memory/skill review — runs AFTER the response is delivered
        # so it never competes with the user's task for model attention.
@@ -1,377 +0,0 @@
-# Compression Eval — Design
-
-Status: proposal. Nothing under `scripts/compression_eval/` runs in CI.
-This is an offline tool authors run before merging prompt or algorithm
-changes to `agent/context_compressor.py`.
-
-## Why
-
-We tune the compressor prompt and the `_template_sections` checklist by
-hand, ship, and wait for the next real session to notice regressions.
-There is no automated check that a prompt edit still preserves file
-paths, error messages, or the active task across a compression.
-
-Factory.ai's December 2025 write-up
-(https://factory.ai/news/evaluating-compression) describes a
-probe-based eval that scores compressed state on six dimensions. The
-methodology is the valuable part — the benchmarks in the post are a
-marketing piece. We adopt the methodology and discard the scoreboard.
-
-## Goal
-
-Given a real session transcript and a bank of probe questions that
-exercise what the transcript contained, answer:
-
-1. After `ContextCompressor.compress()` runs, can the agent still
-   answer each probe correctly from the compressed state?
-2. Which of the six dimensions (accuracy, context awareness, artifact
-   trail, completeness, continuity, instruction following) is the
-   prompt weakest on?
-3. Does a prompt change improve or regress any dimension vs. the
-   previous run?
-
-That is the full scope. No "compare against OpenAI and Anthropic"
-benchmarking, no public scoreboard, no marketing claims.
-
-## Non-goals
-
-- Not a pytest. Requires API credentials, costs money, takes minutes
-  per fixture, and output is LLM-graded and non-deterministic.
-- Not part of `scripts/run_tests.sh`. Not invoked by CI.
-- Not a replacement for the existing compressor unit tests in
-  `tests/agent/test_context_compressor.py` — those stay as the
-  structural / boundary / tool-pair-sanitization guard.
-- Not a general trajectory eval. Scoped to context compaction only.
-
-## Where it lives
-
-```
-scripts/compression_eval/
-├── DESIGN.md                 # this file
-├── README.md                 # how to run, cost expectations, caveats
-├── run_eval.py               # entry point (fire CLI, like sample_and_compress.py)
-├── scrub_fixtures.py         # regenerate fixtures from ~/.hermes/sessions/*.jsonl
-├── fixtures/                 # checked-in scrubbed session snapshots
-│   ├── feature-impl-context-priority.json
-│   ├── debug-session-feishu-id-model.json
-│   └── config-build-competitive-scouts.json
-├── probes/                   # probe banks paired with fixtures
-│   └── <fixture>.probes.json
-├── rubric.py                 # grading prompt + dimension definitions
-├── grader.py                 # judge-model call + score parsing
-├── compressor_driver.py      # thin wrapper over ContextCompressor
-└── results/                  # gitignored; timestamped output per run
-    └── .gitkeep
-```
-
-`scripts/` is the right home: offline tooling, no CI involvement,
-precedent already set by `sample_and_compress.py`,
-`contributor_audit.py`, `discord-voice-doctor.py`.
-
-`environments/` is for Atropos RL training environments — wrong shape.
-`tests/` is hermetic and credential-free — incompatible with a
-probe-based eval that needs a judge model.
-
-## Fixture format
-
-A fixture is a single compressed-enough conversation captured from a
-real session. Stored as JSON (pretty-printed, reviewable in PRs):
-
-```json
-{
-  "name": "401-debug",
-  "description": "178-turn session debugging a 401 on /api/auth/login",
-  "model": "anthropic/claude-sonnet-4.6",
-  "context_length": 200000,
-  "messages": [
-    {"role": "system", "content": "..."},
-    {"role": "user", "content": "..."},
-    {"role": "assistant", "content": "...", "tool_calls": [...]},
-    {"role": "tool", "tool_call_id": "...", "content": "..."}
-  ],
-  "notes": "Captured 2026-04-24 from session 20260424_*.jsonl; \
-            PII scrubbed; secrets redacted via redact_sensitive_text."
-}
-```
-
-### Sourcing fixtures
-
-Fixtures are scrubbed snapshots of real sessions from the
-maintainer's `~/.hermes/sessions/*.jsonl` store, generated
-reproducibly by `scrub_fixtures.py` in this directory. Re-run the
-scrubber with `python3 scripts/compression_eval/scrub_fixtures.py`
-to regenerate them after a scrubber change.
-
-Three shipped fixtures cover three different session shapes:
-
-| Fixture | Source shape | Messages | Tokens (rough) | Tests |
-|---|---|---|---|---|
-| `feature-impl-context-priority` | investigate → patch → test → PR → merge | 75 | ~45k | continuation, artifact trail (2 files modified, 1 PR, ~16k skill_view in head) |
-| `debug-session-feishu-id-model` | PR triage + upstream docs + decision | 59 | ~28k | recall (PR #, error shape), decision (outcome + reason), large PR diff blocks |
-| `config-build-competitive-scouts` | iterative config: 11 cron jobs across 7 weekdays | 61 | ~26k | artifact trail (which jobs, which days), iterative-merge |
-
-The `~26k-45k` token range is below the default 50%-of-200k
-compression threshold, so the eval will always **force** a
-`compress()` call rather than wait for the natural trigger. That is
-the intended shape — we want a controlled single-shot compression so
-score deltas are attributable to the prompt change, not to whether
-the threshold happened to fire at the same boundary twice.
-
-### Scrubber pipeline
-
-`scrub_fixtures.py` applies, per message:
-
-1. `agent.redact.redact_sensitive_text` — API keys, tokens,
-   connection strings
-2. Username paths: `/home/teknium` → `/home/user`
-3. Personal handles: all case variants of the maintainer name → `user`
-4. Email addresses → `contributor@example.com`; git
-   `Author: Name <addr>` header lines normalised
-5. `<REASONING_SCRATCHPAD>...</REASONING_SCRATCHPAD>` and
-   `<think>...</think>` stripped from assistant content
-6. Messaging-platform user mentions (`<@123456>`, `<@***>`) →
-   `<@user>`
-7. First user message paraphrased to remove personal voice;
-   subsequent user turns kept verbatim after the redactions above
-8. System prompt replaced with a generic public-safe placeholder so
-   we don't check in the maintainer's tuned soul/skills/memory system
-   block
-9. Orphan empty-assistant messages (artifact of scratchpad-only
-   turns) and trailing tool messages with no matching assistant are
-   dropped
-10. Tool outputs preserved verbatim. An earlier iteration truncated
-    > 2KB tool bodies to keep fixture JSON small, but that defeats
-    the purpose: real sessions have 30KB `skill_view` dumps, 10KB
-    `read_file` outputs, 5KB `web_extract` bodies — compression has
-    to handle them. Truncation is now a no-op; the pipeline note
-    remains in `scrubbing_passes` for audit trail clarity.
-
-Before every fixture PR: grep the fixture for PII patterns. An
-audit is embedded at the bottom of the scrubber as comments.
-
-**Fixtures must stay small.** Target <200 KB per fixture, <500 KB
-total for the directory. Current total: ~410 KB across three
-fixtures.  Larger sessions are truncated with a
-`truncated_to: <index>` field in the fixture header so the cut is
-reviewable.
-
-## Probe format
-
-One probe file per fixture, so reviewers can see the question bank
-evolve alongside the fixture:
-
-```json
-{
-  "fixture": "401-debug",
-  "probes": [
-    {
-      "id": "recall-error-code",
-      "type": "recall",
-      "question": "What was the original error code and endpoint?",
-      "expected_facts": ["401", "/api/auth/login"]
-    },
-    {
-      "id": "artifact-files-modified",
-      "type": "artifact",
-      "question": "Which files have been modified in this session?",
-      "expected_facts": ["session_store.py", "redis_client.py"]
-    },
-    {
-      "id": "continuation-next-step",
-      "type": "continuation",
-      "question": "What should we do next?",
-      "expected_facts": ["re-run the integration tests", "restart the worker"]
-    },
-    {
-      "id": "decision-redis-approach",
-      "type": "decision",
-      "question": "What did we decide about the Redis issue?",
-      "expected_facts": ["switch to redis-py 5.x", "pooled connection"]
-    }
-  ]
-}
-```
-
-The four probe types come directly from Factory's methodology:
-**recall, artifact, continuation, decision**. `expected_facts` gives
-the grader concrete anchors instead of relying purely on LLM taste.
-
-Authoring a probe bank is a one-time cost per fixture. 8-12 probes per
-fixture is the target — enough to cover all four types, few enough to
-grade in under a minute at reasonable cost.
-
-## Grading
-
-Each probe gets scored 0-5 on **six dimensions** (Factory's six):
-
-| Dimension             | What it measures                                    |
-|-----------------------|-----------------------------------------------------|
-| accuracy              | File paths, function names, error codes are correct |
-| context_awareness     | Reflects current state, not a mid-session snapshot  |
-| artifact_trail        | Knows which files were read / modified / created    |
-| completeness          | Addresses all parts of the probe                    |
-| continuity            | Agent can continue without re-fetching              |
-| instruction_following | Probe answered in the requested form                |
-
-Grading is done by a single judge-model call per probe with a
-deterministic rubric prompt (see `rubric.py`). The rubric includes the
-`expected_facts` list so the judge has a concrete anchor. Default
-judge model: whatever the user has configured as their main model at
-run time (same resolution path as `auxiliary_client.call_llm`). A
-`--judge-model` flag allows overriding for consistency across runs.
-
-Non-determinism caveat: two runs of the same fixture will produce
-different scores. A single run means nothing. Report medians over
-N=3 runs by default, and require an improvement of >=0.3 on any
-dimension before claiming a prompt change is a win.
-
-## Run flow
-
-```
-python scripts/compression_eval/run_eval.py [OPTIONS]
-```
-
-Options (fire-style, mirroring `sample_and_compress.py`):
-
-| Flag                   | Default    | Purpose                                   |
-|------------------------|------------|-------------------------------------------|
-| `--fixtures`           | all        | Comma-separated fixture names             |
-| `--runs`               | 3          | Runs per fixture (for median)             |
-| `--judge-model`        | auto       | Override judge model                      |
-| `--compressor-model`   | auto       | Override model used *inside* the compressor |
-| `--label`              | timestamp  | Subdirectory under `results/`             |
-| `--focus-topic`        | none       | Pass-through to `compress(focus_topic=)`  |
-| `--compare-to`         | none       | Path to a previous run for diff output    |
-
-Steps per fixture per run:
-
-1. Load fixture JSON and probe bank.
-2. Construct a `ContextCompressor` against the fixture's model.
-3. Call `compressor.compress(messages)` — capture the compressed
-   message list.
-4. For each probe: ask the judge model to role-play as the continuing
-   agent with only the compressed state, then grade the answer on the
-   six dimensions using `rubric.py`.
-5. Write a per-run JSON to `results/<label>/<fixture>-run-N.json`.
-6. After all runs, emit a markdown summary to
-   `results/<label>/report.md`.
-
-## Report format
-
-Pasted verbatim into PR descriptions that touch the compressor:
-
-```
-## Compression eval — label 2026-04-25_13-40-02
-
-Main model: anthropic/claude-sonnet-4.6   Judge: same
-3 runs per fixture, medians reported.
-
-| Fixture        | Accuracy | Context | Artifact | Complete | Continuity | Instruction | Overall |
-|----------------|----------|---------|----------|----------|------------|-------------|---------|
-| 401-debug      | 4.1      | 4.0     | 2.5      | 4.3      | 3.8        | 5.0         | 3.95    |
-| pr-review      | 3.9      | 3.8     | 3.1      | 4.2      | 3.9        | 5.0         | 3.98    |
-| feature-impl   | 4.0      | 3.9     | 2.9      | 4.1      | 4.0        | 5.0         | 3.98    |
-
-Per-probe misses (score < 3.0):
- 401-debug / artifact-files-modified: 1.7 — summary dropped redis_client.py
- pr-review / decision-auth-rewrite: 2.3 — outcome captured, reasoning dropped
-```
-
-## Cost expectations
-
-Dominated by the judge calls. For 3 fixtures × 10 probes × 3 runs =
-90 judge calls per eval run. On Claude Sonnet 4.6 that is roughly
-$0.50-$1.50 per full eval depending on probe length. The compressor
-itself makes 1 call per fixture × 3 runs = 9 additional calls.
-
-**This is not a check to run after every commit.** It is a
-before-merge check for PRs that touch:
-
- `agent/context_compressor.py` — any change to `_template_sections`,
-  `_generate_summary`, or `compress()`.
- `agent/auxiliary_client.py` — when changing how compression tasks
-  are routed.
- `agent/prompt_builder.py` — when the compression-note phrasing
-  changes.
-
-## Open questions (to resolve before implementing)
-
-1. **Fixture scrubbing: manual or scripted?** A scripted scrub that
-   also replaces project names / hostnames would lower the cost of
-   contributing a new fixture. Risk: over-aggressive replacement
-   destroys the signal the probe depends on. Propose: start manual,
-   add scripted helpers once we have 3 fixtures and know the common
-   PII shapes.
-
-2. **Judge model selection.** Factory uses GPT-5.2. We can't pin one
-   — user's main model changes. Options: (a) grade with main model
-   (cheap, inconsistent across users), (b) require a specific judge
-   model (e.g. `claude-sonnet-4.6`), inconsistent for users without
-   access. Propose (a) with a `--judge-model` override, and make the
-   model name prominent in the report so comparisons across machines
-   are legible.
-
-3. **Noise floor.** Before landing prompt changes, run the current
-   prompt N=10 times to measure per-dimension stddev. That tells us
-   the minimum delta to call a change significant. Suspect 0.2-0.3 on
-   a 0-5 scale. Decision deferred until after the first fixture is
-   landed.
-
-4. **Iterative-merge coverage.** The real Factory-vs-Anthropic
-   difference is incremental merge vs. regenerate. A fixture that
-   only compresses once doesn't exercise our iterative path. Add a
-   fourth fixture that forces two compressions (manually chained),
-   with probes that test whether information from the first
-   compression survives the second. Deferred to a follow-up PR.
-
-## Implementation status
-
-This PR ships the full eval end-to-end:
-
- `scrub_fixtures.py` — reproducible scrubber
- `fixtures/` — three scrubbed session fixtures
- `probes/` — three probe banks (10-11 probes each, all four types)
- `rubric.py` — six-dimension grading rubric + judge-prompt builder + response parser
- `compressor_driver.py` — thin wrapper around `ContextCompressor` for forced single-shot compression
- `grader.py` — two-phase continuation + grading calls via OpenAI SDK
- `report.py` — markdown report renderer + `--compare-to` delta mode + per-run JSON dumper
- `run_eval.py` — entry point (`fire`-style CLI)
- `tests/scripts/test_compression_eval.py` — 33 unit tests covering rubric parsing, report rendering, fixture/probe loading, and a PII smoke test on the fixtures (LLM paths not tested — they require credentials and are exercised by the eval itself)
-
-### Noise floor — one empirical data point
-
-A single same-inputs re-run of `debug-session-feishu-id-model`
-(compressor + judge = `openai/gpt-5.4-mini` via Nous Portal,
-runs=1) produced:
-
- Run A overall: 3.25
- Run B overall: 3.17 (delta -0.08)
-
-Individual dimensions varied by up to ±0.5 between the two runs on
-single-run medians. This confirms DESIGN.md's "< 0.3 is noise"
-guidance is the right order of magnitude for a single-run
-comparison. With `runs=3` default, per-dimension variance should
-tighten; noise-floor measurement at N=10 is still a useful
-follow-up to calibrate precisely.
-
-## Open follow-ups (not blocking this PR)
-
-1. **Iterative-merge fixture** — our actual compression win over
-   "regenerate from scratch" approaches is only exercised when
-   `_previous_summary` is re-used on a second compression. None of
-   the three shipped fixtures force two compressions. The natural
-   basis is `config-build-competitive-scouts` (already iterative by
-   shape); splitting it at the Monday/Tuesday boundary would force
-   the second compression to merge rather than regenerate.
-2. **Noise-floor precision** — run the current prompt N=10 times
-   against one fixture to pin down per-dimension stddev and publish
-   the numbers in README.
-3. **Scripted scrubber helpers** — the current scrubber is manual
-   per-fixture. A helper that identifies candidate sessions to
-   scrub (by shape or by keyword) would lower the cost of adding
-   fixture #4+.
-4. **Judge model selection policy** — current code uses whatever
-   the user passes as `--judge-model` (default: same as compressor).
-   Pinning the judge across users would stabilise cross-machine
-   comparisons, at the cost of blocking users without access to
-   the pinned model.
@@ -1,110 +0,0 @@
-# compression_eval
-
-Offline eval harness for `agent/context_compressor.py`. Runs a real
-conversation transcript through the compressor, then probes the
-compressed state with targeted questions graded on six dimensions.
-
-## When to run
-
-Before merging changes to:
-
- `agent/context_compressor.py` — any change to `_template_sections`,
-  `_generate_summary`, `compress()`, or its boundary logic
- `agent/auxiliary_client.py` — when changing how compression tasks
-  are routed
- `agent/prompt_builder.py` — when the compression-note phrasing
-  changes
-
-## Not for CI
-
-This harness makes real model calls (compressor + continuation +
-judge = ~3 calls per probe × probes per fixture × runs). Costs ~$0.50
-to ~$1.50 per full run depending on models, takes minutes, is
-LLM-graded (non-deterministic). It lives in `scripts/` and is
-invoked by hand. `tests/` and `scripts/run_tests.sh` do not touch it.
-
-`tests/scripts/test_compression_eval.py` covers the non-LLM code
-paths (rubric parsing, report rendering, fixture/probe loading, PII
-smoke check on the checked-in fixtures) and DOES run in CI.
-
-## Usage
-
-```bash
-# Run all three fixtures, 3 runs each, with your configured provider
-python3 scripts/compression_eval/run_eval.py
-
-# Faster iteration — one fixture, one run
-python3 scripts/compression_eval/run_eval.py \
-    --fixtures=debug-session-feishu-id-model --runs=1
-
-# Pin a cheap model for both compression + judge (recommended)
-python3 scripts/compression_eval/run_eval.py \
-    --compressor-provider=nous --compressor-model=openai/gpt-5.4-mini \
-    --judge-provider=nous      --judge-model=openai/gpt-5.4-mini \
-    --runs=3 --label=baseline
-
-# After editing context_compressor.py, rerun with a new label and diff
-python3 scripts/compression_eval/run_eval.py \
-    --compressor-provider=nous --compressor-model=openai/gpt-5.4-mini \
-    --judge-provider=nous      --judge-model=openai/gpt-5.4-mini \
-    --runs=3 --label=my-prompt-tweak \
-    --compare-to=results/baseline
-```
-
-Results land in `results/<label>/report.md` and are intended to be
-pasted verbatim into PR descriptions. `--compare-to` renders a delta
-column per dimension so reviewers can see "did this actually help?"
-at a glance.
-
-Rule of thumb: dimension deltas below ±0.3 are within run-to-run
-noise on `runs=3`. Publish a bigger N if you want tighter bounds.
-
-## Fixtures
-
-Three scrubbed session snapshots live under `fixtures/`:
-
- `feature-impl-context-priority.json` — 75 msgs, investigate →
-  patch → test → PR → merge
- `debug-session-feishu-id-model.json` — 59 msgs, PR triage +
-  upstream docs + decision
- `config-build-competitive-scouts.json` — 61 msgs, iterative
-  config accumulation (11 cron jobs)
-
-Regenerate them from the maintainer's `~/.hermes/sessions/*.jsonl`
-with `python3 scripts/compression_eval/scrub_fixtures.py`. The
-scrubber pipeline and PII-audit checklist are documented in
-`DESIGN.md` under **Scrubber pipeline**.
-
-## Probes
-
-One probe bank per fixture under `probes/`, 10-11 probes each,
-covering all four types: **recall**, **artifact**, **continuation**,
-**decision**. Each probe carries an `expected_facts` list of concrete
-anchors (PR numbers, file paths, error codes, commands run) that the
-judge sees alongside the assistant's answer.
-
-## How it scores
-
-Six dimensions, 0-5 per probe:
-
-| Dimension             | What it measures                                     |
-|-----------------------|------------------------------------------------------|
-| accuracy              | File paths, function names, PR/issue numbers correct |
-| context_awareness     | Reflects current session state, not a snapshot       |
-| artifact_trail        | Correctly enumerates files / commands / PRs          |
-| completeness          | Addresses ALL parts of the probe                     |
-| continuity            | Next assistant could continue without re-fetching    |
-| instruction_following | Answer in the requested form                         |
-
-Report renders medians across N runs; probes scoring below 3.0
-overall surface in a separate section with the judge's specific
-complaint noted inline.
-
-## Related
-
- `agent/context_compressor.py` — the thing under test
- `tests/agent/test_context_compressor.py` — structural unit tests
-  that do run in CI
- `scripts/sample_and_compress.py` — the closest existing script in
-  shape (offline, credential-requiring, not in CI)
- `DESIGN.md` — full architecture + methodology + open follow-ups
@@ -1,114 +0,0 @@
-"""Wraps ContextCompressor to run a single forced compression on a fixture.
-
-The real agent loop checks ``should_compress()`` before calling ``compress()``.
-Fixtures are intentionally sized below the 100k threshold so ``compress()``
-runs in a controlled, single-shot mode — score deltas attribute to the
-prompt change, not to whether the threshold happened to fire at the same
-boundary twice.
-
-Resolves the provider for the compression call via the same path the real
-agent uses (``hermes_cli.runtime_provider.resolve_runtime_provider``) so
-behaviour matches production aside from being a single call.
-"""
-from __future__ import annotations
-
-import sys
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-# Make sibling imports work whether invoked as a script or as a module.
-_REPO_ROOT = Path(__file__).resolve().parents[2]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-
-from agent.context_compressor import (  # noqa: E402
-    ContextCompressor,
-    estimate_messages_tokens_rough,
-)
-
-
-def run_compression(
-    *,
-    messages: List[Dict[str, Any]],
-    compressor_model: str,
-    compressor_provider: str,
-    compressor_base_url: str,
-    compressor_api_key: str,
-    compressor_api_mode: str,
-    context_length: int,
-    focus_topic: Optional[str] = None,
-    summary_model_override: Optional[str] = None,
-) -> Dict[str, Any]:
-    """Run a single forced compression pass over the fixture messages.
-
-    Returns a dict with:
-      - compressed_messages: the post-compression message list
-      - summary_text: the summary produced (extracted from the compressed head)
-      - pre_tokens, post_tokens: rough token counts before/after
-      - compression_ratio: 1 - (post/pre)
-      - pre_message_count, post_message_count
-    """
-    compressor = ContextCompressor(
-        model=compressor_model,
-        threshold_percent=0.50,
-        protect_first_n=3,
-        protect_last_n=20,
-        summary_target_ratio=0.20,
-        quiet_mode=True,
-        summary_model_override=summary_model_override or "",
-        base_url=compressor_base_url,
-        api_key=compressor_api_key,
-        config_context_length=context_length,
-        provider=compressor_provider,
-        api_mode=compressor_api_mode,
-    )
-
-    pre_tokens = estimate_messages_tokens_rough(messages)
-    compressed = compressor.compress(
-        messages,
-        current_tokens=pre_tokens,
-        focus_topic=focus_topic,
-    )
-    post_tokens = estimate_messages_tokens_rough(compressed)
-
-    summary_text = _extract_summary_from_messages(compressed)
-
-    ratio = (1.0 - (post_tokens / pre_tokens)) if pre_tokens > 0 else 0.0
-
-    return {
-        "compressed_messages": compressed,
-        "summary_text": summary_text,
-        "pre_tokens": pre_tokens,
-        "post_tokens": post_tokens,
-        "compression_ratio": ratio,
-        "pre_message_count": len(messages),
-        "post_message_count": len(compressed),
-    }
-
-
-_SUMMARY_MARKERS = (
-    "## Active Task",
-    "## Goal",
-    "## Completed Actions",
-)
-
-
-def _extract_summary_from_messages(messages: List[Dict[str, Any]]) -> str:
-    """Find the structured summary block inside the compressed message list.
-
-    The compressor injects the summary as a user (or system-appended) message
-    near the head. We look for the section-header markers from
-    ``_template_sections`` in ``agent/context_compressor.py``.
-    """
-    for msg in messages:
-        content = msg.get("content")
-        if not isinstance(content, str):
-            if isinstance(content, list):
-                content = "\n".join(
-                    p.get("text", "") for p in content if isinstance(p, dict)
-                )
-            else:
-                continue
-        if any(marker in content for marker in _SUMMARY_MARKERS):
-            return content
-    return ""
@@ -1,181 +0,0 @@
-"""Two-phase probe grading.
-
-Phase 1 — **Continuation**: simulate the next assistant turn. Feed the
-compressed message list plus the probe question and ask the continuing
-model to answer using only the compressed context. This is exactly what
-a real next-turn call would look like.
-
-Phase 2 — **Grading**: a separate judge-model call scores the answer on
-the six rubric dimensions using ``rubric.build_judge_prompt``.
-
-Both phases use the OpenAI SDK directly against the resolved provider
-endpoint, so the explicit api_key + base_url we pass always reaches the
-wire. (``agent.auxiliary_client.call_llm`` is designed for task-tagged
-auxiliary calls backed by config lookups; for eval we need the explicit
-credentials to win unconditionally.)
-"""
-from __future__ import annotations
-
-import logging
-import sys
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-_REPO_ROOT = Path(__file__).resolve().parents[2]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-
-from openai import OpenAI  # noqa: E402
-
-from rubric import build_judge_prompt, parse_judge_response  # noqa: E402
-
-logger = logging.getLogger(__name__)
-
-
-_CONTINUATION_SYSTEM = (
-    "You are the continuing assistant in a long session. Earlier turns have "
-    "been compacted into a handoff summary that is now part of the "
-    "conversation history. The user has just asked you a question. "
-    "Answer using ONLY what you can determine from the conversation history "
-    "you see (including the handoff summary). Do NOT invent details. If the "
-    "summary does not contain a specific fact, say so explicitly rather "
-    "than guessing. Be direct and concrete — cite file paths, PR numbers, "
-    "error codes, and exact values when they are present in the summary."
-)
-
-
-def answer_probe(
-    *,
-    compressed_messages: List[Dict[str, Any]],
-    probe_question: str,
-    model: str,
-    provider: str,
-    base_url: str,
-    api_key: str,
-    max_tokens: int = 1024,
-    timeout: Optional[float] = 120.0,
-) -> str:
-    """Run the continuation call: what does the next assistant answer?
-
-    Builds a messages list of [system_continuation, *compressed, probe_user]
-    and asks the configured model. Returns the answer content as a string.
-    """
-    # Strip any pre-existing system message from the compressed list and
-    # replace with our continuation system prompt. The fixture's generic
-    # system is not the right frame for the continuation simulation.
-    history = [m for m in compressed_messages if m.get("role") != "system"]
-    messages = (
-        [{"role": "system", "content": _CONTINUATION_SYSTEM}]
-        + _sanitize_for_chat_api(history)
-        + [{"role": "user", "content": probe_question}]
-    )
-
-    client = OpenAI(api_key=api_key, base_url=base_url, timeout=timeout)
-    response = client.chat.completions.create(
-        model=model,
-        messages=messages,
-        max_tokens=max_tokens,
-    )
-    content = response.choices[0].message.content
-    if not isinstance(content, str):
-        content = "" if content is None else str(content)
-    return content.strip()
-
-
-def grade_probe(
-    *,
-    probe_question: str,
-    probe_type: str,
-    expected_facts: List[str],
-    assistant_answer: str,
-    judge_model: str,
-    judge_provider: str,
-    judge_base_url: str,
-    judge_api_key: str,
-    max_tokens: int = 512,
-    timeout: Optional[float] = 120.0,
-) -> Dict[str, Any]:
-    """Run the judge call and parse the six dimension scores.
-
-    Returns dict {scores: {dim: int}, notes: str, overall: float,
-    raw: str, parse_error: str|None}. On parse failure, scores are zeros
-    and parse_error is populated — the caller decides whether to retry
-    or accept.
-    """
-    prompt = build_judge_prompt(
-        probe_question=probe_question,
-        probe_type=probe_type,
-        expected_facts=expected_facts,
-        assistant_answer=assistant_answer,
-    )
-    client = OpenAI(api_key=judge_api_key, base_url=judge_base_url, timeout=timeout)
-    response = client.chat.completions.create(
-        model=judge_model,
-        messages=[{"role": "user", "content": prompt}],
-        max_tokens=max_tokens,
-    )
-    raw = response.choices[0].message.content or ""
-    if not isinstance(raw, str):
-        raw = str(raw)
-
-    try:
-        parsed = parse_judge_response(raw)
-        parsed["raw"] = raw
-        parsed["parse_error"] = None
-        return parsed
-    except ValueError as exc:
-        logger.warning("Judge response parse failed: %s | raw=%r", exc, raw[:200])
-        from rubric import DIMENSIONS
-        return {
-            "scores": {d: 0 for d in DIMENSIONS},
-            "notes": "",
-            "overall": 0.0,
-            "raw": raw,
-            "parse_error": str(exc),
-        }
-
-
-def _sanitize_for_chat_api(
-    messages: List[Dict[str, Any]],
-) -> List[Dict[str, Any]]:
-    """Drop tool_calls/tool pairs that are incomplete.
-
-    A compressed message list may contain tool_call references whose matching
-    ``tool`` result was summarized away, which breaks strict-validator
-    providers (Anthropic, OpenAI). Easiest correct behaviour for the eval:
-    strip tool_calls entirely and drop ``tool`` role messages — the
-    continuation model only needs the summary + recent turns to answer the
-    probe, not the precise tool-call bookkeeping.
-    """
-    clean: List[Dict[str, Any]] = []
-    for m in messages:
-        role = m.get("role")
-        if role == "tool":
-            # Convert tool result to a plain user note so the continuation
-            # model still sees the content without needing the structured
-            # tool_call_id pairing.
-            content = m.get("content")
-            if isinstance(content, list):
-                content = "\n".join(
-                    p.get("text", "") for p in content if isinstance(p, dict)
-                )
-            clean.append({
-                "role": "user",
-                "content": f"[earlier tool result]\n{content or ''}",
-            })
-            continue
-        new = {"role": role, "content": m.get("content", "")}
-        # Drop tool_calls — the downstream assistant message's content
-        # still describes what the agent was doing.
-        clean.append(new)
-    # Collapse consecutive same-role turns into one (alternation rule)
-    merged: List[Dict[str, Any]] = []
-    for m in clean:
-        if merged and merged[-1]["role"] == m["role"]:
-            prev = merged[-1]
-            prev_c = prev.get("content") or ""
-            new_c = m.get("content") or ""
-            prev["content"] = f"{prev_c}\n\n{new_c}" if prev_c else new_c
-        else:
-            merged.append(m)
-    return merged
@@ -1,96 +0,0 @@
-{
-  "fixture": "config-build-competitive-scouts",
-  "description": "Probes for the competitive-scout cron-job setup session. Anchors are which agents were configured, which day of the week each runs, and the full final schedule. This fixture most directly tests artifact-trail and iterative-merge because the job list grows by one per user turn.",
-  "probes": [
-    {
-      "id": "recall-first-repo",
-      "type": "recall",
-      "question": "What was the first repository the user asked to create a scout cron for, and on what day of the week?",
-      "expected_facts": ["openclaw", "Sunday"]
-    },
-    {
-      "id": "recall-closed-source-target",
-      "type": "recall",
-      "question": "One of the scout targets does not have an open-source repository and had to be configured as a web scan instead. Which one, and on what day?",
-      "expected_facts": ["claude code", "Friday", "web scan"]
-    },
-    {
-      "id": "artifact-all-jobs",
-      "type": "artifact",
-      "question": "List every scout cron job created in this session.",
-      "expected_facts": [
-        "openclaw-pr-scout",
-        "nanoclaw-pr-scout",
-        "ironclaw-pr-scout",
-        "kilocode-pr-scout",
-        "codex-pr-scout",
-        "gemini-cli-pr-scout",
-        "cline-pr-scout",
-        "opencode-pr-scout",
-        "claude-code-scout",
-        "aider-pr-scout",
-        "roocode-pr-scout"
-      ]
-    },
-    {
-      "id": "artifact-final-schedule",
-      "type": "artifact",
-      "question": "What is the final weekly schedule? Give the day and the agents scanned on each day.",
-      "expected_facts": [
-        "Sun: openclaw, nanoclaw, ironclaw",
-        "Mon: kilo code",
-        "Tue: codex",
-        "Wed: gemini cli, cline",
-        "Thu: opencode",
-        "Fri: claude code",
-        "Sat: aider, roo"
-      ]
-    },
-    {
-      "id": "artifact-sunday-count",
-      "type": "artifact",
-      "question": "How many cron jobs run on Sunday?",
-      "expected_facts": ["3", "three", "openclaw, nanoclaw, ironclaw"]
-    },
-    {
-      "id": "artifact-total-count",
-      "type": "artifact",
-      "question": "How many scout cron jobs were created in total by the end of the session?",
-      "expected_facts": ["11", "eleven"]
-    },
-    {
-      "id": "decision-kilo-open-source",
-      "type": "decision",
-      "question": "The user asked whether Kilo Code is open source. What was the answer, and what did the user decide to do with it?",
-      "expected_facts": [
-        "yes, open source",
-        "Kilo-Org/kilocode",
-        "added as Monday scout"
-      ]
-    },
-    {
-      "id": "decision-saturday-fill",
-      "type": "decision",
-      "question": "Saturday was the last open day at one point. Which scout(s) were placed on Saturday, and why were those chosen?",
-      "expected_facts": ["aider", "roo", "filled in last based on openrouter popularity / cli comparison rankings"]
-    },
-    {
-      "id": "continuation-execution-time",
-      "type": "continuation",
-      "question": "At what local time of day do these scout cron jobs run?",
-      "expected_facts": ["10 AM Pacific", "17:00 UTC", "0 17 * * *"]
-    },
-    {
-      "id": "continuation-skill-used",
-      "type": "continuation",
-      "question": "Each scout job runs with a specific skill preloaded. Which one?",
-      "expected_facts": ["hermes-agent-dev"]
-    },
-    {
-      "id": "continuation-weekday-coverage",
-      "type": "continuation",
-      "question": "After the session ended, are there any weekdays still uncovered by a scout job?",
-      "expected_facts": ["no", "all 7 days covered", "full week loaded"]
-    }
-  ]
-}
@@ -1,72 +0,0 @@
-{
-  "fixture": "debug-session-feishu-id-model",
-  "description": "Probes for the Feishu identity-model PR #8388 triage session. Anchors are the PR number, what the PR actually contained, what upstream docs confirmed, and the final decision + reasoning.",
-  "probes": [
-    {
-      "id": "recall-pr-number",
-      "type": "recall",
-      "question": "What is the PR number under review in this session, and what repository is it against?",
-      "expected_facts": ["PR #8388", "NousResearch/hermes-agent", "hermes-agent"]
-    },
-    {
-      "id": "recall-bug-claim",
-      "type": "recall",
-      "question": "What is the core bug the PR claims to fix? Be specific about the identifier involved.",
-      "expected_facts": ["open_id", "app-scoped", "not canonical", "Feishu identity model"]
-    },
-    {
-      "id": "recall-upstream-confirmation",
-      "type": "recall",
-      "question": "Do upstream Feishu/Lark docs confirm that open_id is app-scoped rather than a canonical cross-app identity?",
-      "expected_facts": ["yes", "confirmed", "open.feishu.cn", "same user has different Open IDs in different apps"]
-    },
-    {
-      "id": "artifact-pr-scope",
-      "type": "artifact",
-      "question": "Roughly how large is PR #8388, and which gateway subsystems does it touch beyond the Feishu adapter?",
-      "expected_facts": ["4647 lines", "gateway/run.py", "cron/scheduler.py", "gateway/config.py", "multi-account", "bind"]
-    },
-    {
-      "id": "artifact-new-tool",
-      "type": "artifact",
-      "question": "Does the PR add a new tool file? If so, what is its path?",
-      "expected_facts": ["tools/feishu_id_tool.py", "new file"]
-    },
-    {
-      "id": "decision-pr-assessment",
-      "type": "decision",
-      "question": "What is the reviewer's overall assessment of PR #8388 — approve, reject, or something more nuanced? Explain in one sentence.",
-      "expected_facts": [
-        "core claim is correct",
-        "scope is wrong",
-        "bait-and-switch",
-        "overbuilt",
-        "implement cleaner ourselves"
-      ]
-    },
-    {
-      "id": "decision-core-claim-validity",
-      "type": "decision",
-      "question": "Setting aside the PR's size, is the underlying identity-model concern technically valid or not?",
-      "expected_facts": ["technically valid", "correct", "open_id is app-scoped"]
-    },
-    {
-      "id": "continuation-next-action",
-      "type": "continuation",
-      "question": "Based on the review outcome, what is the next action the agent has been asked to take regarding this PR?",
-      "expected_facts": ["close the PR", "implement ourselves", "cleaner", "less complex"]
-    },
-    {
-      "id": "continuation-implementation-scope",
-      "type": "continuation",
-      "question": "If implementing the Feishu fix cleanly ourselves, which specific behaviour needs to change — what should replace the current use of open_id?",
-      "expected_facts": ["use union_id", "or user_id", "canonical identity", "cross-app stable ID"]
-    },
-    {
-      "id": "continuation-sources-to-reference",
-      "type": "continuation",
-      "question": "Which upstream documentation sources were fetched during review that should be referenced when writing the clean implementation?",
-      "expected_facts": ["open.feishu.cn", "open.larkoffice.com", "user-identity-introduction"]
-    }
-  ]
-}
@@ -1,74 +0,0 @@
-{
-  "fixture": "feature-impl-context-priority",
-  "description": "Probes for the .hermes.md / AGENTS.md / CLAUDE.md / .cursorrules priority feature session. Anchors are the concrete facts the next assistant would need to continue: user's priority order, files modified, helper-function structure, live-test scenarios, and PR number.",
-  "probes": [
-    {
-      "id": "recall-priority-order",
-      "type": "recall",
-      "question": "What is the priority order the user asked for when multiple project-context files are present? List them from highest to lowest priority.",
-      "expected_facts": [".hermes.md", "AGENTS.md", "CLAUDE.md", ".cursorrules", "highest to lowest"]
-    },
-    {
-      "id": "recall-selection-mode",
-      "type": "recall",
-      "question": "When multiple context files exist in the same directory, does the agent now load all of them or pick only one?",
-      "expected_facts": ["only one", "priority-based selection", "highest-priority winner"]
-    },
-    {
-      "id": "artifact-files-modified",
-      "type": "artifact",
-      "question": "Which files in the hermes-agent repository were modified during this session? List them.",
-      "expected_facts": [
-        "agent/prompt_builder.py",
-        "tests/agent/test_prompt_builder.py"
-      ]
-    },
-    {
-      "id": "artifact-helper-functions",
-      "type": "artifact",
-      "question": "The session introduced separate helper functions for each context-file type. What are their names?",
-      "expected_facts": [
-        "_load_hermes_md",
-        "_load_agents_md",
-        "_load_claude_md",
-        "_load_cursorrules"
-      ]
-    },
-    {
-      "id": "artifact-test-scenarios",
-      "type": "artifact",
-      "question": "A scratch directory was created with scenario subdirectories to live-test the priority chain. Roughly how many scenarios, and what directory was it created under?",
-      "expected_facts": ["10 scenarios", "/tmp/context-priority-test"]
-    },
-    {
-      "id": "decision-claude-md-was-unsupported",
-      "type": "decision",
-      "question": "What was the finding about CLAUDE.md support in the existing loader before this session's changes?",
-      "expected_facts": ["CLAUDE.md was not handled", "not supported", "new handler added"]
-    },
-    {
-      "id": "decision-load-all-or-one",
-      "type": "decision",
-      "question": "Was the decision to load multiple context files when present, or to load only the highest-priority one? Explain the reasoning in one sentence.",
-      "expected_facts": ["load only one", "highest priority", "user preference", "do not want to load multiple"]
-    },
-    {
-      "id": "continuation-pr-number-and-status",
-      "type": "continuation",
-      "question": "A pull request was opened for this feature. What is the PR number and what is its merge status?",
-      "expected_facts": ["PR #2301", "merged", "squash"]
-    },
-    {
-      "id": "continuation-test-suite-result",
-      "type": "continuation",
-      "question": "What was the result of the full test suite run after the implementation changes?",
-      "expected_facts": ["5680 passed", "0 failures", "clean"]
-    },
-    {
-      "id": "continuation-next-step",
-      "type": "continuation",
-      "question": "If asked to pick up this session, what is the current state of main? Anything left to do?",
-      "expected_facts": ["merged to main", "main is current", "nothing outstanding", "pulled"]
-    }
-  ]
-}
@@ -1,235 +0,0 @@
-"""Markdown report rendering + diff-against-baseline for compression-eval runs.
-
-Report format is optimised for pasting directly into a PR description.
-Top-of-report table is the per-fixture medians; below that is the
-probe-by-probe miss list (scores < 3.0 on overall).
-
-Diff mode (``compare_to``) emits a second table with deltas per fixture
-per dimension against a previous run directory.
-"""
-from __future__ import annotations
-
-import json
-import statistics
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from rubric import DIMENSIONS
-
-
-def write_run_json(
-    *,
-    results_dir: Path,
-    fixture_name: str,
-    run_index: int,
-    payload: Dict[str, Any],
-) -> Path:
-    """Dump one fixture's per-run results as JSON for later diffing."""
-    results_dir.mkdir(parents=True, exist_ok=True)
-    path = results_dir / f"{fixture_name}-run-{run_index}.json"
-    with path.open("w") as fh:
-        json.dump(payload, fh, indent=2, ensure_ascii=False)
-    return path
-
-
-def _median(values: List[float]) -> float:
-    return statistics.median(values) if values else 0.0
-
-
-def _format_score(value: float) -> str:
-    return f"{value:.2f}"
-
-
-def _format_delta(baseline: float, current: float) -> str:
-    delta = current - baseline
-    if abs(delta) < 0.01:
-        return f"{current:.2f} (±0)"
-    sign = "+" if delta > 0 else ""
-    return f"{current:.2f} ({sign}{delta:.2f})"
-
-
-def summarize_fixture_runs(
-    fixture_runs: List[Dict[str, Any]],
-) -> Dict[str, Any]:
-    """Collapse N runs of one fixture into per-dimension medians + metadata.
-
-    Each run payload is {probes: [{id, type, scores: {...}, overall, ...}]}.
-    Returns {fixture_name, runs, dimension_medians, overall_median, misses}.
-    """
-    if not fixture_runs:
-        return {}
-
-    fixture_name = fixture_runs[0]["fixture_name"]
-    n_runs = len(fixture_runs)
-
-    # Per-probe-per-dimension aggregation across runs
-    probe_ids = [p["id"] for p in fixture_runs[0]["probes"]]
-    per_probe: Dict[str, Dict[str, List[float]]] = {
-        pid: {d: [] for d in DIMENSIONS} for pid in probe_ids
-    }
-    per_probe_overall: Dict[str, List[float]] = {pid: [] for pid in probe_ids}
-
-    for run in fixture_runs:
-        for p in run["probes"]:
-            pid = p["id"]
-            for d in DIMENSIONS:
-                per_probe[pid][d].append(p["scores"].get(d, 0))
-            per_probe_overall[pid].append(p["overall"])
-
-    # Median each probe across runs, then median those medians across probes
-    dim_medians: Dict[str, float] = {}
-    for d in DIMENSIONS:
-        per_probe_med = [_median(per_probe[pid][d]) for pid in probe_ids]
-        dim_medians[d] = _median(per_probe_med)
-    overall_median = _median([_median(per_probe_overall[pid]) for pid in probe_ids])
-
-    # Misses = probes whose median overall < 3.0
-    misses: List[Dict[str, Any]] = []
-    for pid in probe_ids:
-        med = _median(per_probe_overall[pid])
-        if med < 3.0:
-            # Pull the notes from the last run to give the reader a
-            # concrete clue. (Taking the most recent run is fine —
-            # notes vary across runs and any one is illustrative.)
-            notes = ""
-            probe_type = ""
-            for p in fixture_runs[-1]["probes"]:
-                if p["id"] == pid:
-                    notes = p.get("notes", "")
-                    probe_type = p.get("type", "")
-                    break
-            misses.append({
-                "id": pid,
-                "type": probe_type,
-                "overall_median": med,
-                "notes": notes,
-            })
-
-    return {
-        "fixture_name": fixture_name,
-        "runs": n_runs,
-        "dimension_medians": dim_medians,
-        "overall_median": overall_median,
-        "misses": misses,
-        "compression": fixture_runs[0].get("compression", {}),
-    }
-
-
-def render_report(
-    *,
-    label: str,
-    compressor_model: str,
-    judge_model: str,
-    runs_per_fixture: int,
-    summaries: List[Dict[str, Any]],
-    baseline_summaries: Optional[List[Dict[str, Any]]] = None,
-) -> str:
-    """Render the full markdown report.
-
-    baseline_summaries is the same shape as summaries, sourced from a
-    previous run (via --compare-to). When present, dimension scores in
-    the main table render with deltas.
-    """
-    lines: List[str] = []
-    lines.append(f"## Compression eval — label `{label}`")
-    lines.append("")
-    lines.append(f"- Compressor model: `{compressor_model}`")
-    lines.append(f"- Judge model: `{judge_model}`")
-    lines.append(f"- Runs per fixture: {runs_per_fixture}")
-    lines.append("- Medians over runs reported")
-    if baseline_summaries:
-        lines.append("- Deltas shown against baseline run")
-    lines.append("")
-
-    baseline_by_name: Dict[str, Dict[str, Any]] = {}
-    if baseline_summaries:
-        baseline_by_name = {s["fixture_name"]: s for s in baseline_summaries}
-
-    # Main table
-    header = ["Fixture"] + DIMENSIONS + ["overall"]
-    lines.append("| " + " | ".join(header) + " |")
-    lines.append("|" + "|".join(["---"] * len(header)) + "|")
-    for s in summaries:
-        row = [s["fixture_name"]]
-        baseline = baseline_by_name.get(s["fixture_name"])
-        for d in DIMENSIONS:
-            cur = s["dimension_medians"][d]
-            if baseline and d in baseline.get("dimension_medians", {}):
-                row.append(_format_delta(baseline["dimension_medians"][d], cur))
-            else:
-                row.append(_format_score(cur))
-        if baseline:
-            row.append(_format_delta(baseline["overall_median"], s["overall_median"]))
-        else:
-            row.append(_format_score(s["overall_median"]))
-        lines.append("| " + " | ".join(row) + " |")
-    lines.append("")
-
-    # Compression metadata
-    lines.append("### Compression summary")
-    lines.append("")
-    lines.append("| Fixture | Pre tokens | Post tokens | Ratio | Pre msgs | Post msgs |")
-    lines.append("|---|---|---|---|---|---|")
-    for s in summaries:
-        c = s.get("compression", {})
-        lines.append(
-            "| {name} | {pre} | {post} | {ratio:.1%} | {pm} | {pom} |".format(
-                name=s["fixture_name"],
-                pre=c.get("pre_tokens", 0),
-                post=c.get("post_tokens", 0),
-                ratio=c.get("compression_ratio", 0.0),
-                pm=c.get("pre_message_count", 0),
-                pom=c.get("post_message_count", 0),
-            )
-        )
-    lines.append("")
-
-    # Per-probe misses
-    any_misses = any(s["misses"] for s in summaries)
-    if any_misses:
-        lines.append("### Probes scoring below 3.0 overall (median)")
-        lines.append("")
-        for s in summaries:
-            if not s["misses"]:
-                continue
-            lines.append(f"**{s['fixture_name']}**")
-            for m in s["misses"]:
-                note_part = f" — {m['notes']}" if m["notes"] else ""
-                lines.append(
-                    f"- `{m['id']}` ({m['type']}): "
-                    f"{m['overall_median']:.2f}{note_part}"
-                )
-            lines.append("")
-
-    lines.append("### Methodology")
-    lines.append("")
-    lines.append(
-        "Probe-based eval adapted from "
-        "https://factory.ai/news/evaluating-compression. Each fixture is "
-        "compressed in a single forced `ContextCompressor.compress()` call, "
-        "then a continuation call asks the compressor model to answer each "
-        "probe from the compressed state, then the judge model scores the "
-        "answer 0-5 on six dimensions. A single run is noisy; medians "
-        "across multiple runs are the meaningful signal. Changes below "
-        "~0.3 on any dimension are likely within run-to-run noise."
-    )
-    return "\n".join(lines) + "\n"
-
-
-def load_baseline_summaries(baseline_dir: Path) -> List[Dict[str, Any]]:
-    """Load summaries from a previous eval run for --compare-to.
-
-    Reads the dumped per-run JSONs and re-summarises them so the
-    aggregation matches whatever summariser was current at the time of
-    the new run (forward-compatible with schema additions).
-    """
-    if not baseline_dir.exists():
-        raise FileNotFoundError(f"baseline dir not found: {baseline_dir}")
-
-    by_fixture: Dict[str, List[Dict[str, Any]]] = {}
-    for path in sorted(baseline_dir.glob("*-run-*.json")):
-        with path.open() as fh:
-            payload = json.load(fh)
-        by_fixture.setdefault(payload["fixture_name"], []).append(payload)
-
-    return [summarize_fixture_runs(runs) for runs in by_fixture.values()]
@@ -1,198 +0,0 @@
-"""Rubric for probe-based compression eval grading.
-
-Six dimensions scored 0-5 by a judge model. The scoring anchors are spelled
-out so the judge interpretation is stable across runs and across judge
-models.
-
-Adapted from the methodology in
-https://factory.ai/news/evaluating-compression. Their scoreboard is not
-adopted; only the dimension definitions and the 0-5 scale.
-"""
-from __future__ import annotations
-
-from typing import Any, Dict, List
-
-# Canonical dimension order. All reports, parsers, and comparisons derive
-# from this list — do not hardcode the order elsewhere.
-DIMENSIONS: List[str] = [
-    "accuracy",
-    "context_awareness",
-    "artifact_trail",
-    "completeness",
-    "continuity",
-    "instruction_following",
-]
-
-DIMENSION_DESCRIPTIONS: Dict[str, str] = {
-    "accuracy": (
-        "Are concrete facts correct — file paths, function names, PR/issue "
-        "numbers, error codes, command outputs, line numbers? A single wrong "
-        "path or error code should cost points. Vague but non-contradicting "
-        "answers score mid-range."
-    ),
-    "context_awareness": (
-        "Does the answer reflect the CURRENT state of the session, not a "
-        "mid-session snapshot? For example, if a file was modified then "
-        "reverted, does the answer describe the reverted state? If three "
-        "PRs were opened, does the answer know which was merged?"
-    ),
-    "artifact_trail": (
-        "Does the answer correctly enumerate the artifacts (files read, "
-        "files modified, commands run, tools called, PRs opened, cron jobs "
-        "created)? Missing artifacts cost more than extra unrelated ones."
-    ),
-    "completeness": (
-        "Does the answer address ALL parts of the probe question? If the "
-        "probe asks for three things and only two are answered, that is "
-        "incomplete regardless of accuracy on the two."
-    ),
-    "continuity": (
-        "Could the next assistant continue the work using only this answer, "
-        "without having to re-fetch files or re-explore the codebase? An "
-        "answer that lists files by name but doesn't mention the change is "
-        "poor continuity even if accurate."
-    ),
-    "instruction_following": (
-        "Is the answer in the format the probe requested (list, number, "
-        "short phrase, yes/no)? Ignore tone and length, only assess "
-        "whether the requested form was honoured."
-    ),
-}
-
-SCORE_SCALE: Dict[int, str] = {
-    0: "No useful information; wrong or hallucinated.",
-    1: "Major gaps or a key fact is wrong.",
-    2: "Partially correct but significant omissions.",
-    3: "Mostly correct with minor omissions or imprecision.",
-    4: "Correct and complete with only trivial imprecision.",
-    5: "Fully correct, complete, and in the requested format.",
-}
-
-
-_RUBRIC_HEADER = """You are an evaluator grading a single answer produced by an AI assistant \
-that was given a COMPRESSED handoff summary of an earlier conversation and \
-asked a probe question. You are NOT evaluating the compression summary \
-directly — you are evaluating whether the answer the assistant produced \
-from that summary is correct, complete, and useful.
-
-Grade on six dimensions, each 0-5:
-
-{dimension_block}
-
-0-5 scale:
-{scale_block}
-
-Grade strictly. Fractional scores are NOT allowed — output integers only. \
-If the answer is ambiguous, use the lower of the two candidate scores."""
-
-
-def build_judge_prompt(
-    *,
-    probe_question: str,
-    probe_type: str,
-    expected_facts: List[str],
-    assistant_answer: str,
-) -> str:
-    """Build the full judge prompt for one (probe, answer) pair.
-
-    The judge is told the expected_facts up front so grading is anchored to
-    concrete signal rather than judge taste. Expected facts are intentionally
-    NOT shown to the assistant that produces the answer.
-    """
-    dim_block = "\n".join(
-        f"- {d}: {DIMENSION_DESCRIPTIONS[d]}" for d in DIMENSIONS
-    )
-    scale_block = "\n".join(
-        f"  {score}: {desc}" for score, desc in sorted(SCORE_SCALE.items())
-    )
-    header = _RUBRIC_HEADER.format(
-        dimension_block=dim_block,
-        scale_block=scale_block,
-    )
-
-    expected_block = (
-        "\n".join(f"- {f}" for f in expected_facts) if expected_facts else "(none provided)"
-    )
-
-    output_schema = (
-        "Respond with ONLY a JSON object, no prose before or after, matching "
-        "this schema exactly:\n"
-        "{\n"
-        '  "accuracy": <int 0-5>,\n'
-        '  "context_awareness": <int 0-5>,\n'
-        '  "artifact_trail": <int 0-5>,\n'
-        '  "completeness": <int 0-5>,\n'
-        '  "continuity": <int 0-5>,\n'
-        '  "instruction_following": <int 0-5>,\n'
-        '  "notes": "<one short sentence, <=200 chars, identifying the '
-        'single biggest issue with the answer if any>"\n'
-        "}"
-    )
-
-    return (
-        f"{header}\n\n"
-        f"PROBE TYPE: {probe_type}\n\n"
-        f"PROBE QUESTION:\n{probe_question}\n\n"
-        f"EXPECTED FACTS (the answer should contain these concrete anchors; "
-        f"missing any is a material defect in accuracy and/or completeness):\n"
-        f"{expected_block}\n\n"
-        f"ASSISTANT ANSWER TO GRADE:\n{assistant_answer}\n\n"
-        f"{output_schema}"
-    )
-
-
-def parse_judge_response(raw: str) -> Dict[str, Any]:
-    """Parse the judge model's JSON response into a score dict.
-
-    Tolerates surrounding prose (judges ignore instructions sometimes) by
-    extracting the first {...} block. Validates that every dimension is
-    present as an integer 0-5.
-
-    Returns dict with keys: scores (dim->int), notes (str), overall (float).
-    Raises ValueError if the response cannot be parsed into a complete
-    score set.
-    """
-    import json
-    import re
-
-    if not raw or not raw.strip():
-        raise ValueError("empty judge response")
-
-    # Strip code fences and any ```json prefix judges sometimes emit.
-    stripped = raw.strip()
-    fence_match = re.match(r"^```(?:json)?\s*(.*?)\s*```$", stripped, re.DOTALL)
-    if fence_match:
-        stripped = fence_match.group(1).strip()
-
-    # Extract the first {...} block greedy-to-matching-brace.
-    brace_match = re.search(r"\{.*\}", stripped, re.DOTALL)
-    if not brace_match:
-        raise ValueError(f"no JSON object found in judge response: {raw[:200]!r}")
-    candidate = brace_match.group(0)
-
-    try:
-        parsed = json.loads(candidate)
-    except json.JSONDecodeError as exc:
-        raise ValueError(f"judge response not valid JSON: {exc}; raw={candidate[:200]!r}")
-
-    scores: Dict[str, int] = {}
-    for dim in DIMENSIONS:
-        if dim not in parsed:
-            raise ValueError(f"judge response missing dimension {dim!r}: {parsed}")
-        value = parsed[dim]
-        if isinstance(value, bool) or not isinstance(value, (int, float)):
-            raise ValueError(f"dimension {dim} is not numeric: {value!r}")
-        int_val = int(round(value))
-        if int_val < 0 or int_val > 5:
-            raise ValueError(f"dimension {dim} out of range: {int_val}")
-        scores[dim] = int_val
-
-    notes_val = parsed.get("notes", "")
-    notes = str(notes_val)[:200] if notes_val else ""
-
-    overall = sum(scores.values()) / len(scores)
-    return {
-        "scores": scores,
-        "notes": notes,
-        "overall": overall,
-    }
@@ -1,383 +0,0 @@
-#!/usr/bin/env python3
-"""Compression eval — entry point.
-
-Runs the full probe-based eval over one or more fixtures, produces a
-markdown report in ``results/<label>/report.md`` paired with per-run JSON
-for later diffing.
-
-Not a pytest. Requires a configured provider + credentials (same path the
-agent uses). Does not run in CI. See README.md for usage examples.
-"""
-from __future__ import annotations
-
-import json
-import logging
-import sys
-import time
-from datetime import datetime
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-_HERE = Path(__file__).resolve().parent
-_REPO_ROOT = _HERE.parents[1]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-# Make our sibling modules importable whether invoked as a script or as -m.
-if str(_HERE) not in sys.path:
-    sys.path.insert(0, str(_HERE))
-
-try:
-    import fire  # noqa: F401
-except ImportError:
-    fire = None  # fallback to argparse if fire is unavailable
-
-from hermes_cli.runtime_provider import resolve_runtime_provider  # noqa: E402
-
-from compressor_driver import run_compression  # noqa: E402
-from grader import answer_probe, grade_probe  # noqa: E402
-from report import (  # noqa: E402
-    load_baseline_summaries,
-    render_report,
-    summarize_fixture_runs,
-    write_run_json,
-)
-
-logger = logging.getLogger("compression_eval")
-
-
-FIXTURES_DIR = _HERE / "fixtures"
-PROBES_DIR = _HERE / "probes"
-RESULTS_DIR = _HERE / "results"
-
-
-def _load_fixture(name: str) -> Dict[str, Any]:
-    path = FIXTURES_DIR / f"{name}.json"
-    if not path.exists():
-        available = sorted(p.stem for p in FIXTURES_DIR.glob("*.json"))
-        raise FileNotFoundError(
-            f"Fixture not found: {name}. Available: {available}"
-        )
-    with path.open() as fh:
-        return json.load(fh)
-
-
-def _load_probes(name: str) -> Dict[str, Any]:
-    path = PROBES_DIR / f"{name}.probes.json"
-    if not path.exists():
-        raise FileNotFoundError(f"Probe bank not found for fixture {name}: {path}")
-    with path.open() as fh:
-        return json.load(fh)
-
-
-def _resolve_runtime(
-    *,
-    provider_override: Optional[str],
-    model_override: Optional[str],
-) -> Dict[str, Any]:
-    """Resolve provider credentials via the same path the agent uses."""
-    runtime = resolve_runtime_provider(
-        requested=provider_override,
-        target_model=model_override,
-    )
-    if not runtime.get("api_key") and not runtime.get("base_url"):
-        raise RuntimeError(
-            "No provider configured. Run `hermes setup` or set provider "
-            "credentials in the environment before running the eval."
-        )
-    return runtime
-
-
-def _available_fixtures() -> List[str]:
-    return sorted(p.stem for p in FIXTURES_DIR.glob("*.json"))
-
-
-def _run_one_fixture(
-    *,
-    fixture_name: str,
-    run_index: int,
-    compressor_runtime: Dict[str, Any],
-    compressor_model: str,
-    judge_runtime: Dict[str, Any],
-    judge_model: str,
-    focus_topic: Optional[str],
-) -> Dict[str, Any]:
-    fx = _load_fixture(fixture_name)
-    probes = _load_probes(fixture_name)
-
-    logger.info(
-        "[%s run=%d] compressing (%d messages, ctx=%d)",
-        fixture_name, run_index, len(fx["messages"]), fx["context_length"],
-    )
-    compression = run_compression(
-        messages=fx["messages"],
-        compressor_model=compressor_model,
-        compressor_provider=compressor_runtime["provider"],
-        compressor_base_url=compressor_runtime["base_url"],
-        compressor_api_key=compressor_runtime["api_key"],
-        compressor_api_mode=compressor_runtime.get("api_mode", ""),
-        context_length=fx["context_length"],
-        focus_topic=focus_topic,
-        # Force the compressor to use the model we're testing, bypassing
-        # any auxiliary.compression.model config override. Without this,
-        # ContextCompressor.call_llm(task="compression") routes through
-        # the user's config which may pin a different model (e.g.
-        # google/gemini-3-flash-preview).
-        summary_model_override=compressor_model,
-    )
-    logger.info(
-        "[%s run=%d] compressed %d -> %d tokens (%.1f%%)",
-        fixture_name, run_index,
-        compression["pre_tokens"], compression["post_tokens"],
-        compression["compression_ratio"] * 100,
-    )
-
-    probe_results: List[Dict[str, Any]] = []
-    for probe in probes["probes"]:
-        t0 = time.monotonic()
-        try:
-            answer = answer_probe(
-                compressed_messages=compression["compressed_messages"],
-                probe_question=probe["question"],
-                provider=compressor_runtime["provider"],
-                model=compressor_model,
-                base_url=compressor_runtime["base_url"],
-                api_key=compressor_runtime["api_key"],
-            )
-        except Exception as exc:
-            logger.warning(
-                "[%s run=%d probe=%s] continuation failed: %s",
-                fixture_name, run_index, probe["id"], exc,
-            )
-            answer = ""
-
-        try:
-            grade = grade_probe(
-                probe_question=probe["question"],
-                probe_type=probe["type"],
-                expected_facts=probe.get("expected_facts", []),
-                assistant_answer=answer,
-                judge_provider=judge_runtime["provider"],
-                judge_model=judge_model,
-                judge_base_url=judge_runtime["base_url"],
-                judge_api_key=judge_runtime["api_key"],
-            )
-        except Exception as exc:
-            logger.warning(
-                "[%s run=%d probe=%s] grading failed: %s",
-                fixture_name, run_index, probe["id"], exc,
-            )
-            from rubric import DIMENSIONS
-            grade = {
-                "scores": {d: 0 for d in DIMENSIONS},
-                "notes": f"grading error: {exc}",
-                "overall": 0.0,
-                "raw": "",
-                "parse_error": str(exc),
-            }
-
-        elapsed = time.monotonic() - t0
-        logger.info(
-            "[%s run=%d probe=%s] overall=%.2f (%.1fs)",
-            fixture_name, run_index, probe["id"], grade["overall"], elapsed,
-        )
-
-        probe_results.append({
-            "id": probe["id"],
-            "type": probe["type"],
-            "question": probe["question"],
-            "expected_facts": probe.get("expected_facts", []),
-            "answer": answer,
-            "scores": grade["scores"],
-            "overall": grade["overall"],
-            "notes": grade["notes"],
-            "parse_error": grade["parse_error"],
-            "elapsed_seconds": elapsed,
-        })
-
-    return {
-        "fixture_name": fixture_name,
-        "run_index": run_index,
-        "compression": {
-            "pre_tokens": compression["pre_tokens"],
-            "post_tokens": compression["post_tokens"],
-            "compression_ratio": compression["compression_ratio"],
-            "pre_message_count": compression["pre_message_count"],
-            "post_message_count": compression["post_message_count"],
-            "summary_text": compression["summary_text"],
-        },
-        "probes": probe_results,
-    }
-
-
-def _coerce_fixtures_arg(arg: Optional[str]) -> List[str]:
-    if not arg:
-        return _available_fixtures()
-    return [s.strip() for s in arg.split(",") if s.strip()]
-
-
-def main(
-    fixtures: Optional[str] = None,
-    runs: int = 3,
-    judge_model: Optional[str] = None,
-    judge_provider: Optional[str] = None,
-    compressor_model: Optional[str] = None,
-    compressor_provider: Optional[str] = None,
-    label: Optional[str] = None,
-    focus_topic: Optional[str] = None,
-    compare_to: Optional[str] = None,
-    verbose: bool = False,
-) -> int:
-    """Run the compression eval.
-
-    Args:
-        fixtures: Comma-separated fixture names; default = all in fixtures/.
-        runs: Runs per fixture. Medians reported. Default 3.
-        judge_model: Override the judge model (default = same as
-            compressor model resolved from config).
-        judge_provider: Override the judge provider.
-        compressor_model: Override the compressor model (default =
-            whatever resolve_runtime_provider returns for the active
-            configuration).
-        compressor_provider: Override the compressor provider.
-        label: Output subdirectory under results/. Default = timestamp.
-        focus_topic: Optional focus topic passed through to
-            ContextCompressor.compress(focus_topic=...).
-        compare_to: Path to a previous run directory (e.g.
-            results/2026-04-24_baseline) to diff against in the report.
-        verbose: Print debug logs.
-    """
-    logging.basicConfig(
-        level=logging.DEBUG if verbose else logging.INFO,
-        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
-    )
-
-    fixture_names = _coerce_fixtures_arg(fixtures)
-    # Validate every fixture has a probe bank before spending any money.
-    for name in fixture_names:
-        _load_fixture(name)
-        _load_probes(name)
-
-    compressor_runtime = _resolve_runtime(
-        provider_override=compressor_provider,
-        model_override=compressor_model,
-    )
-    effective_compressor_model = (
-        compressor_model or compressor_runtime.get("resolved_model") or "auto"
-    )
-    if effective_compressor_model == "auto":
-        # resolve_runtime_provider doesn't always fill resolved_model;
-        # fall back to reading model.default from config.
-        from hermes_cli.config import load_config
-        cfg = load_config()
-        mc = cfg.get("model", {}) or {}
-        if isinstance(mc, dict):
-            effective_compressor_model = (
-                mc.get("default") or mc.get("model") or "anthropic/claude-sonnet-4.6"
-            )
-        else:
-            effective_compressor_model = str(mc) or "anthropic/claude-sonnet-4.6"
-
-    if judge_provider or judge_model:
-        judge_runtime = _resolve_runtime(
-            provider_override=judge_provider,
-            model_override=judge_model,
-        )
-        effective_judge_model = judge_model or effective_compressor_model
-    else:
-        judge_runtime = compressor_runtime
-        effective_judge_model = effective_compressor_model
-
-    effective_label = label or datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    out_dir = RESULTS_DIR / effective_label
-    out_dir.mkdir(parents=True, exist_ok=True)
-
-    logger.info(
-        "Compression eval starting: label=%s fixtures=%s runs=%d "
-        "compressor=%s judge=%s out=%s",
-        effective_label, fixture_names, runs,
-        effective_compressor_model, effective_judge_model, out_dir,
-    )
-
-    all_summaries: List[Dict[str, Any]] = []
-    for fixture_name in fixture_names:
-        per_run: List[Dict[str, Any]] = []
-        for run_i in range(1, runs + 1):
-            payload = _run_one_fixture(
-                fixture_name=fixture_name,
-                run_index=run_i,
-                compressor_runtime=compressor_runtime,
-                compressor_model=effective_compressor_model,
-                judge_runtime=judge_runtime,
-                judge_model=effective_judge_model,
-                focus_topic=focus_topic,
-            )
-            write_run_json(
-                results_dir=out_dir,
-                fixture_name=fixture_name,
-                run_index=run_i,
-                payload=payload,
-            )
-            per_run.append(payload)
-        summary = summarize_fixture_runs(per_run)
-        all_summaries.append(summary)
-
-    baseline_summaries: Optional[List[Dict[str, Any]]] = None
-    if compare_to:
-        baseline_path = Path(compare_to)
-        if not baseline_path.is_absolute():
-            baseline_path = _HERE / baseline_path
-        baseline_summaries = load_baseline_summaries(baseline_path)
-
-    report_md = render_report(
-        label=effective_label,
-        compressor_model=effective_compressor_model,
-        judge_model=effective_judge_model,
-        runs_per_fixture=runs,
-        summaries=all_summaries,
-        baseline_summaries=baseline_summaries,
-    )
-    report_path = out_dir / "report.md"
-    report_path.write_text(report_md)
-
-    # Also write a machine-readable summary.json alongside the human report.
-    summary_path = out_dir / "summary.json"
-    with summary_path.open("w") as fh:
-        json.dump(
-            {
-                "label": effective_label,
-                "compressor_model": effective_compressor_model,
-                "judge_model": effective_judge_model,
-                "runs_per_fixture": runs,
-                "fixtures": all_summaries,
-            },
-            fh,
-            indent=2,
-            ensure_ascii=False,
-        )
-
-    print()
-    print(report_md)
-    print(f"Report written to {report_path}")
-    print(f"Per-run JSON in {out_dir}")
-    return 0
-
-
-if __name__ == "__main__":
-    if fire is not None:
-        # fire preserves docstrings as --help and handles kwarg-style CLI.
-        sys.exit(fire.Fire(main))
-    else:
-        import argparse
-        p = argparse.ArgumentParser()
-        p.add_argument("--fixtures")
-        p.add_argument("--runs", type=int, default=3)
-        p.add_argument("--judge-model", dest="judge_model")
-        p.add_argument("--judge-provider", dest="judge_provider")
-        p.add_argument("--compressor-model", dest="compressor_model")
-        p.add_argument("--compressor-provider", dest="compressor_provider")
-        p.add_argument("--label")
-        p.add_argument("--focus-topic", dest="focus_topic")
-        p.add_argument("--compare-to", dest="compare_to")
-        p.add_argument("--verbose", action="store_true")
-        args = p.parse_args()
-        sys.exit(main(**vars(args)))
@@ -1,381 +0,0 @@
-"""One-shot fixture scrubber for scripts/compression_eval/fixtures/.
-
-Source: ~/.hermes/sessions/<file>.jsonl
-Output: .worktrees/.../scripts/compression_eval/fixtures/<name>.json
-
-Scrubbing passes:
-  1. agent.redact.redact_sensitive_text — API keys, tokens, connection strings
-  2. Username paths — /home/teknium/ → /home/user/, ~/.hermes/ preserved as-is
-     (that path is universal)
-  3. Personal handles — "Teknium"/"teknium"/"teknium1" → "user"
-  4. Reasoning scratchpads — strip <REASONING_SCRATCHPAD>...</REASONING_SCRATCHPAD>
-     blocks and <think>...</think> tags (personality leakage risk)
-  5. session_meta line — drop entirely, we only need the messages
-  6. User message personality — lightly paraphrase the first user message to keep
-     task intent while removing "vibe"; subsequent user turns kept verbatim
-     since they're short instructions
-
-The fixture format matches DESIGN.md:
-  {
-    "name": "...",
-    "description": "...",
-    "model": "...",           # best guess from original session
-    "context_length": 200000,
-    "messages": [...],        # OpenAI-format, only role/content/tool_calls/tool_call_id/tool_name
-    "notes": "Scrubbed from ~/.hermes/sessions/... on 2026-04-24"
-  }
-"""
-from __future__ import annotations
-
-import json
-import re
-import sys
-from datetime import datetime
-from pathlib import Path
-from typing import Any, Dict, List
-
-# Resolve the hermes-agent checkout relative to this script so agent.redact
-# imports cleanly whether we run from a worktree or a main clone.
-_REPO_ROOT = Path(__file__).resolve().parents[2]
-sys.path.insert(0, str(_REPO_ROOT))
-from agent.redact import redact_sensitive_text  # noqa: E402
-
-
-SESSION_DIR = Path.home() / ".hermes" / "sessions"
-# Resolve FIXTURES_DIR relative to this script so the scrubber runs the
-# same way inside a worktree, a main checkout, or from a contributor's
-# clone at a different path.
-FIXTURES_DIR = Path(__file__).resolve().parent / "fixtures"
-
-# (source_file, output_name, description, user_first_paraphrase, model_guess, context_length, truncate_at)
-# truncate_at: keep messages[:truncate_at] (None = keep all). Applied BEFORE
-# orphan-empty-assistant cleanup.
-SPECS = [
-    (
-        "20260321_060441_fef7be92.jsonl",
-        "feature-impl-context-priority",
-        "~75-turn feature-impl: user asks how multiple project-context files "
-        "(.hermes.md / AGENTS.md / CLAUDE.md / .cursorrules) are handled when "
-        "all are present; agent investigates the codebase, designs a priority "
-        "order, patches the loader + tests, live-tests with a scenario "
-        "directory, commits to a feature branch, opens a PR, and merges after "
-        "approval. Exercises investigate → decide → implement → verify → "
-        "ship flow with clear artifact trail (2 files modified, 1 PR).",
-        (
-            "If .hermes.md, AGENTS.md, CLAUDE.md, and .cursorrules all exist in "
-            "the same directory, does the agent load all of them or pick one? "
-            "Use the hermes-agent-dev skill to check."
-        ),
-        "anthropic/claude-sonnet-4.6",
-        200000,
-        74,  # cut at "Merged and pulled. Main is current." — drops trailing unrelated cron-delivery messages
-    ),
-    (
-        "20260412_233741_3f2119a8.jsonl",
-        "debug-session-feishu-id-model",
-        "~60-turn debug/triage PR-review session: a third-party bug report "
-        "says the gateway's Feishu adapter misuses the open_id / union_id / "
-        "user_id identity model (open_id is app-scoped, not the bot's "
-        "canonical ID). An open community PR (#8388) tries to fix it. Agent "
-        "reviews the PR against current main, fetches upstream Feishu/Lark "
-        "identity docs, and produces a decision. Exercises long tool-heavy "
-        "context with PR diffs, upstream docs, and a clear decision at the "
-        "end — the classic 'can the summary still name the PR number, the "
-        "root cause, and the decision?' scenario.",
-        (
-            "A community user reports the Feishu/Lark adapter gets the identity "
-            "model wrong — open_id is app-scoped, not the bot's canonical ID. "
-            "There's an open PR #8388 trying to fix it. Use the hermes-agent-dev "
-            "skill and the pr-triage-salvage skill to review it."
-        ),
-        "anthropic/claude-sonnet-4.6",
-        200000,
-        58,  # end at "Here's my review: ..." — clean decision point before the "close it, implement cleaner" pivot
-    ),
-    (
-        "20260328_160817_77bd258b.jsonl",
-        "config-build-competitive-scouts",
-        "~60-turn iterative config/build session: user wants a set of weekly "
-        "cron jobs that scan competing AI coding agents (openclaw, nanoclaw, "
-        "ironclaw, codex, opencode, claude-code, kilo-code, gemini-cli, "
-        "cline, aider, roo) for merged PRs or web updates worth porting to "
-        "hermes-agent. User adds one target per turn; agent creates each cron "
-        "job and re-states the accumulated schedule. Exercises artifact trail "
-        "(which jobs are configured, which days) and iterative state "
-        "accumulation — the canonical case for iterative-merge summarization.",
-        (
-            "Set up a cron job for the agent every Sunday to scan all PRs "
-            "merged into openclaw that week, decide which are worth adding to "
-            "hermes-agent, and open PRs porting those features."
-        ),
-        "anthropic/claude-sonnet-4.6",
-        200000,
-        None,
-    ),
-]
-
-
-# Tool output truncation is DELIBERATELY DISABLED.
-#
-# An earlier iteration truncated tool outputs > 2KB to keep fixture JSON
-# files small, but that defeats the whole purpose of the eval. Real
-# sessions have 30KB skill_view dumps, 10KB read_file outputs, 5KB
-# web_extract bodies — compression has to either head-protect them,
-# summarize them, or drop them. A fixture without that load doesn't
-# exercise the compressor. The size win wasn't worth the signal loss.
-#
-# The function remains so the scrubbing_passes record in the fixture
-# JSON continues to truthfully describe what was applied (no-op in this
-# configuration).
-_TOOL_OUTPUT_MAX = None  # None disables truncation entirely
-
-
-def _maybe_truncate_tool_output(text: str, tool_name: str) -> str:
-    if _TOOL_OUTPUT_MAX is None or not text or len(text) <= _TOOL_OUTPUT_MAX:
-        return text
-    keep = _TOOL_OUTPUT_MAX - 200
-    head = text[:keep]
-    return (
-        head
-        + f"\n\n[... tool output truncated for fixture — original was {len(text)} chars"
-        + (f" from {tool_name}" if tool_name else "")
-        + "]"
-    )
-
-
-_PATH_RE = re.compile(r"/home/teknium\b")
-# No \b boundaries — some tool content stores newlines as the literal
-# two-char sequence "\\n" (escaped JSON), so a "\\nTeknium..." run has a
-# word char ('n') immediately before 'T' and \b fails. Substring match is
-# safer here; "Teknium" as a substring of an unrelated word is
-# implausible in this corpus.
-_USER_RE = re.compile(r"teknium1|Teknium|teknium", re.IGNORECASE)
-# Only strip scratchpads in ASSISTANT content, not tool results (might be legit)
-_SCRATCH_RE = re.compile(
-    r"<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>\s*", re.DOTALL
-)
-_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
-# Discord/Telegram user mention leakage in messaging-platform sessions
-_USER_MENTION_RE = re.compile(r"<@\*{3}>|<@\d+>")
-# Contributor emails (from git show output etc) — anything@domain.tld
-# Keep noreply@github-style placeholders obvious; real personal emails get
-# replaced with a contributor placeholder.
-_EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
-# "Author: Name <email>" git-show headers — rewrite the whole line
-_GIT_AUTHOR_RE = re.compile(r"Author:\s*[^<\n]+<[^>]+>")
-
-
-def _scrub_text(text: str, *, drop_scratchpads: bool = False) -> str:
-    """Apply the pipeline to a raw text string.
-
-    drop_scratchpads only affects assistant messages — tool outputs that
-    happen to contain similar markers are left alone.
-    """
-    if not text:
-        return text
-    if drop_scratchpads:
-        text = _SCRATCH_RE.sub("", text)
-        text = _THINK_RE.sub("", text)
-    text = _PATH_RE.sub("/home/user", text)
-    text = _USER_RE.sub("user", text)
-    text = _USER_MENTION_RE.sub("<@user>", text)
-    # Rewrite git "Author: Name <email>" lines before generic email replace
-    text = _GIT_AUTHOR_RE.sub("Author: contributor <contributor@example.com>", text)
-    text = _EMAIL_RE.sub("contributor@example.com", text)
-    text = redact_sensitive_text(text)
-    return text
-
-
-def _content_to_str(content: Any) -> str:
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts = []
-        for p in content:
-            if isinstance(p, dict) and "text" in p:
-                parts.append(p["text"])
-            elif isinstance(p, str):
-                parts.append(p)
-        return "\n".join(parts)
-    return str(content)
-
-
-def _scrub_tool_calls(tool_calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    out = []
-    for tc in tool_calls or []:
-        if not isinstance(tc, dict):
-            continue
-        fn = tc.get("function", {}) or {}
-        args = fn.get("arguments", "")
-        if isinstance(args, str):
-            args = _scrub_text(args)
-        new_tc = {
-            "id": tc.get("id", ""),
-            "type": tc.get("type", "function"),
-            "function": {
-                "name": fn.get("name", ""),
-                "arguments": args,
-            },
-        }
-        out.append(new_tc)
-    return out
-
-
-def _scrub_message(m: Dict[str, Any], *, first_user_paraphrase: str | None, user_turn_idx: List[int]) -> Dict[str, Any] | None:
-    role = m.get("role")
-    if role in (None, "session_meta"):
-        return None
-
-    content = _content_to_str(m.get("content"))
-
-    if role == "assistant":
-        content = _scrub_text(content, drop_scratchpads=True)
-    elif role == "user":
-        # Use paraphrase for the very first user turn only
-        user_turn_idx[0] += 1
-        if user_turn_idx[0] == 1 and first_user_paraphrase is not None:
-            content = first_user_paraphrase
-        else:
-            content = _scrub_text(content)
-    else:
-        content = _scrub_text(content)
-        # Truncate large tool outputs
-        if role == "tool":
-            tn = m.get("tool_name") or m.get("name") or ""
-            content = _maybe_truncate_tool_output(content, tn)
-
-    new_msg: Dict[str, Any] = {"role": role, "content": content}
-
-    if role == "assistant":
-        tcs = m.get("tool_calls") or []
-        if tcs:
-            new_msg["tool_calls"] = _scrub_tool_calls(tcs)
-    if role == "tool":
-        if m.get("tool_call_id"):
-            new_msg["tool_call_id"] = m["tool_call_id"]
-        if m.get("tool_name") or m.get("name"):
-            new_msg["tool_name"] = m.get("tool_name") or m.get("name")
-
-    return new_msg
-
-
-def build_fixture(
-    source_file: str,
-    output_name: str,
-    description: str,
-    first_user_paraphrase: str,
-    model_guess: str,
-    context_length: int,
-    truncate_at: int | None = None,
-) -> Dict[str, Any]:
-    src = SESSION_DIR / source_file
-    raw_msgs: List[Dict[str, Any]] = []
-    with src.open() as fh:
-        for line in fh:
-            try:
-                raw_msgs.append(json.loads(line))
-            except Exception:
-                pass
-
-    # Skip session_meta lines up front so truncate_at counts real messages
-    raw_msgs = [m for m in raw_msgs if m.get("role") != "session_meta"]
-    if truncate_at is not None:
-        raw_msgs = raw_msgs[:truncate_at]
-
-    user_turn_counter = [0]
-    scrubbed: List[Dict[str, Any]] = []
-    for m in raw_msgs:
-        new = _scrub_message(
-            m,
-            first_user_paraphrase=first_user_paraphrase,
-            user_turn_idx=user_turn_counter,
-        )
-        if new is not None:
-            scrubbed.append(new)
-
-    # Drop empty-content assistant messages that have no tool_calls
-    # (artifact of scratchpad-only turns post-scrub)
-    pruned: List[Dict[str, Any]] = []
-    for m in scrubbed:
-        if (
-            m["role"] == "assistant"
-            and not (m.get("content") or "").strip()
-            and not m.get("tool_calls")
-        ):
-            continue
-        pruned.append(m)
-    # Trim trailing orphan tool messages (no matching assistant)
-    while pruned and pruned[-1]["role"] == "tool":
-        pruned.pop()
-    scrubbed = pruned
-
-    # Inject a synthetic public-safe system message so the compressor has
-    # a head to anchor on. The real system prompts embed personality and
-    # platform-specific content we don't want checked in.
-    system_msg = {
-        "role": "system",
-        "content": (
-            "You are a helpful AI coding assistant with access to tools "
-            "(terminal, file editing, search, web, etc.). You operate in a "
-            "conversational loop: the user gives you a task, you call tools "
-            "to accomplish it, and you report back concisely."
-        ),
-    }
-    if scrubbed and scrubbed[0].get("role") == "system":
-        scrubbed[0] = system_msg
-    else:
-        scrubbed.insert(0, system_msg)
-
-    fixture = {
-        "name": output_name,
-        "description": description,
-        "model": model_guess,
-        "context_length": context_length,
-        "source": f"~/.hermes/sessions/{source_file}",
-        "truncated_to": truncate_at,
-        "scrubbed_at": datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"),
-        "scrubbing_passes": [
-            "redact_sensitive_text (agent.redact)",
-            "username paths replaced with /home/user",
-            "personal handles (all case variants of the maintainer name) replaced with 'user'",
-            "email addresses replaced with contributor@example.com",
-            "git 'Author: Name <addr>' header lines normalised",
-            "reasoning scratchpad blocks stripped from assistant content",
-            "think tag blocks stripped from assistant content",
-            "messaging-platform user mentions replaced with <@user>",
-            "first user message paraphrased to remove personal voice",
-            "subsequent user messages kept verbatim (after above redactions)",
-            "system prompt replaced with generic public-safe placeholder",
-            "orphan empty-assistant messages and trailing tool messages dropped",
-            "tool outputs preserved verbatim (truncation disabled so the compressor sees real load)",
-        ],
-        "messages": scrubbed,
-    }
-    return fixture
-
-
-def main() -> int:
-    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
-    for spec in SPECS:
-        source_file, output_name, description, paraphrase, model, ctx, truncate = spec
-        fixture = build_fixture(
-            source_file=source_file,
-            output_name=output_name,
-            description=description,
-            first_user_paraphrase=paraphrase,
-            model_guess=model,
-            context_length=ctx,
-            truncate_at=truncate,
-        )
-        out_path = FIXTURES_DIR / f"{output_name}.json"
-        with out_path.open("w") as fh:
-            json.dump(fixture, fh, indent=2, ensure_ascii=False)
-        size_kb = out_path.stat().st_size / 1024
-        print(f"  {output_name}.json  {size_kb:.1f} KB  {len(fixture['messages'])} msgs")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
@@ -29,10 +29,25 @@ BOLD='\033[1m'
 REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
 REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
 HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
-INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
+# The final INSTALL_DIR is chosen later by resolve_install_layout, once the
+# arguments are parsed and the OS is known, so that root installs can get an
+# FHS-style location.  Here we only seed it from $HERMES_INSTALL_DIR and
+# remember whether the user picked a directory themselves; an explicit
+# choice is never overridden.
+INSTALL_DIR="${HERMES_INSTALL_DIR:-}"
+INSTALL_DIR_EXPLICIT=false
+if [ -n "$INSTALL_DIR" ]; then
+    INSTALL_DIR_EXPLICIT=true
+fi
 PYTHON_VERSION="3.11"
 NODE_VERSION="22"

+# FHS-style root install layout (set by resolve_install_layout when applicable):
+#   code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
+#   data still at /root/.hermes (HERMES_HOME).  Matches Claude Code / Codex CLI
+#   and keeps Docker bind-mounted /root/ volumes lean.
+ROOT_FHS_LAYOUT=false
+
 # Options
 USE_VENV=true
 RUN_SETUP=true
@@ -64,6 +79,7 @@ while [[ $# -gt 0 ]]; do
            ;;
        --dir)
            INSTALL_DIR="$2"
+            INSTALL_DIR_EXPLICIT=true
            shift 2
            ;;
        --hermes-home)
@@ -79,9 +95,20 @@ while [[ $# -gt 0 ]]; do
            echo "  --no-venv      Don't create virtual environment"
            echo "  --skip-setup   Skip interactive setup wizard"
            echo "  --branch NAME  Git branch to install (default: main)"
-            echo "  --dir PATH     Installation directory (default: ~/.hermes/hermes-agent)"
+            echo "  --dir PATH     Installation directory"
+            echo "                   default (non-root):  ~/.hermes/hermes-agent"
+            echo "                   default (root, Linux): /usr/local/lib/hermes-agent"
            echo "  --hermes-home PATH  Data directory (default: ~/.hermes, or \$HERMES_HOME)"
            echo "  -h, --help     Show this help"
+            echo ""
+            echo "Notes:"
+            echo "  When running as root on Linux, Hermes installs the code under"
+            echo "  /usr/local/lib/hermes-agent and links the command into"
+            echo "  /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
+            echo "  Data, config, sessions, and logs still live in \$HERMES_HOME"
+            echo "  (default /root/.hermes).  This keeps Docker bind-mounted volumes"
+            echo "  small and ensures the command is on PATH for all shells."
+            echo "  Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
            exit 0
            ;;
        *)
@@ -163,9 +190,60 @@ is_termux() {
    [ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
 }

+# resolve_install_layout: pick INSTALL_DIR (repo checkout + venv) and, via
+# ROOT_FHS_LAYOUT, where the `hermes` command link goes.  main() runs it
+# right after detect_os, because the decision reads $OS.
+#
+#   Situation                        INSTALL_DIR                    link dir
+#   --dir / $HERMES_INSTALL_DIR      whatever the user gave         (not changed here)
+#   Termux                           $HERMES_HOME/hermes-agent      $PREFIX/bin
+#   root + Linux, legacy checkout    $HERMES_HOME/hermes-agent      $HOME/.local/bin
+#   root + Linux, fresh              /usr/local/lib/hermes-agent    /usr/local/bin
+#   anything else                    $HERMES_HOME/hermes-agent      $HOME/.local/bin
+#
+# "Legacy checkout" means $HERMES_HOME/hermes-agent/.git is a directory.
+# Root on macOS stays on the legacy layout: /usr/local there is Homebrew
+# territory and we don't want to fight that.
+resolve_install_layout() {
+    local legacy_dir="$HERMES_HOME/hermes-agent"
+
+    # A directory the user chose is only reported, never replaced.
+    if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
+        log_info "Install directory: $INSTALL_DIR (explicit)"
+        return 0
+    fi
+
+    # Every case except a fresh root install on Linux ends up in the legacy
+    # directory, so start from it and only switch to FHS at the bottom.
+    INSTALL_DIR="$legacy_dir"
+
+    # Termux (the package manager owns /data/data/...), non-root users and
+    # non-Linux hosts all keep the user-scoped layout chosen above.
+    if is_termux || ! { [ "$OS" = "linux" ] && [ "$(id -u)" -eq 0 ]; }; then
+        return 0
+    fi
+
+    # Root on Linux with an existing legacy checkout: leave it where it is.
+    if [ -d "$legacy_dir/.git" ]; then
+        log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
+        log_info "  (new root installs use /usr/local/lib/hermes-agent)"
+        return 0
+    fi
+
+    INSTALL_DIR="/usr/local/lib/hermes-agent"
+    ROOT_FHS_LAYOUT=true
+    log_info "Root install on Linux — using FHS layout"
+    log_info "  Code:    $INSTALL_DIR"
+    log_info "  Command: /usr/local/bin/hermes"
+    log_info "  Data:    $HERMES_HOME (unchanged)"
+    return 0
+}
+
 get_command_link_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo "$PREFIX/bin"
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo "/usr/local/bin"
    else
        echo "$HOME/.local/bin"
    fi
@@ -174,6 +252,8 @@ get_command_link_dir() {
 get_command_link_display_dir() {
    if is_termux && [ -n "${PREFIX:-}" ]; then
        echo '$PREFIX/bin'
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo '/usr/local/bin'
    else
        echo '~/.local/bin'
    fi
@@ -975,6 +1055,14 @@ setup_path() {
        return 0
    fi

+    # FHS layout: /usr/local/bin is already on PATH for every standard shell, so no shell config needs editing; the export only covers the rest of this installer run.
+    if [ "$ROOT_FHS_LAYOUT" = true ]; then
+        export PATH="$command_link_dir:$PATH"
+        log_info "/usr/local/bin is already on PATH for all shells"
+        log_success "hermes command ready"
+        return 0
+    fi
+
    # Check if ~/.local/bin is on PATH; if not, add it to shell config.
    # Detect the user's actual login shell (not the shell running this script,
    # which is always bash when piped from curl).
@@ -1339,12 +1427,12 @@ print_success() {
    echo ""

    # Show file locations
-    echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
+    echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
    echo ""
-    echo -e "   ${YELLOW}Config:${NC}    ~/.hermes/config.yaml"
-    echo -e "   ${YELLOW}API Keys:${NC}  ~/.hermes/.env"
-    echo -e "   ${YELLOW}Data:${NC}      ~/.hermes/cron/, sessions/, logs/"
-    echo -e "   ${YELLOW}Code:${NC}      ~/.hermes/hermes-agent/"
+    echo -e "   ${YELLOW}Config:${NC}    $HERMES_HOME/config.yaml"
+    echo -e "   ${YELLOW}API Keys:${NC}  $HERMES_HOME/.env"
+    echo -e "   ${YELLOW}Data:${NC}      $HERMES_HOME/cron/, sessions/, logs/"
+    echo -e "   ${YELLOW}Code:${NC}      $INSTALL_DIR"
    echo ""

    echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
@@ -1364,6 +1452,9 @@ print_success() {
    if [ "$DISTRO" = "termux" ]; then
        echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
        echo ""
+    elif [ "$ROOT_FHS_LAYOUT" = true ]; then
+        echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
+        echo ""
    else
        echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
        echo ""
@@ -1415,6 +1506,7 @@ main() {
    print_banner

    detect_os
+    resolve_install_layout
    install_uv
    check_python
    check_git
@@ -48,6 +48,9 @@ AUTHOR_MAP = {
    "jefferson@heimdallstrategy.com": "Mind-Dragon",
    "130918800+devorun@users.noreply.github.com": "devorun",
    "maks.mir@yahoo.com": "say8hi",
+    "web3blind@users.noreply.github.com": "web3blind",
+    "julia@alexland.us": "alexg0bot",
+    "1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
    # contributors (from noreply pattern)
    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -59,14 +62,19 @@ AUTHOR_MAP = {
    "keifergu@tencent.com": "keifergu",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
    "abner.the.foreman@agentmail.to": "Abnertheforeman",
+    "thomasgeorgevii09@gmail.com": "tochukwuada",
    "harryykyle1@gmail.com": "hharry11",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
    "keira.voss94@gmail.com": "keiravoss94",
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
+    "simbamax99@gmail.com": "simbam99",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
+    "cyprian@ironin.pl": "iRonin",
    "valdi.jorge@gmail.com": "jvcl",
+    "q19dcp@gmail.com": "aj-nt",
+    "ebukau84@gmail.com": "UgwujaGeorge",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
    "oussama.redcode@gmail.com": "mavrickdeveloper",
@@ -84,6 +92,7 @@ AUTHOR_MAP = {
    "104278804+Sertug17@users.noreply.github.com": "Sertug17",
    "112503481+caentzminger@users.noreply.github.com": "caentzminger",
    "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
+    "xydarcher@uestc.edu.cn": "Readon",
    "sir_even@icloud.com": "sirEven",
    "36056348+sirEven@users.noreply.github.com": "sirEven",
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
@@ -106,6 +115,7 @@ AUTHOR_MAP = {
    "30841158+n-WN@users.noreply.github.com": "n-WN",
    "tsuijinglei@gmail.com": "hiddenpuppy",
    "jerome@clawwork.ai": "HiddenPuppy",
+    "jerome.benoit@sap.com": "jerome-benoit",
    "wysie@users.noreply.github.com": "Wysie",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
@@ -200,6 +210,9 @@ AUTHOR_MAP = {
    "1434494126@qq.com": "5park1e",
    "158153005+5park1e@users.noreply.github.com": "5park1e",
    "innocarpe@gmail.com": "innocarpe",
+    "noreply@ked.com": "qike-ms",
+    "andrekurait@gmail.com": "AndreKurait",
+    "bsgdigital@users.noreply.github.com": "bsgdigital",
    "numman.ali@gmail.com": "nummanali",
    "rohithsaimidigudla@gmail.com": "whitehatjr1001",
    "0xNyk@users.noreply.github.com": "0xNyk",
@@ -490,6 +503,9 @@ AUTHOR_MAP = {
    "zhangxicen@example.com": "zhangxicen",
    "codex@openai.invalid": "teknium1",
    "screenmachine@gmail.com": "teknium1",
+    "chenzeshi@live.com": "chen1749144759",
+    "mor.aleksandr@yahoo.com": "MorAlekss",
+    "ash@users.noreply.github.com": "ash",
 }


@@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(

 ### Model architecture

+<!-- ascii-guard-ignore -->
 ```
 SAM Architecture:
 ┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
@@ -144,6 +145,7 @@ SAM Architecture:
   Image Embeddings      Prompt Embeddings         Masks + IoU
   (computed once)       (per prompt)             predictions
 ```
+<!-- ascii-guard-ignore-end -->

 ### Model variants

@@ -0,0 +1,42 @@
+"""Locate HERMES_HOME from skill scripts that run outside Hermes.
+
+Scripts under ``google-workspace/scripts/`` can be launched by a bare
+interpreter (system Python, a nix env, CI) in which ``hermes_constants``
+cannot be imported.  Import ``get_hermes_home()`` and
+``display_hermes_home()`` from this module rather than repeating the
+``HERMES_HOME = Path(os.getenv(...))`` pattern in every script.
+
+If ``hermes_constants`` imports successfully, its two functions are
+re-exported unchanged, so anything it adds later (profile resolution,
+Docker detection, etc.) is inherited for free.  If the import fails,
+stdlib-only stand-ins defined below take over.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+try:
+    from hermes_constants import display_hermes_home as display_hermes_home
+    from hermes_constants import get_hermes_home as get_hermes_home
+except ImportError:  # also covers ModuleNotFoundError, its subclass
+
+    def get_hermes_home() -> Path:
+        """Return ``$HERMES_HOME`` as a ``Path``, or ``~/.hermes`` when unset.
+
+        A value that is empty or only whitespace counts as unset.
+        """
+        configured = os.environ.get("HERMES_HOME", "").strip()
+        if configured:
+            return Path(configured)
+        return Path.home() / ".hermes"
+
+    def display_hermes_home() -> str:
+        """Return the Hermes home for display, as ``~/...`` when under ``$HOME``."""
+        hermes_home = get_hermes_home()
+        try:
+            tail = hermes_home.relative_to(Path.home())
+        except ValueError:  # not inside the user's home directory
+            return str(hermes_home)
+        return "~/" + str(tail)
@@ -31,7 +31,14 @@ from datetime import datetime, timedelta, timezone
 from email.mime.text import MIMEText
 from pathlib import Path

-HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)
+
+from _hermes_home import get_hermes_home
+
+HERMES_HOME = get_hermes_home()
 TOKEN_PATH = HERMES_HOME / "google_token.json"
 CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"

@@ -10,9 +10,12 @@ import sys
 from datetime import datetime, timezone
 from pathlib import Path

+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)

-def get_hermes_home() -> Path:
-    return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+from _hermes_home import get_hermes_home


 def get_token_path() -> Path:
@@ -21,6 +21,8 @@ Agent workflow:
  6. Run --check to verify. Done.
 """

+from __future__ import annotations  # allow PEP 604 `X | None` on Python 3.9+
+
 import argparse
 import json
 import os
@@ -28,13 +30,12 @@ import subprocess
 import sys
 from pathlib import Path

-try:
-    from hermes_constants import display_hermes_home, get_hermes_home
-except ModuleNotFoundError:
-    HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4]
-    if HERMES_AGENT_ROOT.exists():
-        sys.path.insert(0, str(HERMES_AGENT_ROOT))
-    from hermes_constants import display_hermes_home, get_hermes_home
+# Ensure sibling modules (_hermes_home) are importable when run standalone.
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
+if _SCRIPTS_DIR not in sys.path:
+    sys.path.insert(0, _SCRIPTS_DIR)
+
+from _hermes_home import display_hermes_home, get_hermes_home

 HERMES_HOME = get_hermes_home()
 TOKEN_PATH = HERMES_HOME / "google_token.json"
@@ -111,7 +112,11 @@ def install_deps():
        return True
    except subprocess.CalledProcessError as e:
        print(f"ERROR: Failed to install dependencies: {e}")
-        print(f"Try manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
+        print(
+            "On environments without pip (e.g. Nix), install the optional extra instead:"
+        )
+        print("  pip install 'hermes-agent[google]'")
+        print(f"Or manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
        return False


@@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target

 This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.

+<!-- ascii-guard-ignore -->
 ```
 ┌─────────────────────────────────────────────────────────────┐
 │                    RESEARCH PAPER PIPELINE                  │
@@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
 │                                                             │
 └─────────────────────────────────────────────────────────────┘
 ```
+<!-- ascii-guard-ignore-end -->

 ---

@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
                },
            },
            "auxiliary": {
-                "flush_memories": {
+                "compression": {
                    "provider": "myrelay",
                    "model": "claude-sonnet-4.6",
                },
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
            AnthropicAuxiliaryClient,
            AsyncAnthropicAuxiliaryClient,
        )
-        async_client, async_model = get_async_text_auxiliary_client("flush_memories")
+        async_client, async_model = get_async_text_auxiliary_client("compression")
        assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
        assert async_model == "claude-sonnet-4.6"

-        sync_client, sync_model = get_text_auxiliary_client("flush_memories")
+        sync_client, sync_model = get_text_auxiliary_client("compression")
        assert isinstance(sync_client, AnthropicAuxiliaryClient)
        assert sync_model == "claude-sonnet-4.6"

@@ -1230,3 +1230,210 @@ class TestEmptyTextBlockFix:
        from agent.bedrock_adapter import _convert_content_to_converse
        blocks = _convert_content_to_converse("Hello")
        assert blocks[0]["text"] == "Hello"
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection and per-region client invalidation
+# ---------------------------------------------------------------------------
+
+class TestInvalidateRuntimeClient:
+    """Per-region eviction used to discard dead/stale bedrock-runtime clients."""
+
+    def test_evicts_only_the_target_region(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            invalidate_runtime_client,
+            reset_client_cache,
+        )
+        reset_client_cache()
+        _bedrock_runtime_client_cache["us-east-1"] = "dead-client"
+        _bedrock_runtime_client_cache["us-west-2"] = "live-client"
+
+        evicted = invalidate_runtime_client("us-east-1")
+
+        assert evicted is True
+        assert "us-east-1" not in _bedrock_runtime_client_cache
+        assert _bedrock_runtime_client_cache["us-west-2"] == "live-client"
+
+    def test_returns_false_when_region_not_cached(self):
+        from agent.bedrock_adapter import invalidate_runtime_client, reset_client_cache
+        reset_client_cache()
+        assert invalidate_runtime_client("eu-west-1") is False
+
+
+class TestIsStaleConnectionError:
+    """Classifier that decides whether an exception warrants client eviction."""
+
+    def test_detects_botocore_connection_closed_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import ConnectionClosedError
+        exc = ConnectionClosedError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_botocore_endpoint_connection_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import EndpointConnectionError
+        exc = EndpointConnectionError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_botocore_read_timeout(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from botocore.exceptions import ReadTimeoutError
+        exc = ReadTimeoutError(endpoint_url="https://bedrock.example")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_urllib3_protocol_error(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        from urllib3.exceptions import ProtocolError
+        exc = ProtocolError("Connection broken")
+        assert is_stale_connection_error(exc) is True
+
+    def test_detects_library_internal_assertion_error(self):
+        """A bare AssertionError raised from inside urllib3/botocore signals
+        a corrupted connection-pool invariant and should trigger eviction."""
+        from agent.bedrock_adapter import is_stale_connection_error
+
+        # Fabricate an AssertionError whose traceback's last frame belongs
+        # to a module named "urllib3.connectionpool". We do this by exec'ing
+        # a tiny `assert False` under a fake globals dict — the resulting
+        # frame's ``f_globals["__name__"]`` is what the classifier inspects.
+        fake_globals = {"__name__": "urllib3.connectionpool"}
+        try:
+            exec("def _boom():\n    assert False\n_boom()", fake_globals)
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is True
+        else:
+            pytest.fail("AssertionError not raised")
+
+    def test_detects_botocore_internal_assertion_error(self):
+        """Same as above but for a frame inside the botocore namespace."""
+        from agent.bedrock_adapter import is_stale_connection_error
+        fake_globals = {"__name__": "botocore.httpsession"}
+        try:
+            exec("def _boom():\n    assert False\n_boom()", fake_globals)
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is True
+        else:
+            pytest.fail("AssertionError not raised")
+
+    def test_ignores_application_assertion_error(self):
+        """AssertionError from application code (not urllib3/botocore) should
+        NOT be classified as stale — those are real test/code bugs."""
+        from agent.bedrock_adapter import is_stale_connection_error
+        try:
+            assert False, "test-only"  # noqa: B011
+        except AssertionError as exc:
+            assert is_stale_connection_error(exc) is False
+
+    def test_ignores_unrelated_exceptions(self):
+        from agent.bedrock_adapter import is_stale_connection_error
+        assert is_stale_connection_error(ValueError("bad input")) is False
+        assert is_stale_connection_error(KeyError("missing")) is False
+
+
+class TestCallConverseInvalidatesOnStaleError:
+    """call_converse / call_converse_stream evict the cached client when the
+    boto3 call raises a stale-connection error — so the next invocation
+    reconnects instead of reusing the dead socket."""
+
+    def test_converse_evicts_client_on_stale_error(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ConnectionClosedError
+
+        reset_client_cache()
+        dead_client = MagicMock()
+        dead_client.converse.side_effect = ConnectionClosedError(
+            endpoint_url="https://bedrock.example",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = dead_client
+
+        with pytest.raises(ConnectionClosedError):
+            call_converse(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert "us-east-1" not in _bedrock_runtime_client_cache, (
+            "stale client should have been evicted so the retry reconnects"
+        )
+
+    def test_converse_stream_evicts_client_on_stale_error(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse_stream,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ConnectionClosedError
+
+        reset_client_cache()
+        dead_client = MagicMock()
+        dead_client.converse_stream.side_effect = ConnectionClosedError(
+            endpoint_url="https://bedrock.example",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = dead_client
+
+        with pytest.raises(ConnectionClosedError):
+            call_converse_stream(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert "us-east-1" not in _bedrock_runtime_client_cache
+
+    def test_converse_does_not_evict_on_non_stale_error(self):
+        """Non-stale errors (e.g. ValidationException) leave the client cache alone."""
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+        from botocore.exceptions import ClientError
+
+        reset_client_cache()
+        live_client = MagicMock()
+        live_client.converse.side_effect = ClientError(
+            error_response={"Error": {"Code": "ValidationException", "Message": "bad"}},
+            operation_name="Converse",
+        )
+        _bedrock_runtime_client_cache["us-east-1"] = live_client
+
+        with pytest.raises(ClientError):
+            call_converse(
+                region="us-east-1",
+                model="anthropic.claude-3-sonnet-20240229-v1:0",
+                messages=[{"role": "user", "content": "hi"}],
+            )
+
+        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client, (
+            "validation errors do not indicate a dead connection — keep the client"
+        )
+
+    def test_converse_leaves_successful_client_in_cache(self):
+        from agent.bedrock_adapter import (
+            _bedrock_runtime_client_cache,
+            call_converse,
+            reset_client_cache,
+        )
+
+        reset_client_cache()
+        live_client = MagicMock()
+        live_client.converse.return_value = {
+            "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}},
+            "stopReason": "end_turn",
+            "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2},
+        }
+        _bedrock_runtime_client_cache["us-east-1"] = live_client
+
+        call_converse(
+            region="us-east-1",
+            model="anthropic.claude-3-sonnet-20240229-v1:0",
+            messages=[{"role": "user", "content": "hi"}],
+        )
+
+        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client
@@ -376,17 +376,15 @@ class TestBedrockModelNameNormalization:
            "apac.anthropic.claude-haiku-4-5", preserve_dots=True
        ) == "apac.anthropic.claude-haiku-4-5"

-    def test_preserve_false_mangles_as_documented(self):
-        """Canary: with ``preserve_dots=False`` the function still
-        produces the broken all-hyphen form — this is the shape that
-        Bedrock rejected and that the fix avoids.  Keeping this test
-        locks in the existing behaviour of ``normalize_model_name`` so a
-        future refactor doesn't accidentally decouple the knob from its
-        effect."""
+    def test_bedrock_prefix_preserved_without_preserve_dots(self):
+        """Bedrock inference profile IDs are auto-detected by prefix and
+        always returned unmangled -- ``preserve_dots`` is irrelevant for
+        these IDs because the dots are namespace separators, not version
+        separators.  Regression for #12295."""
        from agent.anthropic_adapter import normalize_model_name
        assert normalize_model_name(
            "global.anthropic.claude-opus-4-7", preserve_dots=False
-        ) == "global-anthropic-claude-opus-4-7"
+        ) == "global.anthropic.claude-opus-4-7"

    def test_bare_foundation_model_id_preserved(self):
        """Non-inference-profile Bedrock IDs
@@ -422,12 +420,11 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
            f"{kwargs['model']!r}"
        )

-    def test_bedrock_model_mangled_without_preserve_dots(self):
-        """Inverse canary: without the flag, ``build_anthropic_kwargs``
-        still produces the broken form — so the fix in
-        ``_anthropic_preserve_dots`` is the load-bearing piece that
-        wires ``preserve_dots=True`` through to this builder for the
-        Bedrock case."""
+    def test_bedrock_model_preserved_without_preserve_dots(self):
+        """Bedrock inference profile IDs survive ``build_anthropic_kwargs``
+        even without ``preserve_dots=True`` -- the prefix auto-detection
+        in ``normalize_model_name`` is the load-bearing piece.
+        Regression for #12295."""
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="global.anthropic.claude-opus-4-7",
@@ -437,4 +434,157 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
            reasoning_config=None,
            preserve_dots=False,
        )
-        assert kwargs["model"] == "global-anthropic-claude-opus-4-7"
+        assert kwargs["model"] == "global.anthropic.claude-opus-4-7"
+
+
+class TestBedrockModelIdDetection:
+    """Tests for ``_is_bedrock_model_id`` and the auto-detection that
+    makes ``normalize_model_name`` preserve dots for Bedrock IDs
+    regardless of ``preserve_dots``.  Regression for #12295."""
+
+    def test_bare_bedrock_id_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("anthropic.claude-opus-4-7") is True
+
+    def test_regional_us_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("us.anthropic.claude-sonnet-4-5-v1:0") is True
+
+    def test_regional_global_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("global.anthropic.claude-opus-4-7") is True
+
+    def test_regional_eu_prefix_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("eu.anthropic.claude-sonnet-4-6") is True
+
+    def test_openrouter_format_not_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("claude-opus-4.6") is False
+
+    def test_bare_claude_not_detected(self):
+        from agent.anthropic_adapter import _is_bedrock_model_id
+        assert _is_bedrock_model_id("claude-opus-4-7") is False
+
+    def test_bare_bedrock_id_preserved_without_flag(self):
+        """The primary bug from #12295: ``anthropic.claude-opus-4-7``
+        sent to bedrock-mantle via auxiliary clients that don't pass
+        ``preserve_dots=True``."""
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name(
+            "anthropic.claude-opus-4-7", preserve_dots=False
+        ) == "anthropic.claude-opus-4-7"
+
+    def test_openrouter_dots_still_converted(self):
+        """Non-Bedrock dotted model names must still be converted."""
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6"
+
+    def test_bare_bedrock_id_survives_build_kwargs(self):
+        """End-to-end: bare Bedrock ID through ``build_anthropic_kwargs``
+        without ``preserve_dots=True`` -- the auxiliary client path."""
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        kwargs = build_anthropic_kwargs(
+            model="anthropic.claude-opus-4-7",
+            messages=[{"role": "user", "content": "hi"}],
+            tools=None,
+            max_tokens=1024,
+            reasoning_config=None,
+            preserve_dots=False,
+        )
+        assert kwargs["model"] == "anthropic.claude-opus-4-7"
+
+
+# ---------------------------------------------------------------------------
+# auxiliary_client Bedrock resolution — fix for #13919
+# ---------------------------------------------------------------------------
+# Before the fix, resolve_provider_client("bedrock", ...) fell through to the
+# "unhandled auth_type" warning and returned (None, None), breaking all
+# auxiliary tasks (compression, memory, summarization) for Bedrock users.
+
+
+class TestAuxiliaryClientBedrockResolution:
+    """Verify resolve_provider_client handles Bedrock's aws_sdk auth type."""
+
+    def test_bedrock_returns_client_with_credentials(self, monkeypatch):
+        """With valid AWS credentials, Bedrock should return a usable client."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+        monkeypatch.setenv("AWS_REGION", "us-west-2")
+
+        mock_anthropic_bedrock = MagicMock()
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=mock_anthropic_bedrock):
+            from agent.auxiliary_client import resolve_provider_client, AnthropicAuxiliaryClient
+            client, model = resolve_provider_client("bedrock", None)
+
+        assert client is not None, (
+            "resolve_provider_client('bedrock') returned None — "
+            "aws_sdk auth type is not handled"
+        )
+        assert isinstance(client, AnthropicAuxiliaryClient)
+        assert model is not None
+        assert client.api_key == "aws-sdk"
+        assert "us-west-2" in client.base_url
+
+    def test_bedrock_returns_none_without_credentials(self, monkeypatch):
+        """Without AWS credentials, Bedrock should return (None, None) gracefully."""
+        with patch("agent.bedrock_adapter.has_aws_credentials", return_value=False):
+            from agent.auxiliary_client import resolve_provider_client
+            client, model = resolve_provider_client("bedrock", None)
+
+        assert client is None
+        assert model is None
+
+    def test_bedrock_uses_configured_region(self, monkeypatch):
+        """Bedrock client base_url should reflect AWS_REGION."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+        monkeypatch.setenv("AWS_REGION", "eu-central-1")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            client, _ = resolve_provider_client("bedrock", None)
+
+        assert client is not None
+        assert "eu-central-1" in client.base_url
+
+    def test_bedrock_respects_explicit_model(self, monkeypatch):
+        """When caller passes an explicit model, it should be used."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            _, model = resolve_provider_client(
+                "bedrock", "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
+            )
+
+        assert "claude-sonnet" in model
+
+    def test_bedrock_async_mode(self, monkeypatch):
+        """Async mode should return an AsyncAnthropicAuxiliaryClient."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client, AsyncAnthropicAuxiliaryClient
+            client, model = resolve_provider_client("bedrock", None, async_mode=True)
+
+        assert client is not None
+        assert isinstance(client, AsyncAnthropicAuxiliaryClient)
+
+    def test_bedrock_default_model_is_haiku(self, monkeypatch):
+        """Default auxiliary model for Bedrock should be Haiku (fast, cheap)."""
+        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
+        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
+
+        with patch("agent.anthropic_adapter.build_anthropic_bedrock_client",
+                   return_value=MagicMock()):
+            from agent.auxiliary_client import resolve_provider_client
+            _, model = resolve_provider_client("bedrock", None)
+
+        assert "haiku" in model.lower()
@@ -847,6 +847,32 @@ class TestTokenBudgetTailProtection:
        assert isinstance(pruned, int)


+class TestUpdateModelBudgets:
+    """Regression: update_model() must recalculate token budgets."""
+
+    def test_tail_budget_recalculated(self):
+        """tail_token_budget must change after switching to a different context length."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        old_tail = comp.tail_token_budget
+        old_max_summary = comp.max_summary_tokens
+
+        comp.update_model("model-b", context_length=32_000)
+        assert comp.tail_token_budget != old_tail, "tail_token_budget should change"
+        assert comp.tail_token_budget < old_tail, "smaller context → smaller budget"
+        assert comp.max_summary_tokens != old_max_summary, "max_summary_tokens should change"
+
+    def test_budgets_proportional(self):
+        """Budgets should be proportional to context_length after update."""
+        from unittest.mock import patch
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
+        comp.update_model("model-b", context_length=10_000)
+        assert comp.tail_token_budget == int(comp.threshold_tokens * comp.summary_target_ratio)
+        assert comp.max_summary_tokens == min(int(10_000 * 0.05), 4000)
+
+
 class TestTruncateToolCallArgsJson:
    """Regression tests for #11762.

--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}`